2014-04-14 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob8bc4e8fc791358f7cec292b7c1c4eed1b65b674d
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "stor-layout.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "pointer-set.h"
74 #include "hash-table.h"
75 #include "tree-ssa-alias.h"
76 #include "internal-fn.h"
77 #include "tree-eh.h"
78 #include "gimple-expr.h"
79 #include "is-a.h"
80 #include "gimple.h"
81 #include "gimplify.h"
82 #include "gimple-iterator.h"
83 #include "gimplify-me.h"
84 #include "gimple-ssa.h"
85 #include "cgraph.h"
86 #include "tree-cfg.h"
87 #include "tree-phinodes.h"
88 #include "ssa-iterators.h"
89 #include "stringpool.h"
90 #include "tree-ssanames.h"
91 #include "tree-ssa-loop-ivopts.h"
92 #include "tree-ssa-loop-manip.h"
93 #include "tree-ssa-loop-niter.h"
94 #include "tree-ssa-loop.h"
95 #include "expr.h"
96 #include "tree-dfa.h"
97 #include "tree-ssa.h"
98 #include "cfgloop.h"
99 #include "tree-pass.h"
100 #include "insn-config.h"
101 #include "tree-chrec.h"
102 #include "tree-scalar-evolution.h"
103 #include "cfgloop.h"
104 #include "params.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
107 #include "target.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
110 #include "expmed.h"
111 #include "tree-ssa-address.h"
113 /* FIXME: Expressions are expanded to RTL in this pass to determine the
114 cost of different addressing modes. This should be moved to a TBD
115 interface between the GIMPLE and RTL worlds. */
116 #include "expr.h"
117 #include "recog.h"
119 /* The infinite cost. */
120 #define INFTY 10000000
122 #define AVG_LOOP_NITER(LOOP) 5
124 /* Returns the expected number of loop iterations for LOOP.
125 The average trip count is computed from profile data if it
126 exists. */
128 static inline HOST_WIDE_INT
129 avg_loop_niter (struct loop *loop)
131 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
132 if (niter == -1)
133 return AVG_LOOP_NITER (loop);
135 return niter;
138 /* Representation of the induction variable. */
139 struct iv
141 tree base; /* Initial value of the iv. */
142 tree base_object; /* A memory object to that the induction variable points. */
143 tree step; /* Step of the iv (constant only). */
144 tree ssa_name; /* The ssa name with the value. */
145 bool biv_p; /* Is it a biv? */
146 bool have_use_for; /* Do we already have a use for it? */
147 unsigned use_id; /* The identifier in the use if it is the case. */
150 /* Per-ssa version information (induction variable descriptions, etc.). */
151 struct version_info
153 tree name; /* The ssa name. */
154 struct iv *iv; /* Induction variable description. */
155 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
156 an expression that is not an induction variable. */
157 bool preserve_biv; /* For the original biv, whether to preserve it. */
158 unsigned inv_id; /* Id of an invariant. */
161 /* Types of uses. */
162 enum use_type
164 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
165 USE_ADDRESS, /* Use in an address. */
166 USE_COMPARE /* Use is a compare. */
169 /* Cost of a computation. */
170 typedef struct
172 int cost; /* The runtime cost. */
173 unsigned complexity; /* The estimate of the complexity of the code for
174 the computation (in no concrete units --
175 complexity field should be larger for more
176 complex expressions and addressing modes). */
177 } comp_cost;
179 static const comp_cost no_cost = {0, 0};
180 static const comp_cost infinite_cost = {INFTY, INFTY};
182 /* The candidate - cost pair. */
183 struct cost_pair
185 struct iv_cand *cand; /* The candidate. */
186 comp_cost cost; /* The cost. */
187 bitmap depends_on; /* The list of invariants that have to be
188 preserved. */
189 tree value; /* For final value elimination, the expression for
190 the final value of the iv. For iv elimination,
191 the new bound to compare with. */
192 enum tree_code comp; /* For iv elimination, the comparison. */
193 int inv_expr_id; /* Loop invariant expression id. */
196 /* Use. */
197 struct iv_use
199 unsigned id; /* The id of the use. */
200 enum use_type type; /* Type of the use. */
201 struct iv *iv; /* The induction variable it is based on. */
202 gimple stmt; /* Statement in that it occurs. */
203 tree *op_p; /* The place where it occurs. */
204 bitmap related_cands; /* The set of "related" iv candidates, plus the common
205 important ones. */
207 unsigned n_map_members; /* Number of candidates in the cost_map list. */
208 struct cost_pair *cost_map;
209 /* The costs wrto the iv candidates. */
211 struct iv_cand *selected;
212 /* The selected candidate. */
215 /* The position where the iv is computed. */
216 enum iv_position
218 IP_NORMAL, /* At the end, just before the exit condition. */
219 IP_END, /* At the end of the latch block. */
220 IP_BEFORE_USE, /* Immediately before a specific use. */
221 IP_AFTER_USE, /* Immediately after a specific use. */
222 IP_ORIGINAL /* The original biv. */
225 /* The induction variable candidate. */
226 struct iv_cand
228 unsigned id; /* The number of the candidate. */
229 bool important; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
232 gimple incremented_at;/* For original biv, the statement where it is
233 incremented. */
234 tree var_before; /* The variable used for it before increment. */
235 tree var_after; /* The variable used for it after increment. */
236 struct iv *iv; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost; /* Cost of the candidate. */
241 unsigned cost_step; /* Cost of the candidate's increment operation. */
242 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on; /* The list of invariants that are used in step of the
245 biv. */
248 /* Loop invariant expression hashtable entry. */
249 struct iv_inv_expr_ent
251 tree expr;
252 int id;
253 hashval_t hash;
256 /* The data used by the induction variable optimizations. */
258 typedef struct iv_use *iv_use_p;
260 typedef struct iv_cand *iv_cand_p;
262 /* Hashtable helpers. */
264 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
266 typedef iv_inv_expr_ent value_type;
267 typedef iv_inv_expr_ent compare_type;
268 static inline hashval_t hash (const value_type *);
269 static inline bool equal (const value_type *, const compare_type *);
272 /* Hash function for loop invariant expressions. */
274 inline hashval_t
275 iv_inv_expr_hasher::hash (const value_type *expr)
277 return expr->hash;
280 /* Hash table equality function for expressions. */
282 inline bool
283 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
285 return expr1->hash == expr2->hash
286 && operand_equal_p (expr1->expr, expr2->expr, 0);
289 struct ivopts_data
291 /* The currently optimized loop. */
292 struct loop *current_loop;
294 /* Numbers of iterations for all exits of the current loop. */
295 struct pointer_map_t *niters;
297 /* Number of registers used in it. */
298 unsigned regs_used;
300 /* The size of version_info array allocated. */
301 unsigned version_info_size;
303 /* The array of information for the ssa names. */
304 struct version_info *version_info;
306 /* The hashtable of loop invariant expressions created
307 by ivopt. */
308 hash_table <iv_inv_expr_hasher> inv_expr_tab;
310 /* Loop invariant expression id. */
311 int inv_expr_id;
313 /* The bitmap of indices in version_info whose value was changed. */
314 bitmap relevant;
316 /* The uses of induction variables. */
317 vec<iv_use_p> iv_uses;
319 /* The candidates. */
320 vec<iv_cand_p> iv_candidates;
322 /* A bitmap of important candidates. */
323 bitmap important_candidates;
325 /* The maximum invariant id. */
326 unsigned max_inv_id;
328 /* Whether to consider just related and important candidates when replacing a
329 use. */
330 bool consider_all_candidates;
332 /* Are we optimizing for speed? */
333 bool speed;
335 /* Whether the loop body includes any function calls. */
336 bool body_includes_call;
338 /* Whether the loop body can only be exited via single exit. */
339 bool loop_single_exit_p;
342 /* An assignment of iv candidates to uses. */
344 struct iv_ca
346 /* The number of uses covered by the assignment. */
347 unsigned upto;
349 /* Number of uses that cannot be expressed by the candidates in the set. */
350 unsigned bad_uses;
352 /* Candidate assigned to a use, together with the related costs. */
353 struct cost_pair **cand_for_use;
355 /* Number of times each candidate is used. */
356 unsigned *n_cand_uses;
358 /* The candidates used. */
359 bitmap cands;
361 /* The number of candidates in the set. */
362 unsigned n_cands;
364 /* Total number of registers needed. */
365 unsigned n_regs;
367 /* Total cost of expressing uses. */
368 comp_cost cand_use_cost;
370 /* Total cost of candidates. */
371 unsigned cand_cost;
373 /* Number of times each invariant is used. */
374 unsigned *n_invariant_uses;
376 /* The array holding the number of uses of each loop
377 invariant expressions created by ivopt. */
378 unsigned *used_inv_expr;
380 /* The number of created loop invariants. */
381 unsigned num_used_inv_expr;
383 /* Total cost of the assignment. */
384 comp_cost cost;
387 /* Difference of two iv candidate assignments. */
389 struct iv_ca_delta
391 /* Changed use. */
392 struct iv_use *use;
394 /* An old assignment (for rollback purposes). */
395 struct cost_pair *old_cp;
397 /* A new assignment. */
398 struct cost_pair *new_cp;
400 /* Next change in the list. */
401 struct iv_ca_delta *next_change;
404 /* Bound on number of candidates below that all candidates are considered. */
406 #define CONSIDER_ALL_CANDIDATES_BOUND \
407 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
409 /* If there are more iv occurrences, we just give up (it is quite unlikely that
410 optimizing such a loop would help, and it would take ages). */
412 #define MAX_CONSIDERED_USES \
413 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
415 /* If there are at most this number of ivs in the set, try removing unnecessary
416 ivs from the set always. */
418 #define ALWAYS_PRUNE_CAND_SET_BOUND \
419 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
421 /* The list of trees for that the decl_rtl field must be reset is stored
422 here. */
424 static vec<tree> decl_rtl_to_reset;
426 static comp_cost force_expr_to_var_cost (tree, bool);
428 /* Number of uses recorded in DATA. */
430 static inline unsigned
431 n_iv_uses (struct ivopts_data *data)
433 return data->iv_uses.length ();
436 /* Ith use recorded in DATA. */
438 static inline struct iv_use *
439 iv_use (struct ivopts_data *data, unsigned i)
441 return data->iv_uses[i];
444 /* Number of candidates recorded in DATA. */
446 static inline unsigned
447 n_iv_cands (struct ivopts_data *data)
449 return data->iv_candidates.length ();
452 /* Ith candidate recorded in DATA. */
454 static inline struct iv_cand *
455 iv_cand (struct ivopts_data *data, unsigned i)
457 return data->iv_candidates[i];
460 /* The single loop exit if it dominates the latch, NULL otherwise. */
462 edge
463 single_dom_exit (struct loop *loop)
465 edge exit = single_exit (loop);
467 if (!exit)
468 return NULL;
470 if (!just_once_each_iteration_p (loop, exit->src))
471 return NULL;
473 return exit;
476 /* Dumps information about the induction variable IV to FILE. */
478 void
479 dump_iv (FILE *file, struct iv *iv)
481 if (iv->ssa_name)
483 fprintf (file, "ssa name ");
484 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
485 fprintf (file, "\n");
488 fprintf (file, " type ");
489 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
490 fprintf (file, "\n");
492 if (iv->step)
494 fprintf (file, " base ");
495 print_generic_expr (file, iv->base, TDF_SLIM);
496 fprintf (file, "\n");
498 fprintf (file, " step ");
499 print_generic_expr (file, iv->step, TDF_SLIM);
500 fprintf (file, "\n");
502 else
504 fprintf (file, " invariant ");
505 print_generic_expr (file, iv->base, TDF_SLIM);
506 fprintf (file, "\n");
509 if (iv->base_object)
511 fprintf (file, " base object ");
512 print_generic_expr (file, iv->base_object, TDF_SLIM);
513 fprintf (file, "\n");
516 if (iv->biv_p)
517 fprintf (file, " is a biv\n");
520 /* Dumps information about the USE to FILE. */
522 void
523 dump_use (FILE *file, struct iv_use *use)
525 fprintf (file, "use %d\n", use->id);
527 switch (use->type)
529 case USE_NONLINEAR_EXPR:
530 fprintf (file, " generic\n");
531 break;
533 case USE_ADDRESS:
534 fprintf (file, " address\n");
535 break;
537 case USE_COMPARE:
538 fprintf (file, " compare\n");
539 break;
541 default:
542 gcc_unreachable ();
545 fprintf (file, " in statement ");
546 print_gimple_stmt (file, use->stmt, 0, 0);
547 fprintf (file, "\n");
549 fprintf (file, " at position ");
550 if (use->op_p)
551 print_generic_expr (file, *use->op_p, TDF_SLIM);
552 fprintf (file, "\n");
554 dump_iv (file, use->iv);
556 if (use->related_cands)
558 fprintf (file, " related candidates ");
559 dump_bitmap (file, use->related_cands);
563 /* Dumps information about the uses to FILE. */
565 void
566 dump_uses (FILE *file, struct ivopts_data *data)
568 unsigned i;
569 struct iv_use *use;
571 for (i = 0; i < n_iv_uses (data); i++)
573 use = iv_use (data, i);
575 dump_use (file, use);
576 fprintf (file, "\n");
580 /* Dumps information about induction variable candidate CAND to FILE. */
582 void
583 dump_cand (FILE *file, struct iv_cand *cand)
585 struct iv *iv = cand->iv;
587 fprintf (file, "candidate %d%s\n",
588 cand->id, cand->important ? " (important)" : "");
590 if (cand->depends_on)
592 fprintf (file, " depends on ");
593 dump_bitmap (file, cand->depends_on);
596 if (!iv)
598 fprintf (file, " final value replacement\n");
599 return;
602 if (cand->var_before)
604 fprintf (file, " var_before ");
605 print_generic_expr (file, cand->var_before, TDF_SLIM);
606 fprintf (file, "\n");
608 if (cand->var_after)
610 fprintf (file, " var_after ");
611 print_generic_expr (file, cand->var_after, TDF_SLIM);
612 fprintf (file, "\n");
615 switch (cand->pos)
617 case IP_NORMAL:
618 fprintf (file, " incremented before exit test\n");
619 break;
621 case IP_BEFORE_USE:
622 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
623 break;
625 case IP_AFTER_USE:
626 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
627 break;
629 case IP_END:
630 fprintf (file, " incremented at end\n");
631 break;
633 case IP_ORIGINAL:
634 fprintf (file, " original biv\n");
635 break;
638 dump_iv (file, iv);
641 /* Returns the info for ssa version VER. */
643 static inline struct version_info *
644 ver_info (struct ivopts_data *data, unsigned ver)
646 return data->version_info + ver;
649 /* Returns the info for ssa name NAME. */
651 static inline struct version_info *
652 name_info (struct ivopts_data *data, tree name)
654 return ver_info (data, SSA_NAME_VERSION (name));
657 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
658 emitted in LOOP. */
660 static bool
661 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
663 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
665 gcc_assert (bb);
667 if (sbb == loop->latch)
668 return true;
670 if (sbb != bb)
671 return false;
673 return stmt == last_stmt (bb);
676 /* Returns true if STMT if after the place where the original induction
677 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
678 if the positions are identical. */
680 static bool
681 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
683 basic_block cand_bb = gimple_bb (cand->incremented_at);
684 basic_block stmt_bb = gimple_bb (stmt);
686 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
687 return false;
689 if (stmt_bb != cand_bb)
690 return true;
692 if (true_if_equal
693 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
694 return true;
695 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
698 /* Returns true if STMT if after the place where the induction variable
699 CAND is incremented in LOOP. */
701 static bool
702 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
704 switch (cand->pos)
706 case IP_END:
707 return false;
709 case IP_NORMAL:
710 return stmt_after_ip_normal_pos (loop, stmt);
712 case IP_ORIGINAL:
713 case IP_AFTER_USE:
714 return stmt_after_inc_pos (cand, stmt, false);
716 case IP_BEFORE_USE:
717 return stmt_after_inc_pos (cand, stmt, true);
719 default:
720 gcc_unreachable ();
724 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
726 static bool
727 abnormal_ssa_name_p (tree exp)
729 if (!exp)
730 return false;
732 if (TREE_CODE (exp) != SSA_NAME)
733 return false;
735 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
738 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
739 abnormal phi node. Callback for for_each_index. */
741 static bool
742 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
743 void *data ATTRIBUTE_UNUSED)
745 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
747 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
748 return false;
749 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
750 return false;
753 return !abnormal_ssa_name_p (*index);
756 /* Returns true if EXPR contains a ssa name that occurs in an
757 abnormal phi node. */
759 bool
760 contains_abnormal_ssa_name_p (tree expr)
762 enum tree_code code;
763 enum tree_code_class codeclass;
765 if (!expr)
766 return false;
768 code = TREE_CODE (expr);
769 codeclass = TREE_CODE_CLASS (code);
771 if (code == SSA_NAME)
772 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
774 if (code == INTEGER_CST
775 || is_gimple_min_invariant (expr))
776 return false;
778 if (code == ADDR_EXPR)
779 return !for_each_index (&TREE_OPERAND (expr, 0),
780 idx_contains_abnormal_ssa_name_p,
781 NULL);
783 if (code == COND_EXPR)
784 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
785 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
786 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
788 switch (codeclass)
790 case tcc_binary:
791 case tcc_comparison:
792 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
793 return true;
795 /* Fallthru. */
796 case tcc_unary:
797 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
798 return true;
800 break;
802 default:
803 gcc_unreachable ();
806 return false;
809 /* Returns the structure describing number of iterations determined from
810 EXIT of DATA->current_loop, or NULL if something goes wrong. */
812 static struct tree_niter_desc *
813 niter_for_exit (struct ivopts_data *data, edge exit)
815 struct tree_niter_desc *desc;
816 void **slot;
818 if (!data->niters)
820 data->niters = pointer_map_create ();
821 slot = NULL;
823 else
824 slot = pointer_map_contains (data->niters, exit);
826 if (!slot)
828 /* Try to determine number of iterations. We cannot safely work with ssa
829 names that appear in phi nodes on abnormal edges, so that we do not
830 create overlapping life ranges for them (PR 27283). */
831 desc = XNEW (struct tree_niter_desc);
832 if (!number_of_iterations_exit (data->current_loop,
833 exit, desc, true)
834 || contains_abnormal_ssa_name_p (desc->niter))
836 XDELETE (desc);
837 desc = NULL;
839 slot = pointer_map_insert (data->niters, exit);
840 *slot = desc;
842 else
843 desc = (struct tree_niter_desc *) *slot;
845 return desc;
848 /* Returns the structure describing number of iterations determined from
849 single dominating exit of DATA->current_loop, or NULL if something
850 goes wrong. */
852 static struct tree_niter_desc *
853 niter_for_single_dom_exit (struct ivopts_data *data)
855 edge exit = single_dom_exit (data->current_loop);
857 if (!exit)
858 return NULL;
860 return niter_for_exit (data, exit);
863 /* Initializes data structures used by the iv optimization pass, stored
864 in DATA. */
866 static void
867 tree_ssa_iv_optimize_init (struct ivopts_data *data)
869 data->version_info_size = 2 * num_ssa_names;
870 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
871 data->relevant = BITMAP_ALLOC (NULL);
872 data->important_candidates = BITMAP_ALLOC (NULL);
873 data->max_inv_id = 0;
874 data->niters = NULL;
875 data->iv_uses.create (20);
876 data->iv_candidates.create (20);
877 data->inv_expr_tab.create (10);
878 data->inv_expr_id = 0;
879 decl_rtl_to_reset.create (20);
882 /* Returns a memory object to that EXPR points. In case we are able to
883 determine that it does not point to any such object, NULL is returned. */
885 static tree
886 determine_base_object (tree expr)
888 enum tree_code code = TREE_CODE (expr);
889 tree base, obj;
891 /* If this is a pointer casted to any type, we need to determine
892 the base object for the pointer; so handle conversions before
893 throwing away non-pointer expressions. */
894 if (CONVERT_EXPR_P (expr))
895 return determine_base_object (TREE_OPERAND (expr, 0));
897 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
898 return NULL_TREE;
900 switch (code)
902 case INTEGER_CST:
903 return NULL_TREE;
905 case ADDR_EXPR:
906 obj = TREE_OPERAND (expr, 0);
907 base = get_base_address (obj);
909 if (!base)
910 return expr;
912 if (TREE_CODE (base) == MEM_REF)
913 return determine_base_object (TREE_OPERAND (base, 0));
915 return fold_convert (ptr_type_node,
916 build_fold_addr_expr (base));
918 case POINTER_PLUS_EXPR:
919 return determine_base_object (TREE_OPERAND (expr, 0));
921 case PLUS_EXPR:
922 case MINUS_EXPR:
923 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
924 gcc_unreachable ();
926 default:
927 return fold_convert (ptr_type_node, expr);
931 /* Allocates an induction variable with given initial value BASE and step STEP
932 for loop LOOP. */
934 static struct iv *
935 alloc_iv (tree base, tree step)
937 tree base_object = base;
938 struct iv *iv = XCNEW (struct iv);
939 gcc_assert (step != NULL_TREE);
941 /* Lower all address expressions except ones with DECL_P as operand.
942 By doing this:
943 1) More accurate cost can be computed for address expressions;
944 2) Duplicate candidates won't be created for bases in different
945 forms, like &a[0] and &a. */
946 STRIP_NOPS (base_object);
947 if (TREE_CODE (base_object) == ADDR_EXPR
948 && !DECL_P (TREE_OPERAND (base_object, 0)))
950 aff_tree comb;
951 double_int size;
952 base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
953 &comb, &size);
954 gcc_assert (base_object != NULL_TREE);
955 base_object = build_fold_addr_expr (base_object);
956 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
959 iv->base = base;
960 iv->base_object = determine_base_object (base_object);
961 iv->step = step;
962 iv->biv_p = false;
963 iv->have_use_for = false;
964 iv->use_id = 0;
965 iv->ssa_name = NULL_TREE;
967 return iv;
970 /* Sets STEP and BASE for induction variable IV. */
972 static void
973 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
975 struct version_info *info = name_info (data, iv);
977 gcc_assert (!info->iv);
979 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
980 info->iv = alloc_iv (base, step);
981 info->iv->ssa_name = iv;
984 /* Finds induction variable declaration for VAR. */
986 static struct iv *
987 get_iv (struct ivopts_data *data, tree var)
989 basic_block bb;
990 tree type = TREE_TYPE (var);
992 if (!POINTER_TYPE_P (type)
993 && !INTEGRAL_TYPE_P (type))
994 return NULL;
996 if (!name_info (data, var)->iv)
998 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1000 if (!bb
1001 || !flow_bb_inside_loop_p (data->current_loop, bb))
1002 set_iv (data, var, var, build_int_cst (type, 0));
1005 return name_info (data, var)->iv;
1008 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1009 not define a simple affine biv with nonzero step. */
1011 static tree
1012 determine_biv_step (gimple phi)
1014 struct loop *loop = gimple_bb (phi)->loop_father;
1015 tree name = PHI_RESULT (phi);
1016 affine_iv iv;
1018 if (virtual_operand_p (name))
1019 return NULL_TREE;
1021 if (!simple_iv (loop, loop, name, &iv, true))
1022 return NULL_TREE;
1024 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1027 /* Finds basic ivs. */
1029 static bool
1030 find_bivs (struct ivopts_data *data)
1032 gimple phi;
1033 tree step, type, base;
1034 bool found = false;
1035 struct loop *loop = data->current_loop;
1036 gimple_stmt_iterator psi;
1038 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1040 phi = gsi_stmt (psi);
1042 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1043 continue;
1045 step = determine_biv_step (phi);
1046 if (!step)
1047 continue;
1049 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1050 base = expand_simple_operations (base);
1051 if (contains_abnormal_ssa_name_p (base)
1052 || contains_abnormal_ssa_name_p (step))
1053 continue;
1055 type = TREE_TYPE (PHI_RESULT (phi));
1056 base = fold_convert (type, base);
1057 if (step)
1059 if (POINTER_TYPE_P (type))
1060 step = convert_to_ptrofftype (step);
1061 else
1062 step = fold_convert (type, step);
1065 set_iv (data, PHI_RESULT (phi), base, step);
1066 found = true;
1069 return found;
1072 /* Marks basic ivs. */
1074 static void
1075 mark_bivs (struct ivopts_data *data)
1077 gimple phi, def;
1078 tree var;
1079 struct iv *iv, *incr_iv;
1080 struct loop *loop = data->current_loop;
1081 basic_block incr_bb;
1082 gimple_stmt_iterator psi;
1084 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1086 phi = gsi_stmt (psi);
1088 iv = get_iv (data, PHI_RESULT (phi));
1089 if (!iv)
1090 continue;
1092 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1093 def = SSA_NAME_DEF_STMT (var);
1094 /* Don't mark iv peeled from other one as biv. */
1095 if (def
1096 && gimple_code (def) == GIMPLE_PHI
1097 && gimple_bb (def) == loop->header)
1098 continue;
1100 incr_iv = get_iv (data, var);
1101 if (!incr_iv)
1102 continue;
1104 /* If the increment is in the subloop, ignore it. */
1105 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1106 if (incr_bb->loop_father != data->current_loop
1107 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1108 continue;
1110 iv->biv_p = true;
1111 incr_iv->biv_p = true;
1115 /* Checks whether STMT defines a linear induction variable and stores its
1116 parameters to IV. */
1118 static bool
1119 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1121 tree lhs;
1122 struct loop *loop = data->current_loop;
1124 iv->base = NULL_TREE;
1125 iv->step = NULL_TREE;
1127 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1128 return false;
1130 lhs = gimple_assign_lhs (stmt);
1131 if (TREE_CODE (lhs) != SSA_NAME)
1132 return false;
1134 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1135 return false;
1136 iv->base = expand_simple_operations (iv->base);
1138 if (contains_abnormal_ssa_name_p (iv->base)
1139 || contains_abnormal_ssa_name_p (iv->step))
1140 return false;
1142 /* If STMT could throw, then do not consider STMT as defining a GIV.
1143 While this will suppress optimizations, we can not safely delete this
1144 GIV and associated statements, even if it appears it is not used. */
1145 if (stmt_could_throw_p (stmt))
1146 return false;
1148 return true;
1151 /* Finds general ivs in statement STMT. */
1153 static void
1154 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1156 affine_iv iv;
1158 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1159 return;
1161 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1164 /* Finds general ivs in basic block BB. */
1166 static void
1167 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1169 gimple_stmt_iterator bsi;
1171 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1172 find_givs_in_stmt (data, gsi_stmt (bsi));
1175 /* Finds general ivs. */
1177 static void
1178 find_givs (struct ivopts_data *data)
1180 struct loop *loop = data->current_loop;
1181 basic_block *body = get_loop_body_in_dom_order (loop);
1182 unsigned i;
1184 for (i = 0; i < loop->num_nodes; i++)
1185 find_givs_in_bb (data, body[i]);
1186 free (body);
1189 /* For each ssa name defined in LOOP determines whether it is an induction
1190 variable and if so, its initial value and step. */
1192 static bool
1193 find_induction_variables (struct ivopts_data *data)
1195 unsigned i;
1196 bitmap_iterator bi;
1198 if (!find_bivs (data))
1199 return false;
1201 find_givs (data);
1202 mark_bivs (data);
1204 if (dump_file && (dump_flags & TDF_DETAILS))
1206 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1208 if (niter)
1210 fprintf (dump_file, " number of iterations ");
1211 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1212 if (!integer_zerop (niter->may_be_zero))
1214 fprintf (dump_file, "; zero if ");
1215 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1217 fprintf (dump_file, "\n\n");
1220 fprintf (dump_file, "Induction variables:\n\n");
1222 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1224 if (ver_info (data, i)->iv)
1225 dump_iv (dump_file, ver_info (data, i)->iv);
1229 return true;
1232 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1234 static struct iv_use *
1235 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1236 gimple stmt, enum use_type use_type)
1238 struct iv_use *use = XCNEW (struct iv_use);
1240 use->id = n_iv_uses (data);
1241 use->type = use_type;
1242 use->iv = iv;
1243 use->stmt = stmt;
1244 use->op_p = use_p;
1245 use->related_cands = BITMAP_ALLOC (NULL);
1247 /* To avoid showing ssa name in the dumps, if it was not reset by the
1248 caller. */
1249 iv->ssa_name = NULL_TREE;
1251 if (dump_file && (dump_flags & TDF_DETAILS))
1252 dump_use (dump_file, use);
1254 data->iv_uses.safe_push (use);
1256 return use;
1259 /* Checks whether OP is a loop-level invariant and if so, records it.
1260 NONLINEAR_USE is true if the invariant is used in a way we do not
1261 handle specially. */
1263 static void
1264 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1266 basic_block bb;
1267 struct version_info *info;
1269 if (TREE_CODE (op) != SSA_NAME
1270 || virtual_operand_p (op))
1271 return;
1273 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1274 if (bb
1275 && flow_bb_inside_loop_p (data->current_loop, bb))
1276 return;
1278 info = name_info (data, op);
1279 info->name = op;
1280 info->has_nonlin_use |= nonlinear_use;
1281 if (!info->inv_id)
1282 info->inv_id = ++data->max_inv_id;
1283 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1286 /* Checks whether the use OP is interesting and if so, records it. */
1288 static struct iv_use *
1289 find_interesting_uses_op (struct ivopts_data *data, tree op)
1291 struct iv *iv;
1292 struct iv *civ;
1293 gimple stmt;
1294 struct iv_use *use;
1296 if (TREE_CODE (op) != SSA_NAME)
1297 return NULL;
1299 iv = get_iv (data, op);
1300 if (!iv)
1301 return NULL;
1303 if (iv->have_use_for)
1305 use = iv_use (data, iv->use_id);
1307 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1308 return use;
1311 if (integer_zerop (iv->step))
1313 record_invariant (data, op, true);
1314 return NULL;
1316 iv->have_use_for = true;
1318 civ = XNEW (struct iv);
1319 *civ = *iv;
1321 stmt = SSA_NAME_DEF_STMT (op);
1322 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1323 || is_gimple_assign (stmt));
1325 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1326 iv->use_id = use->id;
1328 return use;
1331 /* Given a condition in statement STMT, checks whether it is a compare
1332 of an induction variable and an invariant. If this is the case,
1333 CONTROL_VAR is set to location of the iv, BOUND to the location of
1334 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1335 induction variable descriptions, and true is returned. If this is not
1336 the case, CONTROL_VAR and BOUND are set to the arguments of the
1337 condition and false is returned. */
1339 static bool
1340 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1341 tree **control_var, tree **bound,
1342 struct iv **iv_var, struct iv **iv_bound)
1344 /* The objects returned when COND has constant operands. */
1345 static struct iv const_iv;
1346 static tree zero;
1347 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1348 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1349 bool ret = false;
1351 if (gimple_code (stmt) == GIMPLE_COND)
1353 op0 = gimple_cond_lhs_ptr (stmt);
1354 op1 = gimple_cond_rhs_ptr (stmt);
1356 else
1358 op0 = gimple_assign_rhs1_ptr (stmt);
1359 op1 = gimple_assign_rhs2_ptr (stmt);
1362 zero = integer_zero_node;
1363 const_iv.step = integer_zero_node;
1365 if (TREE_CODE (*op0) == SSA_NAME)
1366 iv0 = get_iv (data, *op0);
1367 if (TREE_CODE (*op1) == SSA_NAME)
1368 iv1 = get_iv (data, *op1);
1370 /* Exactly one of the compared values must be an iv, and the other one must
1371 be an invariant. */
1372 if (!iv0 || !iv1)
1373 goto end;
1375 if (integer_zerop (iv0->step))
1377 /* Control variable may be on the other side. */
1378 tmp_op = op0; op0 = op1; op1 = tmp_op;
1379 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1381 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1383 end:
1384 if (control_var)
1385 *control_var = op0;;
1386 if (iv_var)
1387 *iv_var = iv0;;
1388 if (bound)
1389 *bound = op1;
1390 if (iv_bound)
1391 *iv_bound = iv1;
1393 return ret;
1396 /* Checks whether the condition in STMT is interesting and if so,
1397 records it. */
1399 static void
1400 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1402 tree *var_p, *bound_p;
1403 struct iv *var_iv, *civ;
1405 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1407 find_interesting_uses_op (data, *var_p);
1408 find_interesting_uses_op (data, *bound_p);
1409 return;
1412 civ = XNEW (struct iv);
1413 *civ = *var_iv;
1414 record_use (data, NULL, civ, stmt, USE_COMPARE);
1417 /* Returns the outermost loop EXPR is obviously invariant in
1418 relative to the loop LOOP, i.e. if all its operands are defined
1419 outside of the returned loop. Returns NULL if EXPR is not
1420 even obviously invariant in LOOP. */
1422 struct loop *
1423 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1425 basic_block def_bb;
1426 unsigned i, len;
1428 if (is_gimple_min_invariant (expr))
1429 return current_loops->tree_root;
1431 if (TREE_CODE (expr) == SSA_NAME)
1433 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1434 if (def_bb)
1436 if (flow_bb_inside_loop_p (loop, def_bb))
1437 return NULL;
1438 return superloop_at_depth (loop,
1439 loop_depth (def_bb->loop_father) + 1);
1442 return current_loops->tree_root;
1445 if (!EXPR_P (expr))
1446 return NULL;
1448 unsigned maxdepth = 0;
1449 len = TREE_OPERAND_LENGTH (expr);
1450 for (i = 0; i < len; i++)
1452 struct loop *ivloop;
1453 if (!TREE_OPERAND (expr, i))
1454 continue;
1456 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1457 if (!ivloop)
1458 return NULL;
1459 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1462 return superloop_at_depth (loop, maxdepth);
1465 /* Returns true if expression EXPR is obviously invariant in LOOP,
1466 i.e. if all its operands are defined outside of the LOOP. LOOP
1467 should not be the function body. */
1469 bool
1470 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1472 basic_block def_bb;
1473 unsigned i, len;
1475 gcc_assert (loop_depth (loop) > 0);
1477 if (is_gimple_min_invariant (expr))
1478 return true;
1480 if (TREE_CODE (expr) == SSA_NAME)
1482 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1483 if (def_bb
1484 && flow_bb_inside_loop_p (loop, def_bb))
1485 return false;
1487 return true;
1490 if (!EXPR_P (expr))
1491 return false;
1493 len = TREE_OPERAND_LENGTH (expr);
1494 for (i = 0; i < len; i++)
1495 if (TREE_OPERAND (expr, i)
1496 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1497 return false;
1499 return true;
1502 /* Cumulates the steps of indices into DATA and replaces their values with the
1503 initial ones. Returns false when the value of the index cannot be determined.
1504 Callback for for_each_index. */
1506 struct ifs_ivopts_data
1508 struct ivopts_data *ivopts_data;
1509 gimple stmt;
1510 tree step;
1513 static bool
1514 idx_find_step (tree base, tree *idx, void *data)
1516 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1517 struct iv *iv;
1518 tree step, iv_base, iv_step, lbound, off;
1519 struct loop *loop = dta->ivopts_data->current_loop;
1521 /* If base is a component ref, require that the offset of the reference
1522 be invariant. */
1523 if (TREE_CODE (base) == COMPONENT_REF)
1525 off = component_ref_field_offset (base);
1526 return expr_invariant_in_loop_p (loop, off);
1529 /* If base is array, first check whether we will be able to move the
1530 reference out of the loop (in order to take its address in strength
1531 reduction). In order for this to work we need both lower bound
1532 and step to be loop invariants. */
1533 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1535 /* Moreover, for a range, the size needs to be invariant as well. */
1536 if (TREE_CODE (base) == ARRAY_RANGE_REF
1537 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1538 return false;
1540 step = array_ref_element_size (base);
1541 lbound = array_ref_low_bound (base);
1543 if (!expr_invariant_in_loop_p (loop, step)
1544 || !expr_invariant_in_loop_p (loop, lbound))
1545 return false;
1548 if (TREE_CODE (*idx) != SSA_NAME)
1549 return true;
1551 iv = get_iv (dta->ivopts_data, *idx);
1552 if (!iv)
1553 return false;
1555 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1556 *&x[0], which is not folded and does not trigger the
1557 ARRAY_REF path below. */
1558 *idx = iv->base;
1560 if (integer_zerop (iv->step))
1561 return true;
1563 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1565 step = array_ref_element_size (base);
1567 /* We only handle addresses whose step is an integer constant. */
1568 if (TREE_CODE (step) != INTEGER_CST)
1569 return false;
1571 else
1572 /* The step for pointer arithmetics already is 1 byte. */
1573 step = size_one_node;
1575 iv_base = iv->base;
1576 iv_step = iv->step;
1577 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1578 sizetype, &iv_base, &iv_step, dta->stmt,
1579 false))
1581 /* The index might wrap. */
1582 return false;
1585 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1586 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1588 return true;
1591 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1592 object is passed to it in DATA. */
1594 static bool
1595 idx_record_use (tree base, tree *idx,
1596 void *vdata)
1598 struct ivopts_data *data = (struct ivopts_data *) vdata;
1599 find_interesting_uses_op (data, *idx);
1600 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1602 find_interesting_uses_op (data, array_ref_element_size (base));
1603 find_interesting_uses_op (data, array_ref_low_bound (base));
1605 return true;
1608 /* If we can prove that TOP = cst * BOT for some constant cst,
1609 store cst to MUL and return true. Otherwise return false.
1610 The returned value is always sign-extended, regardless of the
1611 signedness of TOP and BOT. */
1613 static bool
1614 constant_multiple_of (tree top, tree bot, double_int *mul)
1616 tree mby;
1617 enum tree_code code;
1618 double_int res, p0, p1;
1619 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1621 STRIP_NOPS (top);
1622 STRIP_NOPS (bot);
1624 if (operand_equal_p (top, bot, 0))
1626 *mul = double_int_one;
1627 return true;
1630 code = TREE_CODE (top);
1631 switch (code)
1633 case MULT_EXPR:
1634 mby = TREE_OPERAND (top, 1);
1635 if (TREE_CODE (mby) != INTEGER_CST)
1636 return false;
1638 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1639 return false;
1641 *mul = (res * tree_to_double_int (mby)).sext (precision);
1642 return true;
1644 case PLUS_EXPR:
1645 case MINUS_EXPR:
1646 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1647 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1648 return false;
1650 if (code == MINUS_EXPR)
1651 p1 = -p1;
1652 *mul = (p0 + p1).sext (precision);
1653 return true;
1655 case INTEGER_CST:
1656 if (TREE_CODE (bot) != INTEGER_CST)
1657 return false;
1659 p0 = tree_to_double_int (top).sext (precision);
1660 p1 = tree_to_double_int (bot).sext (precision);
1661 if (p1.is_zero ())
1662 return false;
1663 *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1664 return res.is_zero ();
1666 default:
1667 return false;
1671 /* Return true if memory reference REF with step STEP may be unaligned. */
1673 static bool
1674 may_be_unaligned_p (tree ref, tree step)
1676 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1677 thus they are not misaligned. */
1678 if (TREE_CODE (ref) == TARGET_MEM_REF)
1679 return false;
1681 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1683 unsigned HOST_WIDE_INT bitpos;
1684 unsigned int ref_align;
1685 get_object_alignment_1 (ref, &ref_align, &bitpos);
1686 if (ref_align < align
1687 || (bitpos % align) != 0
1688 || (bitpos % BITS_PER_UNIT) != 0)
1689 return true;
1691 unsigned int trailing_zeros = tree_ctz (step);
1692 if (trailing_zeros < HOST_BITS_PER_INT
1693 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1694 return true;
1696 return false;
1699 /* Return true if EXPR may be non-addressable. */
1701 bool
1702 may_be_nonaddressable_p (tree expr)
1704 switch (TREE_CODE (expr))
1706 case TARGET_MEM_REF:
1707 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1708 target, thus they are always addressable. */
1709 return false;
1711 case COMPONENT_REF:
1712 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1713 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1715 case VIEW_CONVERT_EXPR:
1716 /* This kind of view-conversions may wrap non-addressable objects
1717 and make them look addressable. After some processing the
1718 non-addressability may be uncovered again, causing ADDR_EXPRs
1719 of inappropriate objects to be built. */
1720 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1721 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1722 return true;
1724 /* ... fall through ... */
1726 case ARRAY_REF:
1727 case ARRAY_RANGE_REF:
1728 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1730 CASE_CONVERT:
1731 return true;
1733 default:
1734 break;
1737 return false;
1740 /* Finds addresses in *OP_P inside STMT. */
1742 static void
1743 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1745 tree base = *op_p, step = size_zero_node;
1746 struct iv *civ;
1747 struct ifs_ivopts_data ifs_ivopts_data;
1749 /* Do not play with volatile memory references. A bit too conservative,
1750 perhaps, but safe. */
1751 if (gimple_has_volatile_ops (stmt))
1752 goto fail;
1754 /* Ignore bitfields for now. Not really something terribly complicated
1755 to handle. TODO. */
1756 if (TREE_CODE (base) == BIT_FIELD_REF)
1757 goto fail;
1759 base = unshare_expr (base);
1761 if (TREE_CODE (base) == TARGET_MEM_REF)
1763 tree type = build_pointer_type (TREE_TYPE (base));
1764 tree astep;
1766 if (TMR_BASE (base)
1767 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1769 civ = get_iv (data, TMR_BASE (base));
1770 if (!civ)
1771 goto fail;
1773 TMR_BASE (base) = civ->base;
1774 step = civ->step;
1776 if (TMR_INDEX2 (base)
1777 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1779 civ = get_iv (data, TMR_INDEX2 (base));
1780 if (!civ)
1781 goto fail;
1783 TMR_INDEX2 (base) = civ->base;
1784 step = civ->step;
1786 if (TMR_INDEX (base)
1787 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1789 civ = get_iv (data, TMR_INDEX (base));
1790 if (!civ)
1791 goto fail;
1793 TMR_INDEX (base) = civ->base;
1794 astep = civ->step;
1796 if (astep)
1798 if (TMR_STEP (base))
1799 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1801 step = fold_build2 (PLUS_EXPR, type, step, astep);
1805 if (integer_zerop (step))
1806 goto fail;
1807 base = tree_mem_ref_addr (type, base);
1809 else
1811 ifs_ivopts_data.ivopts_data = data;
1812 ifs_ivopts_data.stmt = stmt;
1813 ifs_ivopts_data.step = size_zero_node;
1814 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1815 || integer_zerop (ifs_ivopts_data.step))
1816 goto fail;
1817 step = ifs_ivopts_data.step;
1819 /* Check that the base expression is addressable. This needs
1820 to be done after substituting bases of IVs into it. */
1821 if (may_be_nonaddressable_p (base))
1822 goto fail;
1824 /* Moreover, on strict alignment platforms, check that it is
1825 sufficiently aligned. */
1826 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1827 goto fail;
1829 base = build_fold_addr_expr (base);
1831 /* Substituting bases of IVs into the base expression might
1832 have caused folding opportunities. */
1833 if (TREE_CODE (base) == ADDR_EXPR)
1835 tree *ref = &TREE_OPERAND (base, 0);
1836 while (handled_component_p (*ref))
1837 ref = &TREE_OPERAND (*ref, 0);
1838 if (TREE_CODE (*ref) == MEM_REF)
1840 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1841 TREE_OPERAND (*ref, 0),
1842 TREE_OPERAND (*ref, 1));
1843 if (tem)
1844 *ref = tem;
1849 civ = alloc_iv (base, step);
1850 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1851 return;
1853 fail:
1854 for_each_index (op_p, idx_record_use, data);
1857 /* Finds and records invariants used in STMT. */
1859 static void
1860 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1862 ssa_op_iter iter;
1863 use_operand_p use_p;
1864 tree op;
1866 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1868 op = USE_FROM_PTR (use_p);
1869 record_invariant (data, op, false);
1873 /* Finds interesting uses of induction variables in the statement STMT. */
1875 static void
1876 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1878 struct iv *iv;
1879 tree op, *lhs, *rhs;
1880 ssa_op_iter iter;
1881 use_operand_p use_p;
1882 enum tree_code code;
1884 find_invariants_stmt (data, stmt);
1886 if (gimple_code (stmt) == GIMPLE_COND)
1888 find_interesting_uses_cond (data, stmt);
1889 return;
1892 if (is_gimple_assign (stmt))
1894 lhs = gimple_assign_lhs_ptr (stmt);
1895 rhs = gimple_assign_rhs1_ptr (stmt);
1897 if (TREE_CODE (*lhs) == SSA_NAME)
1899 /* If the statement defines an induction variable, the uses are not
1900 interesting by themselves. */
1902 iv = get_iv (data, *lhs);
1904 if (iv && !integer_zerop (iv->step))
1905 return;
1908 code = gimple_assign_rhs_code (stmt);
1909 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1910 && (REFERENCE_CLASS_P (*rhs)
1911 || is_gimple_val (*rhs)))
1913 if (REFERENCE_CLASS_P (*rhs))
1914 find_interesting_uses_address (data, stmt, rhs);
1915 else
1916 find_interesting_uses_op (data, *rhs);
1918 if (REFERENCE_CLASS_P (*lhs))
1919 find_interesting_uses_address (data, stmt, lhs);
1920 return;
1922 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1924 find_interesting_uses_cond (data, stmt);
1925 return;
1928 /* TODO -- we should also handle address uses of type
1930 memory = call (whatever);
1934 call (memory). */
1937 if (gimple_code (stmt) == GIMPLE_PHI
1938 && gimple_bb (stmt) == data->current_loop->header)
1940 iv = get_iv (data, PHI_RESULT (stmt));
1942 if (iv && !integer_zerop (iv->step))
1943 return;
1946 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1948 op = USE_FROM_PTR (use_p);
1950 if (TREE_CODE (op) != SSA_NAME)
1951 continue;
1953 iv = get_iv (data, op);
1954 if (!iv)
1955 continue;
1957 find_interesting_uses_op (data, op);
1961 /* Finds interesting uses of induction variables outside of loops
1962 on loop exit edge EXIT. */
1964 static void
1965 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1967 gimple phi;
1968 gimple_stmt_iterator psi;
1969 tree def;
1971 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1973 phi = gsi_stmt (psi);
1974 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1975 if (!virtual_operand_p (def))
1976 find_interesting_uses_op (data, def);
1980 /* Finds uses of the induction variables that are interesting. */
1982 static void
1983 find_interesting_uses (struct ivopts_data *data)
1985 basic_block bb;
1986 gimple_stmt_iterator bsi;
1987 basic_block *body = get_loop_body (data->current_loop);
1988 unsigned i;
1989 struct version_info *info;
1990 edge e;
1992 if (dump_file && (dump_flags & TDF_DETAILS))
1993 fprintf (dump_file, "Uses:\n\n");
1995 for (i = 0; i < data->current_loop->num_nodes; i++)
1997 edge_iterator ei;
1998 bb = body[i];
2000 FOR_EACH_EDGE (e, ei, bb->succs)
2001 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2002 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2003 find_interesting_uses_outside (data, e);
2005 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2006 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2007 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2008 if (!is_gimple_debug (gsi_stmt (bsi)))
2009 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2012 if (dump_file && (dump_flags & TDF_DETAILS))
2014 bitmap_iterator bi;
2016 fprintf (dump_file, "\n");
2018 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2020 info = ver_info (data, i);
2021 if (info->inv_id)
2023 fprintf (dump_file, " ");
2024 print_generic_expr (dump_file, info->name, TDF_SLIM);
2025 fprintf (dump_file, " is invariant (%d)%s\n",
2026 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2030 fprintf (dump_file, "\n");
2033 free (body);
2036 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2037 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2038 we are at the top-level of the processed address. */
2040 static tree
2041 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2042 HOST_WIDE_INT *offset)
2044 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2045 enum tree_code code;
2046 tree type, orig_type = TREE_TYPE (expr);
2047 HOST_WIDE_INT off0, off1, st;
2048 tree orig_expr = expr;
2050 STRIP_NOPS (expr);
2052 type = TREE_TYPE (expr);
2053 code = TREE_CODE (expr);
2054 *offset = 0;
2056 switch (code)
2058 case INTEGER_CST:
2059 if (!cst_and_fits_in_hwi (expr)
2060 || integer_zerop (expr))
2061 return orig_expr;
2063 *offset = int_cst_value (expr);
2064 return build_int_cst (orig_type, 0);
2066 case POINTER_PLUS_EXPR:
2067 case PLUS_EXPR:
2068 case MINUS_EXPR:
2069 op0 = TREE_OPERAND (expr, 0);
2070 op1 = TREE_OPERAND (expr, 1);
2072 op0 = strip_offset_1 (op0, false, false, &off0);
2073 op1 = strip_offset_1 (op1, false, false, &off1);
2075 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2076 if (op0 == TREE_OPERAND (expr, 0)
2077 && op1 == TREE_OPERAND (expr, 1))
2078 return orig_expr;
2080 if (integer_zerop (op1))
2081 expr = op0;
2082 else if (integer_zerop (op0))
2084 if (code == MINUS_EXPR)
2085 expr = fold_build1 (NEGATE_EXPR, type, op1);
2086 else
2087 expr = op1;
2089 else
2090 expr = fold_build2 (code, type, op0, op1);
2092 return fold_convert (orig_type, expr);
2094 case MULT_EXPR:
2095 op1 = TREE_OPERAND (expr, 1);
2096 if (!cst_and_fits_in_hwi (op1))
2097 return orig_expr;
2099 op0 = TREE_OPERAND (expr, 0);
2100 op0 = strip_offset_1 (op0, false, false, &off0);
2101 if (op0 == TREE_OPERAND (expr, 0))
2102 return orig_expr;
2104 *offset = off0 * int_cst_value (op1);
2105 if (integer_zerop (op0))
2106 expr = op0;
2107 else
2108 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2110 return fold_convert (orig_type, expr);
2112 case ARRAY_REF:
2113 case ARRAY_RANGE_REF:
2114 if (!inside_addr)
2115 return orig_expr;
2117 step = array_ref_element_size (expr);
2118 if (!cst_and_fits_in_hwi (step))
2119 break;
2121 st = int_cst_value (step);
2122 op1 = TREE_OPERAND (expr, 1);
2123 op1 = strip_offset_1 (op1, false, false, &off1);
2124 *offset = off1 * st;
2126 if (top_compref
2127 && integer_zerop (op1))
2129 /* Strip the component reference completely. */
2130 op0 = TREE_OPERAND (expr, 0);
2131 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2132 *offset += off0;
2133 return op0;
2135 break;
2137 case COMPONENT_REF:
2139 tree field;
2141 if (!inside_addr)
2142 return orig_expr;
2144 tmp = component_ref_field_offset (expr);
2145 field = TREE_OPERAND (expr, 1);
2146 if (top_compref
2147 && cst_and_fits_in_hwi (tmp)
2148 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2150 HOST_WIDE_INT boffset, abs_off;
2152 /* Strip the component reference completely. */
2153 op0 = TREE_OPERAND (expr, 0);
2154 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2155 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2156 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2157 if (boffset < 0)
2158 abs_off = -abs_off;
2160 *offset = off0 + int_cst_value (tmp) + abs_off;
2161 return op0;
2164 break;
2166 case ADDR_EXPR:
2167 op0 = TREE_OPERAND (expr, 0);
2168 op0 = strip_offset_1 (op0, true, true, &off0);
2169 *offset += off0;
2171 if (op0 == TREE_OPERAND (expr, 0))
2172 return orig_expr;
2174 expr = build_fold_addr_expr (op0);
2175 return fold_convert (orig_type, expr);
2177 case MEM_REF:
2178 /* ??? Offset operand? */
2179 inside_addr = false;
2180 break;
2182 default:
2183 return orig_expr;
2186 /* Default handling of expressions for that we want to recurse into
2187 the first operand. */
2188 op0 = TREE_OPERAND (expr, 0);
2189 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2190 *offset += off0;
2192 if (op0 == TREE_OPERAND (expr, 0)
2193 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2194 return orig_expr;
2196 expr = copy_node (expr);
2197 TREE_OPERAND (expr, 0) = op0;
2198 if (op1)
2199 TREE_OPERAND (expr, 1) = op1;
2201 /* Inside address, we might strip the top level component references,
2202 thus changing type of the expression. Handling of ADDR_EXPR
2203 will fix that. */
2204 expr = fold_convert (orig_type, expr);
2206 return expr;
2209 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2211 static tree
2212 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2214 HOST_WIDE_INT off;
2215 tree core = strip_offset_1 (expr, false, false, &off);
2216 *offset = off;
2217 return core;
2220 /* Returns variant of TYPE that can be used as base for different uses.
2221 We return unsigned type with the same precision, which avoids problems
2222 with overflows. */
2224 static tree
2225 generic_type_for (tree type)
2227 if (POINTER_TYPE_P (type))
2228 return unsigned_type_for (type);
2230 if (TYPE_UNSIGNED (type))
2231 return type;
2233 return unsigned_type_for (type);
2236 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2237 the bitmap to that we should store it. */
2239 static struct ivopts_data *fd_ivopts_data;
2240 static tree
2241 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2243 bitmap *depends_on = (bitmap *) data;
2244 struct version_info *info;
2246 if (TREE_CODE (*expr_p) != SSA_NAME)
2247 return NULL_TREE;
2248 info = name_info (fd_ivopts_data, *expr_p);
2250 if (!info->inv_id || info->has_nonlin_use)
2251 return NULL_TREE;
2253 if (!*depends_on)
2254 *depends_on = BITMAP_ALLOC (NULL);
2255 bitmap_set_bit (*depends_on, info->inv_id);
2257 return NULL_TREE;
2260 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2261 position to POS. If USE is not NULL, the candidate is set as related to
2262 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2263 replacement of the final value of the iv by a direct computation. */
2265 static struct iv_cand *
2266 add_candidate_1 (struct ivopts_data *data,
2267 tree base, tree step, bool important, enum iv_position pos,
2268 struct iv_use *use, gimple incremented_at)
2270 unsigned i;
2271 struct iv_cand *cand = NULL;
2272 tree type, orig_type;
2274 /* For non-original variables, make sure their values are computed in a type
2275 that does not invoke undefined behavior on overflows (since in general,
2276 we cannot prove that these induction variables are non-wrapping). */
2277 if (pos != IP_ORIGINAL)
2279 orig_type = TREE_TYPE (base);
2280 type = generic_type_for (orig_type);
2281 if (type != orig_type)
2283 base = fold_convert (type, base);
2284 step = fold_convert (type, step);
2288 for (i = 0; i < n_iv_cands (data); i++)
2290 cand = iv_cand (data, i);
2292 if (cand->pos != pos)
2293 continue;
2295 if (cand->incremented_at != incremented_at
2296 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2297 && cand->ainc_use != use))
2298 continue;
2300 if (!cand->iv)
2302 if (!base && !step)
2303 break;
2305 continue;
2308 if (!base && !step)
2309 continue;
2311 if (operand_equal_p (base, cand->iv->base, 0)
2312 && operand_equal_p (step, cand->iv->step, 0)
2313 && (TYPE_PRECISION (TREE_TYPE (base))
2314 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2315 break;
2318 if (i == n_iv_cands (data))
2320 cand = XCNEW (struct iv_cand);
2321 cand->id = i;
2323 if (!base && !step)
2324 cand->iv = NULL;
2325 else
2326 cand->iv = alloc_iv (base, step);
2328 cand->pos = pos;
2329 if (pos != IP_ORIGINAL && cand->iv)
2331 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2332 cand->var_after = cand->var_before;
2334 cand->important = important;
2335 cand->incremented_at = incremented_at;
2336 data->iv_candidates.safe_push (cand);
2338 if (step
2339 && TREE_CODE (step) != INTEGER_CST)
2341 fd_ivopts_data = data;
2342 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2345 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2346 cand->ainc_use = use;
2347 else
2348 cand->ainc_use = NULL;
2350 if (dump_file && (dump_flags & TDF_DETAILS))
2351 dump_cand (dump_file, cand);
2354 if (important && !cand->important)
2356 cand->important = true;
2357 if (dump_file && (dump_flags & TDF_DETAILS))
2358 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2361 if (use)
2363 bitmap_set_bit (use->related_cands, i);
2364 if (dump_file && (dump_flags & TDF_DETAILS))
2365 fprintf (dump_file, "Candidate %d is related to use %d\n",
2366 cand->id, use->id);
2369 return cand;
2372 /* Returns true if incrementing the induction variable at the end of the LOOP
2373 is allowed.
2375 The purpose is to avoid splitting latch edge with a biv increment, thus
2376 creating a jump, possibly confusing other optimization passes and leaving
2377 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2378 is not available (so we do not have a better alternative), or if the latch
2379 edge is already nonempty. */
2381 static bool
2382 allow_ip_end_pos_p (struct loop *loop)
2384 if (!ip_normal_pos (loop))
2385 return true;
2387 if (!empty_block_p (ip_end_pos (loop)))
2388 return true;
2390 return false;
2393 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2394 Important field is set to IMPORTANT. */
2396 static void
2397 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2398 bool important, struct iv_use *use)
2400 basic_block use_bb = gimple_bb (use->stmt);
2401 enum machine_mode mem_mode;
2402 unsigned HOST_WIDE_INT cstepi;
2404 /* If we insert the increment in any position other than the standard
2405 ones, we must ensure that it is incremented once per iteration.
2406 It must not be in an inner nested loop, or one side of an if
2407 statement. */
2408 if (use_bb->loop_father != data->current_loop
2409 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2410 || stmt_could_throw_p (use->stmt)
2411 || !cst_and_fits_in_hwi (step))
2412 return;
2414 cstepi = int_cst_value (step);
2416 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2417 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2418 || USE_STORE_PRE_INCREMENT (mem_mode))
2419 && GET_MODE_SIZE (mem_mode) == cstepi)
2420 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2421 || USE_STORE_PRE_DECREMENT (mem_mode))
2422 && GET_MODE_SIZE (mem_mode) == -cstepi))
2424 enum tree_code code = MINUS_EXPR;
2425 tree new_base;
2426 tree new_step = step;
2428 if (POINTER_TYPE_P (TREE_TYPE (base)))
2430 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2431 code = POINTER_PLUS_EXPR;
2433 else
2434 new_step = fold_convert (TREE_TYPE (base), new_step);
2435 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2436 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2437 use->stmt);
2439 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2440 || USE_STORE_POST_INCREMENT (mem_mode))
2441 && GET_MODE_SIZE (mem_mode) == cstepi)
2442 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2443 || USE_STORE_POST_DECREMENT (mem_mode))
2444 && GET_MODE_SIZE (mem_mode) == -cstepi))
2446 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2447 use->stmt);
2451 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2452 position to POS. If USE is not NULL, the candidate is set as related to
2453 it. The candidate computation is scheduled on all available positions. */
2455 static void
2456 add_candidate (struct ivopts_data *data,
2457 tree base, tree step, bool important, struct iv_use *use)
2459 if (ip_normal_pos (data->current_loop))
2460 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2461 if (ip_end_pos (data->current_loop)
2462 && allow_ip_end_pos_p (data->current_loop))
2463 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2465 if (use != NULL && use->type == USE_ADDRESS)
2466 add_autoinc_candidates (data, base, step, important, use);
2469 /* Adds standard iv candidates. */
2471 static void
2472 add_standard_iv_candidates (struct ivopts_data *data)
2474 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2476 /* The same for a double-integer type if it is still fast enough. */
2477 if (TYPE_PRECISION
2478 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2479 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2480 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2481 build_int_cst (long_integer_type_node, 1), true, NULL);
2483 /* The same for a double-integer type if it is still fast enough. */
2484 if (TYPE_PRECISION
2485 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2486 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2487 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2488 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2492 /* Adds candidates bases on the old induction variable IV. */
2494 static void
2495 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2497 gimple phi;
2498 tree def;
2499 struct iv_cand *cand;
2501 add_candidate (data, iv->base, iv->step, true, NULL);
2503 /* The same, but with initial value zero. */
2504 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2505 add_candidate (data, size_int (0), iv->step, true, NULL);
2506 else
2507 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2508 iv->step, true, NULL);
2510 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2511 if (gimple_code (phi) == GIMPLE_PHI)
2513 /* Additionally record the possibility of leaving the original iv
2514 untouched. */
2515 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2516 /* Don't add candidate if it's from another PHI node because
2517 it's an affine iv appearing in the form of PEELED_CHREC. */
2518 phi = SSA_NAME_DEF_STMT (def);
2519 if (gimple_code (phi) != GIMPLE_PHI)
2521 cand = add_candidate_1 (data,
2522 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2523 SSA_NAME_DEF_STMT (def));
2524 cand->var_before = iv->ssa_name;
2525 cand->var_after = def;
2527 else
2528 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2532 /* Adds candidates based on the old induction variables. */
2534 static void
2535 add_old_ivs_candidates (struct ivopts_data *data)
2537 unsigned i;
2538 struct iv *iv;
2539 bitmap_iterator bi;
2541 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2543 iv = ver_info (data, i)->iv;
2544 if (iv && iv->biv_p && !integer_zerop (iv->step))
2545 add_old_iv_candidates (data, iv);
2549 /* Adds candidates based on the value of the induction variable IV and USE. */
2551 static void
2552 add_iv_value_candidates (struct ivopts_data *data,
2553 struct iv *iv, struct iv_use *use)
2555 unsigned HOST_WIDE_INT offset;
2556 tree base;
2557 tree basetype;
2559 add_candidate (data, iv->base, iv->step, false, use);
2561 /* The same, but with initial value zero. Make such variable important,
2562 since it is generic enough so that possibly many uses may be based
2563 on it. */
2564 basetype = TREE_TYPE (iv->base);
2565 if (POINTER_TYPE_P (basetype))
2566 basetype = sizetype;
2567 add_candidate (data, build_int_cst (basetype, 0),
2568 iv->step, true, use);
2570 /* Third, try removing the constant offset. Make sure to even
2571 add a candidate for &a[0] vs. (T *)&a. */
2572 base = strip_offset (iv->base, &offset);
2573 if (offset
2574 || base != iv->base)
2575 add_candidate (data, base, iv->step, false, use);
2578 /* Adds candidates based on the uses. */
2580 static void
2581 add_derived_ivs_candidates (struct ivopts_data *data)
2583 unsigned i;
2585 for (i = 0; i < n_iv_uses (data); i++)
2587 struct iv_use *use = iv_use (data, i);
2589 if (!use)
2590 continue;
2592 switch (use->type)
2594 case USE_NONLINEAR_EXPR:
2595 case USE_COMPARE:
2596 case USE_ADDRESS:
2597 /* Just add the ivs based on the value of the iv used here. */
2598 add_iv_value_candidates (data, use->iv, use);
2599 break;
2601 default:
2602 gcc_unreachable ();
2607 /* Record important candidates and add them to related_cands bitmaps
2608 if needed. */
2610 static void
2611 record_important_candidates (struct ivopts_data *data)
2613 unsigned i;
2614 struct iv_use *use;
2616 for (i = 0; i < n_iv_cands (data); i++)
2618 struct iv_cand *cand = iv_cand (data, i);
2620 if (cand->important)
2621 bitmap_set_bit (data->important_candidates, i);
2624 data->consider_all_candidates = (n_iv_cands (data)
2625 <= CONSIDER_ALL_CANDIDATES_BOUND);
2627 if (data->consider_all_candidates)
2629 /* We will not need "related_cands" bitmaps in this case,
2630 so release them to decrease peak memory consumption. */
2631 for (i = 0; i < n_iv_uses (data); i++)
2633 use = iv_use (data, i);
2634 BITMAP_FREE (use->related_cands);
2637 else
2639 /* Add important candidates to the related_cands bitmaps. */
2640 for (i = 0; i < n_iv_uses (data); i++)
2641 bitmap_ior_into (iv_use (data, i)->related_cands,
2642 data->important_candidates);
2646 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2647 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2648 we allocate a simple list to every use. */
2650 static void
2651 alloc_use_cost_map (struct ivopts_data *data)
2653 unsigned i, size, s;
2655 for (i = 0; i < n_iv_uses (data); i++)
2657 struct iv_use *use = iv_use (data, i);
2659 if (data->consider_all_candidates)
2660 size = n_iv_cands (data);
2661 else
2663 s = bitmap_count_bits (use->related_cands);
2665 /* Round up to the power of two, so that moduling by it is fast. */
2666 size = s ? (1 << ceil_log2 (s)) : 1;
2669 use->n_map_members = size;
2670 use->cost_map = XCNEWVEC (struct cost_pair, size);
2674 /* Returns description of computation cost of expression whose runtime
2675 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2677 static comp_cost
2678 new_cost (unsigned runtime, unsigned complexity)
2680 comp_cost cost;
2682 cost.cost = runtime;
2683 cost.complexity = complexity;
2685 return cost;
2688 /* Adds costs COST1 and COST2. */
2690 static comp_cost
2691 add_costs (comp_cost cost1, comp_cost cost2)
2693 cost1.cost += cost2.cost;
2694 cost1.complexity += cost2.complexity;
2696 return cost1;
2698 /* Subtracts costs COST1 and COST2. */
2700 static comp_cost
2701 sub_costs (comp_cost cost1, comp_cost cost2)
2703 cost1.cost -= cost2.cost;
2704 cost1.complexity -= cost2.complexity;
2706 return cost1;
2709 /* Returns a negative number if COST1 < COST2, a positive number if
2710 COST1 > COST2, and 0 if COST1 = COST2. */
2712 static int
2713 compare_costs (comp_cost cost1, comp_cost cost2)
2715 if (cost1.cost == cost2.cost)
2716 return cost1.complexity - cost2.complexity;
2718 return cost1.cost - cost2.cost;
2721 /* Returns true if COST is infinite. */
2723 static bool
2724 infinite_cost_p (comp_cost cost)
2726 return cost.cost == INFTY;
2729 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2730 on invariants DEPENDS_ON and that the value used in expressing it
2731 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2733 static void
2734 set_use_iv_cost (struct ivopts_data *data,
2735 struct iv_use *use, struct iv_cand *cand,
2736 comp_cost cost, bitmap depends_on, tree value,
2737 enum tree_code comp, int inv_expr_id)
2739 unsigned i, s;
2741 if (infinite_cost_p (cost))
2743 BITMAP_FREE (depends_on);
2744 return;
2747 if (data->consider_all_candidates)
2749 use->cost_map[cand->id].cand = cand;
2750 use->cost_map[cand->id].cost = cost;
2751 use->cost_map[cand->id].depends_on = depends_on;
2752 use->cost_map[cand->id].value = value;
2753 use->cost_map[cand->id].comp = comp;
2754 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2755 return;
2758 /* n_map_members is a power of two, so this computes modulo. */
2759 s = cand->id & (use->n_map_members - 1);
2760 for (i = s; i < use->n_map_members; i++)
2761 if (!use->cost_map[i].cand)
2762 goto found;
2763 for (i = 0; i < s; i++)
2764 if (!use->cost_map[i].cand)
2765 goto found;
2767 gcc_unreachable ();
2769 found:
2770 use->cost_map[i].cand = cand;
2771 use->cost_map[i].cost = cost;
2772 use->cost_map[i].depends_on = depends_on;
2773 use->cost_map[i].value = value;
2774 use->cost_map[i].comp = comp;
2775 use->cost_map[i].inv_expr_id = inv_expr_id;
2778 /* Gets cost of (USE, CANDIDATE) pair. */
2780 static struct cost_pair *
2781 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2782 struct iv_cand *cand)
2784 unsigned i, s;
2785 struct cost_pair *ret;
2787 if (!cand)
2788 return NULL;
2790 if (data->consider_all_candidates)
2792 ret = use->cost_map + cand->id;
2793 if (!ret->cand)
2794 return NULL;
2796 return ret;
2799 /* n_map_members is a power of two, so this computes modulo. */
2800 s = cand->id & (use->n_map_members - 1);
2801 for (i = s; i < use->n_map_members; i++)
2802 if (use->cost_map[i].cand == cand)
2803 return use->cost_map + i;
2804 else if (use->cost_map[i].cand == NULL)
2805 return NULL;
2806 for (i = 0; i < s; i++)
2807 if (use->cost_map[i].cand == cand)
2808 return use->cost_map + i;
2809 else if (use->cost_map[i].cand == NULL)
2810 return NULL;
2812 return NULL;
2815 /* Returns estimate on cost of computing SEQ. */
2817 static unsigned
2818 seq_cost (rtx seq, bool speed)
2820 unsigned cost = 0;
2821 rtx set;
2823 for (; seq; seq = NEXT_INSN (seq))
2825 set = single_set (seq);
2826 if (set)
2827 cost += set_src_cost (SET_SRC (set), speed);
2828 else
2829 cost++;
2832 return cost;
2835 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2836 static rtx
2837 produce_memory_decl_rtl (tree obj, int *regno)
2839 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2840 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2841 rtx x;
2843 gcc_assert (obj);
2844 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2846 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2847 x = gen_rtx_SYMBOL_REF (address_mode, name);
2848 SET_SYMBOL_REF_DECL (x, obj);
2849 x = gen_rtx_MEM (DECL_MODE (obj), x);
2850 set_mem_addr_space (x, as);
2851 targetm.encode_section_info (obj, x, true);
2853 else
2855 x = gen_raw_REG (address_mode, (*regno)++);
2856 x = gen_rtx_MEM (DECL_MODE (obj), x);
2857 set_mem_addr_space (x, as);
2860 return x;
2863 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2864 walk_tree. DATA contains the actual fake register number. */
2866 static tree
2867 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2869 tree obj = NULL_TREE;
2870 rtx x = NULL_RTX;
2871 int *regno = (int *) data;
2873 switch (TREE_CODE (*expr_p))
2875 case ADDR_EXPR:
2876 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2877 handled_component_p (*expr_p);
2878 expr_p = &TREE_OPERAND (*expr_p, 0))
2879 continue;
2880 obj = *expr_p;
2881 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2882 x = produce_memory_decl_rtl (obj, regno);
2883 break;
2885 case SSA_NAME:
2886 *ws = 0;
2887 obj = SSA_NAME_VAR (*expr_p);
2888 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2889 if (!obj)
2890 return NULL_TREE;
2891 if (!DECL_RTL_SET_P (obj))
2892 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2893 break;
2895 case VAR_DECL:
2896 case PARM_DECL:
2897 case RESULT_DECL:
2898 *ws = 0;
2899 obj = *expr_p;
2901 if (DECL_RTL_SET_P (obj))
2902 break;
2904 if (DECL_MODE (obj) == BLKmode)
2905 x = produce_memory_decl_rtl (obj, regno);
2906 else
2907 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2909 break;
2911 default:
2912 break;
2915 if (x)
2917 decl_rtl_to_reset.safe_push (obj);
2918 SET_DECL_RTL (obj, x);
2921 return NULL_TREE;
2924 /* Determines cost of the computation of EXPR. */
2926 static unsigned
2927 computation_cost (tree expr, bool speed)
2929 rtx seq, rslt;
2930 tree type = TREE_TYPE (expr);
2931 unsigned cost;
2932 /* Avoid using hard regs in ways which may be unsupported. */
2933 int regno = LAST_VIRTUAL_REGISTER + 1;
2934 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2935 enum node_frequency real_frequency = node->frequency;
2937 node->frequency = NODE_FREQUENCY_NORMAL;
2938 crtl->maybe_hot_insn_p = speed;
2939 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2940 start_sequence ();
2941 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2942 seq = get_insns ();
2943 end_sequence ();
2944 default_rtl_profile ();
2945 node->frequency = real_frequency;
2947 cost = seq_cost (seq, speed);
2948 if (MEM_P (rslt))
2949 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2950 TYPE_ADDR_SPACE (type), speed);
2951 else if (!REG_P (rslt))
2952 cost += set_src_cost (rslt, speed);
2954 return cost;
2957 /* Returns variable containing the value of candidate CAND at statement AT. */
2959 static tree
2960 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2962 if (stmt_after_increment (loop, cand, stmt))
2963 return cand->var_after;
2964 else
2965 return cand->var_before;
2968 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2969 same precision that is at least as wide as the precision of TYPE, stores
2970 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2971 type of A and B. */
2973 static tree
2974 determine_common_wider_type (tree *a, tree *b)
2976 tree wider_type = NULL;
2977 tree suba, subb;
2978 tree atype = TREE_TYPE (*a);
2980 if (CONVERT_EXPR_P (*a))
2982 suba = TREE_OPERAND (*a, 0);
2983 wider_type = TREE_TYPE (suba);
2984 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2985 return atype;
2987 else
2988 return atype;
2990 if (CONVERT_EXPR_P (*b))
2992 subb = TREE_OPERAND (*b, 0);
2993 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2994 return atype;
2996 else
2997 return atype;
2999 *a = suba;
3000 *b = subb;
3001 return wider_type;
3004 /* Determines the expression by that USE is expressed from induction variable
3005 CAND at statement AT in LOOP. The expression is stored in a decomposed
3006 form into AFF. Returns false if USE cannot be expressed using CAND. */
3008 static bool
3009 get_computation_aff (struct loop *loop,
3010 struct iv_use *use, struct iv_cand *cand, gimple at,
3011 struct aff_tree *aff)
3013 tree ubase = use->iv->base;
3014 tree ustep = use->iv->step;
3015 tree cbase = cand->iv->base;
3016 tree cstep = cand->iv->step, cstep_common;
3017 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3018 tree common_type, var;
3019 tree uutype;
3020 aff_tree cbase_aff, var_aff;
3021 double_int rat;
3023 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3025 /* We do not have a precision to express the values of use. */
3026 return false;
3029 var = var_at_stmt (loop, cand, at);
3030 uutype = unsigned_type_for (utype);
3032 /* If the conversion is not noop, perform it. */
3033 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3035 cstep = fold_convert (uutype, cstep);
3036 cbase = fold_convert (uutype, cbase);
3037 var = fold_convert (uutype, var);
3040 if (!constant_multiple_of (ustep, cstep, &rat))
3041 return false;
3043 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3044 type, we achieve better folding by computing their difference in this
3045 wider type, and cast the result to UUTYPE. We do not need to worry about
3046 overflows, as all the arithmetics will in the end be performed in UUTYPE
3047 anyway. */
3048 common_type = determine_common_wider_type (&ubase, &cbase);
3050 /* use = ubase - ratio * cbase + ratio * var. */
3051 tree_to_aff_combination (ubase, common_type, aff);
3052 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3053 tree_to_aff_combination (var, uutype, &var_aff);
3055 /* We need to shift the value if we are after the increment. */
3056 if (stmt_after_increment (loop, cand, at))
3058 aff_tree cstep_aff;
3060 if (common_type != uutype)
3061 cstep_common = fold_convert (common_type, cstep);
3062 else
3063 cstep_common = cstep;
3065 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3066 aff_combination_add (&cbase_aff, &cstep_aff);
3069 aff_combination_scale (&cbase_aff, -rat);
3070 aff_combination_add (aff, &cbase_aff);
3071 if (common_type != uutype)
3072 aff_combination_convert (aff, uutype);
3074 aff_combination_scale (&var_aff, rat);
3075 aff_combination_add (aff, &var_aff);
3077 return true;
3080 /* Return the type of USE. */
3082 static tree
3083 get_use_type (struct iv_use *use)
3085 tree base_type = TREE_TYPE (use->iv->base);
3086 tree type;
3088 if (use->type == USE_ADDRESS)
3090 /* The base_type may be a void pointer. Create a pointer type based on
3091 the mem_ref instead. */
3092 type = build_pointer_type (TREE_TYPE (*use->op_p));
3093 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3094 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3096 else
3097 type = base_type;
3099 return type;
3102 /* Determines the expression by that USE is expressed from induction variable
3103 CAND at statement AT in LOOP. The computation is unshared. */
3105 static tree
3106 get_computation_at (struct loop *loop,
3107 struct iv_use *use, struct iv_cand *cand, gimple at)
3109 aff_tree aff;
3110 tree type = get_use_type (use);
3112 if (!get_computation_aff (loop, use, cand, at, &aff))
3113 return NULL_TREE;
3114 unshare_aff_combination (&aff);
3115 return fold_convert (type, aff_combination_to_tree (&aff));
3118 /* Determines the expression by that USE is expressed from induction variable
3119 CAND in LOOP. The computation is unshared. */
3121 static tree
3122 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3124 return get_computation_at (loop, use, cand, use->stmt);
3127 /* Adjust the cost COST for being in loop setup rather than loop body.
3128 If we're optimizing for space, the loop setup overhead is constant;
3129 if we're optimizing for speed, amortize it over the per-iteration cost. */
3130 static unsigned
3131 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3133 if (cost == INFTY)
3134 return cost;
3135 else if (optimize_loop_for_speed_p (data->current_loop))
3136 return cost / avg_loop_niter (data->current_loop);
3137 else
3138 return cost;
3141 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3142 validity for a memory reference accessing memory of mode MODE in
3143 address space AS. */
3146 bool
3147 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3148 addr_space_t as)
3150 #define MAX_RATIO 128
3151 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3152 static vec<sbitmap> valid_mult_list;
3153 sbitmap valid_mult;
3155 if (data_index >= valid_mult_list.length ())
3156 valid_mult_list.safe_grow_cleared (data_index + 1);
3158 valid_mult = valid_mult_list[data_index];
3159 if (!valid_mult)
3161 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3162 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3163 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3164 rtx addr, scaled;
3165 HOST_WIDE_INT i;
3167 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3168 bitmap_clear (valid_mult);
3169 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3170 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3171 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3173 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3174 if (memory_address_addr_space_p (mode, addr, as)
3175 || memory_address_addr_space_p (mode, scaled, as))
3176 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3179 if (dump_file && (dump_flags & TDF_DETAILS))
3181 fprintf (dump_file, " allowed multipliers:");
3182 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3183 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3184 fprintf (dump_file, " %d", (int) i);
3185 fprintf (dump_file, "\n");
3186 fprintf (dump_file, "\n");
3189 valid_mult_list[data_index] = valid_mult;
3192 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3193 return false;
3195 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3198 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3199 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3200 variable is omitted. Compute the cost for a memory reference that accesses
3201 a memory location of mode MEM_MODE in address space AS.
3203 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3204 size of MEM_MODE / RATIO) is available. To make this determination, we
3205 look at the size of the increment to be made, which is given in CSTEP.
3206 CSTEP may be zero if the step is unknown.
3207 STMT_AFTER_INC is true iff the statement we're looking at is after the
3208 increment of the original biv.
3210 TODO -- there must be some better way. This all is quite crude. */
3212 enum ainc_type
3214 AINC_PRE_INC, /* Pre increment. */
3215 AINC_PRE_DEC, /* Pre decrement. */
3216 AINC_POST_INC, /* Post increment. */
3217 AINC_POST_DEC, /* Post decrement. */
3218 AINC_NONE /* Also the number of auto increment types. */
3221 typedef struct address_cost_data_s
3223 HOST_WIDE_INT min_offset, max_offset;
3224 unsigned costs[2][2][2][2];
3225 unsigned ainc_costs[AINC_NONE];
3226 } *address_cost_data;
3229 static comp_cost
3230 get_address_cost (bool symbol_present, bool var_present,
3231 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3232 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3233 addr_space_t as, bool speed,
3234 bool stmt_after_inc, bool *may_autoinc)
3236 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3237 static vec<address_cost_data> address_cost_data_list;
3238 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3239 address_cost_data data;
3240 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3241 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3242 unsigned cost, acost, complexity;
3243 enum ainc_type autoinc_type;
3244 bool offset_p, ratio_p, autoinc;
3245 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3246 unsigned HOST_WIDE_INT mask;
3247 unsigned bits;
3249 if (data_index >= address_cost_data_list.length ())
3250 address_cost_data_list.safe_grow_cleared (data_index + 1);
3252 data = address_cost_data_list[data_index];
3253 if (!data)
3255 HOST_WIDE_INT i;
3256 HOST_WIDE_INT rat, off = 0;
3257 int old_cse_not_expected, width;
3258 unsigned sym_p, var_p, off_p, rat_p, add_c;
3259 rtx seq, addr, base;
3260 rtx reg0, reg1;
3262 data = (address_cost_data) xcalloc (1, sizeof (*data));
3264 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3266 width = GET_MODE_BITSIZE (address_mode) - 1;
3267 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3268 width = HOST_BITS_PER_WIDE_INT - 1;
3269 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3271 for (i = width; i >= 0; i--)
3273 off = -((unsigned HOST_WIDE_INT) 1 << i);
3274 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3275 if (memory_address_addr_space_p (mem_mode, addr, as))
3276 break;
3278 data->min_offset = (i == -1? 0 : off);
3280 for (i = width; i >= 0; i--)
3282 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3283 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3284 if (memory_address_addr_space_p (mem_mode, addr, as))
3285 break;
3287 if (i == -1)
3288 off = 0;
3289 data->max_offset = off;
3291 if (dump_file && (dump_flags & TDF_DETAILS))
3293 fprintf (dump_file, "get_address_cost:\n");
3294 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3295 GET_MODE_NAME (mem_mode),
3296 data->min_offset);
3297 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3298 GET_MODE_NAME (mem_mode),
3299 data->max_offset);
3302 rat = 1;
3303 for (i = 2; i <= MAX_RATIO; i++)
3304 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3306 rat = i;
3307 break;
3310 /* Compute the cost of various addressing modes. */
3311 acost = 0;
3312 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3313 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3315 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3316 || USE_STORE_PRE_DECREMENT (mem_mode))
3318 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3319 has_predec[mem_mode]
3320 = memory_address_addr_space_p (mem_mode, addr, as);
3322 if (has_predec[mem_mode])
3323 data->ainc_costs[AINC_PRE_DEC]
3324 = address_cost (addr, mem_mode, as, speed);
3326 if (USE_LOAD_POST_DECREMENT (mem_mode)
3327 || USE_STORE_POST_DECREMENT (mem_mode))
3329 addr = gen_rtx_POST_DEC (address_mode, reg0);
3330 has_postdec[mem_mode]
3331 = memory_address_addr_space_p (mem_mode, addr, as);
3333 if (has_postdec[mem_mode])
3334 data->ainc_costs[AINC_POST_DEC]
3335 = address_cost (addr, mem_mode, as, speed);
3337 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3338 || USE_STORE_PRE_DECREMENT (mem_mode))
3340 addr = gen_rtx_PRE_INC (address_mode, reg0);
3341 has_preinc[mem_mode]
3342 = memory_address_addr_space_p (mem_mode, addr, as);
3344 if (has_preinc[mem_mode])
3345 data->ainc_costs[AINC_PRE_INC]
3346 = address_cost (addr, mem_mode, as, speed);
3348 if (USE_LOAD_POST_INCREMENT (mem_mode)
3349 || USE_STORE_POST_INCREMENT (mem_mode))
3351 addr = gen_rtx_POST_INC (address_mode, reg0);
3352 has_postinc[mem_mode]
3353 = memory_address_addr_space_p (mem_mode, addr, as);
3355 if (has_postinc[mem_mode])
3356 data->ainc_costs[AINC_POST_INC]
3357 = address_cost (addr, mem_mode, as, speed);
3359 for (i = 0; i < 16; i++)
3361 sym_p = i & 1;
3362 var_p = (i >> 1) & 1;
3363 off_p = (i >> 2) & 1;
3364 rat_p = (i >> 3) & 1;
3366 addr = reg0;
3367 if (rat_p)
3368 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3369 gen_int_mode (rat, address_mode));
3371 if (var_p)
3372 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3374 if (sym_p)
3376 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3377 /* ??? We can run into trouble with some backends by presenting
3378 it with symbols which haven't been properly passed through
3379 targetm.encode_section_info. By setting the local bit, we
3380 enhance the probability of things working. */
3381 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3383 if (off_p)
3384 base = gen_rtx_fmt_e (CONST, address_mode,
3385 gen_rtx_fmt_ee
3386 (PLUS, address_mode, base,
3387 gen_int_mode (off, address_mode)));
3389 else if (off_p)
3390 base = gen_int_mode (off, address_mode);
3391 else
3392 base = NULL_RTX;
3394 if (base)
3395 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3397 start_sequence ();
3398 /* To avoid splitting addressing modes, pretend that no cse will
3399 follow. */
3400 old_cse_not_expected = cse_not_expected;
3401 cse_not_expected = true;
3402 addr = memory_address_addr_space (mem_mode, addr, as);
3403 cse_not_expected = old_cse_not_expected;
3404 seq = get_insns ();
3405 end_sequence ();
3407 acost = seq_cost (seq, speed);
3408 acost += address_cost (addr, mem_mode, as, speed);
3410 if (!acost)
3411 acost = 1;
3412 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3415 /* On some targets, it is quite expensive to load symbol to a register,
3416 which makes addresses that contain symbols look much more expensive.
3417 However, the symbol will have to be loaded in any case before the
3418 loop (and quite likely we have it in register already), so it does not
3419 make much sense to penalize them too heavily. So make some final
3420 tweaks for the SYMBOL_PRESENT modes:
3422 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3423 var is cheaper, use this mode with small penalty.
3424 If VAR_PRESENT is true, try whether the mode with
3425 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3426 if this is the case, use it. */
3427 add_c = add_cost (speed, address_mode);
3428 for (i = 0; i < 8; i++)
3430 var_p = i & 1;
3431 off_p = (i >> 1) & 1;
3432 rat_p = (i >> 2) & 1;
3434 acost = data->costs[0][1][off_p][rat_p] + 1;
3435 if (var_p)
3436 acost += add_c;
3438 if (acost < data->costs[1][var_p][off_p][rat_p])
3439 data->costs[1][var_p][off_p][rat_p] = acost;
3442 if (dump_file && (dump_flags & TDF_DETAILS))
3444 fprintf (dump_file, "Address costs:\n");
3446 for (i = 0; i < 16; i++)
3448 sym_p = i & 1;
3449 var_p = (i >> 1) & 1;
3450 off_p = (i >> 2) & 1;
3451 rat_p = (i >> 3) & 1;
3453 fprintf (dump_file, " ");
3454 if (sym_p)
3455 fprintf (dump_file, "sym + ");
3456 if (var_p)
3457 fprintf (dump_file, "var + ");
3458 if (off_p)
3459 fprintf (dump_file, "cst + ");
3460 if (rat_p)
3461 fprintf (dump_file, "rat * ");
3463 acost = data->costs[sym_p][var_p][off_p][rat_p];
3464 fprintf (dump_file, "index costs %d\n", acost);
3466 if (has_predec[mem_mode] || has_postdec[mem_mode]
3467 || has_preinc[mem_mode] || has_postinc[mem_mode])
3468 fprintf (dump_file, " May include autoinc/dec\n");
3469 fprintf (dump_file, "\n");
3472 address_cost_data_list[data_index] = data;
3475 bits = GET_MODE_BITSIZE (address_mode);
3476 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3477 offset &= mask;
3478 if ((offset >> (bits - 1) & 1))
3479 offset |= ~mask;
3480 s_offset = offset;
3482 autoinc = false;
3483 autoinc_type = AINC_NONE;
3484 msize = GET_MODE_SIZE (mem_mode);
3485 autoinc_offset = offset;
3486 if (stmt_after_inc)
3487 autoinc_offset += ratio * cstep;
3488 if (symbol_present || var_present || ratio != 1)
3489 autoinc = false;
3490 else
3492 if (has_postinc[mem_mode] && autoinc_offset == 0
3493 && msize == cstep)
3494 autoinc_type = AINC_POST_INC;
3495 else if (has_postdec[mem_mode] && autoinc_offset == 0
3496 && msize == -cstep)
3497 autoinc_type = AINC_POST_DEC;
3498 else if (has_preinc[mem_mode] && autoinc_offset == msize
3499 && msize == cstep)
3500 autoinc_type = AINC_PRE_INC;
3501 else if (has_predec[mem_mode] && autoinc_offset == -msize
3502 && msize == -cstep)
3503 autoinc_type = AINC_PRE_DEC;
3505 if (autoinc_type != AINC_NONE)
3506 autoinc = true;
3509 cost = 0;
3510 offset_p = (s_offset != 0
3511 && data->min_offset <= s_offset
3512 && s_offset <= data->max_offset);
3513 ratio_p = (ratio != 1
3514 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3516 if (ratio != 1 && !ratio_p)
3517 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3519 if (s_offset && !offset_p && !symbol_present)
3520 cost += add_cost (speed, address_mode);
3522 if (may_autoinc)
3523 *may_autoinc = autoinc;
3524 if (autoinc)
3525 acost = data->ainc_costs[autoinc_type];
3526 else
3527 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3528 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3529 return new_cost (cost + acost, complexity);
3532 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3533 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3534 calculating the operands of EXPR. Returns true if successful, and returns
3535 the cost in COST. */
3537 static bool
3538 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3539 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3541 comp_cost res;
3542 tree op1 = TREE_OPERAND (expr, 1);
3543 tree cst = TREE_OPERAND (mult, 1);
3544 tree multop = TREE_OPERAND (mult, 0);
3545 int m = exact_log2 (int_cst_value (cst));
3546 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3547 int sa_cost;
3548 bool equal_p = false;
3550 if (!(m >= 0 && m < maxm))
3551 return false;
3553 if (operand_equal_p (op1, mult, 0))
3554 equal_p = true;
3556 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3557 ? shiftadd_cost (speed, mode, m)
3558 : (equal_p
3559 ? shiftsub1_cost (speed, mode, m)
3560 : shiftsub0_cost (speed, mode, m)));
3561 res = new_cost (sa_cost, 0);
3562 res = add_costs (res, equal_p ? cost0 : cost1);
3564 STRIP_NOPS (multop);
3565 if (!is_gimple_val (multop))
3566 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3568 *cost = res;
3569 return true;
3572 /* Estimates cost of forcing expression EXPR into a variable. */
3574 static comp_cost
3575 force_expr_to_var_cost (tree expr, bool speed)
3577 static bool costs_initialized = false;
3578 static unsigned integer_cost [2];
3579 static unsigned symbol_cost [2];
3580 static unsigned address_cost [2];
3581 tree op0, op1;
3582 comp_cost cost0, cost1, cost;
3583 enum machine_mode mode;
3585 if (!costs_initialized)
3587 tree type = build_pointer_type (integer_type_node);
3588 tree var, addr;
3589 rtx x;
3590 int i;
3592 var = create_tmp_var_raw (integer_type_node, "test_var");
3593 TREE_STATIC (var) = 1;
3594 x = produce_memory_decl_rtl (var, NULL);
3595 SET_DECL_RTL (var, x);
3597 addr = build1 (ADDR_EXPR, type, var);
3600 for (i = 0; i < 2; i++)
3602 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3603 2000), i);
3605 symbol_cost[i] = computation_cost (addr, i) + 1;
3607 address_cost[i]
3608 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3609 if (dump_file && (dump_flags & TDF_DETAILS))
3611 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3612 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3613 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3614 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3615 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3616 fprintf (dump_file, "\n");
3620 costs_initialized = true;
3623 STRIP_NOPS (expr);
3625 if (SSA_VAR_P (expr))
3626 return no_cost;
3628 if (is_gimple_min_invariant (expr))
3630 if (TREE_CODE (expr) == INTEGER_CST)
3631 return new_cost (integer_cost [speed], 0);
3633 if (TREE_CODE (expr) == ADDR_EXPR)
3635 tree obj = TREE_OPERAND (expr, 0);
3637 if (TREE_CODE (obj) == VAR_DECL
3638 || TREE_CODE (obj) == PARM_DECL
3639 || TREE_CODE (obj) == RESULT_DECL)
3640 return new_cost (symbol_cost [speed], 0);
3643 return new_cost (address_cost [speed], 0);
3646 switch (TREE_CODE (expr))
3648 case POINTER_PLUS_EXPR:
3649 case PLUS_EXPR:
3650 case MINUS_EXPR:
3651 case MULT_EXPR:
3652 op0 = TREE_OPERAND (expr, 0);
3653 op1 = TREE_OPERAND (expr, 1);
3654 STRIP_NOPS (op0);
3655 STRIP_NOPS (op1);
3656 break;
3658 CASE_CONVERT:
3659 case NEGATE_EXPR:
3660 op0 = TREE_OPERAND (expr, 0);
3661 STRIP_NOPS (op0);
3662 op1 = NULL_TREE;
3663 break;
3665 default:
3666 /* Just an arbitrary value, FIXME. */
3667 return new_cost (target_spill_cost[speed], 0);
3670 if (op0 == NULL_TREE
3671 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3672 cost0 = no_cost;
3673 else
3674 cost0 = force_expr_to_var_cost (op0, speed);
3676 if (op1 == NULL_TREE
3677 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3678 cost1 = no_cost;
3679 else
3680 cost1 = force_expr_to_var_cost (op1, speed);
3682 mode = TYPE_MODE (TREE_TYPE (expr));
3683 switch (TREE_CODE (expr))
3685 case POINTER_PLUS_EXPR:
3686 case PLUS_EXPR:
3687 case MINUS_EXPR:
3688 case NEGATE_EXPR:
3689 cost = new_cost (add_cost (speed, mode), 0);
3690 if (TREE_CODE (expr) != NEGATE_EXPR)
3692 tree mult = NULL_TREE;
3693 comp_cost sa_cost;
3694 if (TREE_CODE (op1) == MULT_EXPR)
3695 mult = op1;
3696 else if (TREE_CODE (op0) == MULT_EXPR)
3697 mult = op0;
3699 if (mult != NULL_TREE
3700 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3701 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3702 speed, &sa_cost))
3703 return sa_cost;
3705 break;
3707 CASE_CONVERT:
3709 tree inner_mode, outer_mode;
3710 outer_mode = TREE_TYPE (expr);
3711 inner_mode = TREE_TYPE (op0);
3712 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3713 TYPE_MODE (inner_mode), speed), 0);
3715 break;
3717 case MULT_EXPR:
3718 if (cst_and_fits_in_hwi (op0))
3719 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3720 mode, speed), 0);
3721 else if (cst_and_fits_in_hwi (op1))
3722 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3723 mode, speed), 0);
3724 else
3725 return new_cost (target_spill_cost [speed], 0);
3726 break;
3728 default:
3729 gcc_unreachable ();
3732 cost = add_costs (cost, cost0);
3733 cost = add_costs (cost, cost1);
3735 /* Bound the cost by target_spill_cost. The parts of complicated
3736 computations often are either loop invariant or at least can
3737 be shared between several iv uses, so letting this grow without
3738 limits would not give reasonable results. */
3739 if (cost.cost > (int) target_spill_cost [speed])
3740 cost.cost = target_spill_cost [speed];
3742 return cost;
3745 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3746 invariants the computation depends on. */
3748 static comp_cost
3749 force_var_cost (struct ivopts_data *data,
3750 tree expr, bitmap *depends_on)
3752 if (depends_on)
3754 fd_ivopts_data = data;
3755 walk_tree (&expr, find_depends, depends_on, NULL);
3758 return force_expr_to_var_cost (expr, data->speed);
3761 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3762 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3763 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3764 invariants the computation depends on. */
3766 static comp_cost
3767 split_address_cost (struct ivopts_data *data,
3768 tree addr, bool *symbol_present, bool *var_present,
3769 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3771 tree core;
3772 HOST_WIDE_INT bitsize;
3773 HOST_WIDE_INT bitpos;
3774 tree toffset;
3775 enum machine_mode mode;
3776 int unsignedp, volatilep;
3778 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3779 &unsignedp, &volatilep, false);
3781 if (toffset != 0
3782 || bitpos % BITS_PER_UNIT != 0
3783 || TREE_CODE (core) != VAR_DECL)
3785 *symbol_present = false;
3786 *var_present = true;
3787 fd_ivopts_data = data;
3788 walk_tree (&addr, find_depends, depends_on, NULL);
3789 return new_cost (target_spill_cost[data->speed], 0);
3792 *offset += bitpos / BITS_PER_UNIT;
3793 if (TREE_STATIC (core)
3794 || DECL_EXTERNAL (core))
3796 *symbol_present = true;
3797 *var_present = false;
3798 return no_cost;
3801 *symbol_present = false;
3802 *var_present = true;
3803 return no_cost;
3806 /* Estimates cost of expressing difference of addresses E1 - E2 as
3807 var + symbol + offset. The value of offset is added to OFFSET,
3808 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3809 part is missing. DEPENDS_ON is a set of the invariants the computation
3810 depends on. */
3812 static comp_cost
3813 ptr_difference_cost (struct ivopts_data *data,
3814 tree e1, tree e2, bool *symbol_present, bool *var_present,
3815 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3817 HOST_WIDE_INT diff = 0;
3818 aff_tree aff_e1, aff_e2;
3819 tree type;
3821 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3823 if (ptr_difference_const (e1, e2, &diff))
3825 *offset += diff;
3826 *symbol_present = false;
3827 *var_present = false;
3828 return no_cost;
3831 if (integer_zerop (e2))
3832 return split_address_cost (data, TREE_OPERAND (e1, 0),
3833 symbol_present, var_present, offset, depends_on);
3835 *symbol_present = false;
3836 *var_present = true;
3838 type = signed_type_for (TREE_TYPE (e1));
3839 tree_to_aff_combination (e1, type, &aff_e1);
3840 tree_to_aff_combination (e2, type, &aff_e2);
3841 aff_combination_scale (&aff_e2, double_int_minus_one);
3842 aff_combination_add (&aff_e1, &aff_e2);
3844 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3847 /* Estimates cost of expressing difference E1 - E2 as
3848 var + symbol + offset. The value of offset is added to OFFSET,
3849 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3850 part is missing. DEPENDS_ON is a set of the invariants the computation
3851 depends on. */
3853 static comp_cost
3854 difference_cost (struct ivopts_data *data,
3855 tree e1, tree e2, bool *symbol_present, bool *var_present,
3856 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3858 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3859 unsigned HOST_WIDE_INT off1, off2;
3860 aff_tree aff_e1, aff_e2;
3861 tree type;
3863 e1 = strip_offset (e1, &off1);
3864 e2 = strip_offset (e2, &off2);
3865 *offset += off1 - off2;
3867 STRIP_NOPS (e1);
3868 STRIP_NOPS (e2);
3870 if (TREE_CODE (e1) == ADDR_EXPR)
3871 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3872 offset, depends_on);
3873 *symbol_present = false;
3875 if (operand_equal_p (e1, e2, 0))
3877 *var_present = false;
3878 return no_cost;
3881 *var_present = true;
3883 if (integer_zerop (e2))
3884 return force_var_cost (data, e1, depends_on);
3886 if (integer_zerop (e1))
3888 comp_cost cost = force_var_cost (data, e2, depends_on);
3889 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3890 return cost;
3893 type = signed_type_for (TREE_TYPE (e1));
3894 tree_to_aff_combination (e1, type, &aff_e1);
3895 tree_to_aff_combination (e2, type, &aff_e2);
3896 aff_combination_scale (&aff_e2, double_int_minus_one);
3897 aff_combination_add (&aff_e1, &aff_e2);
3899 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3902 /* Returns true if AFF1 and AFF2 are identical. */
3904 static bool
3905 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3907 unsigned i;
3909 if (aff1->n != aff2->n)
3910 return false;
3912 for (i = 0; i < aff1->n; i++)
3914 if (aff1->elts[i].coef != aff2->elts[i].coef)
3915 return false;
3917 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3918 return false;
3920 return true;
3923 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3925 static int
3926 get_expr_id (struct ivopts_data *data, tree expr)
3928 struct iv_inv_expr_ent ent;
3929 struct iv_inv_expr_ent **slot;
3931 ent.expr = expr;
3932 ent.hash = iterative_hash_expr (expr, 0);
3933 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3934 if (*slot)
3935 return (*slot)->id;
3937 *slot = XNEW (struct iv_inv_expr_ent);
3938 (*slot)->expr = expr;
3939 (*slot)->hash = ent.hash;
3940 (*slot)->id = data->inv_expr_id++;
3941 return (*slot)->id;
3944 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3945 requires a new compiler generated temporary. Returns -1 otherwise.
3946 ADDRESS_P is a flag indicating if the expression is for address
3947 computation. */
3949 static int
3950 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3951 tree cbase, HOST_WIDE_INT ratio,
3952 bool address_p)
3954 aff_tree ubase_aff, cbase_aff;
3955 tree expr, ub, cb;
3957 STRIP_NOPS (ubase);
3958 STRIP_NOPS (cbase);
3959 ub = ubase;
3960 cb = cbase;
3962 if ((TREE_CODE (ubase) == INTEGER_CST)
3963 && (TREE_CODE (cbase) == INTEGER_CST))
3964 return -1;
3966 /* Strips the constant part. */
3967 if (TREE_CODE (ubase) == PLUS_EXPR
3968 || TREE_CODE (ubase) == MINUS_EXPR
3969 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3971 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3972 ubase = TREE_OPERAND (ubase, 0);
3975 /* Strips the constant part. */
3976 if (TREE_CODE (cbase) == PLUS_EXPR
3977 || TREE_CODE (cbase) == MINUS_EXPR
3978 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3980 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3981 cbase = TREE_OPERAND (cbase, 0);
3984 if (address_p)
3986 if (((TREE_CODE (ubase) == SSA_NAME)
3987 || (TREE_CODE (ubase) == ADDR_EXPR
3988 && is_gimple_min_invariant (ubase)))
3989 && (TREE_CODE (cbase) == INTEGER_CST))
3990 return -1;
3992 if (((TREE_CODE (cbase) == SSA_NAME)
3993 || (TREE_CODE (cbase) == ADDR_EXPR
3994 && is_gimple_min_invariant (cbase)))
3995 && (TREE_CODE (ubase) == INTEGER_CST))
3996 return -1;
3999 if (ratio == 1)
4001 if (operand_equal_p (ubase, cbase, 0))
4002 return -1;
4004 if (TREE_CODE (ubase) == ADDR_EXPR
4005 && TREE_CODE (cbase) == ADDR_EXPR)
4007 tree usym, csym;
4009 usym = TREE_OPERAND (ubase, 0);
4010 csym = TREE_OPERAND (cbase, 0);
4011 if (TREE_CODE (usym) == ARRAY_REF)
4013 tree ind = TREE_OPERAND (usym, 1);
4014 if (TREE_CODE (ind) == INTEGER_CST
4015 && tree_fits_shwi_p (ind)
4016 && tree_to_shwi (ind) == 0)
4017 usym = TREE_OPERAND (usym, 0);
4019 if (TREE_CODE (csym) == ARRAY_REF)
4021 tree ind = TREE_OPERAND (csym, 1);
4022 if (TREE_CODE (ind) == INTEGER_CST
4023 && tree_fits_shwi_p (ind)
4024 && tree_to_shwi (ind) == 0)
4025 csym = TREE_OPERAND (csym, 0);
4027 if (operand_equal_p (usym, csym, 0))
4028 return -1;
4030 /* Now do more complex comparison */
4031 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4032 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4033 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4034 return -1;
4037 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4038 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4040 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4041 aff_combination_add (&ubase_aff, &cbase_aff);
4042 expr = aff_combination_to_tree (&ubase_aff);
4043 return get_expr_id (data, expr);
4048 /* Determines the cost of the computation by that USE is expressed
4049 from induction variable CAND. If ADDRESS_P is true, we just need
4050 to create an address from it, otherwise we want to get it into
4051 register. A set of invariants we depend on is stored in
4052 DEPENDS_ON. AT is the statement at that the value is computed.
4053 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4054 addressing is likely. */
4056 static comp_cost
4057 get_computation_cost_at (struct ivopts_data *data,
4058 struct iv_use *use, struct iv_cand *cand,
4059 bool address_p, bitmap *depends_on, gimple at,
4060 bool *can_autoinc,
4061 int *inv_expr_id)
4063 tree ubase = use->iv->base, ustep = use->iv->step;
4064 tree cbase, cstep;
4065 tree utype = TREE_TYPE (ubase), ctype;
4066 unsigned HOST_WIDE_INT cstepi, offset = 0;
4067 HOST_WIDE_INT ratio, aratio;
4068 bool var_present, symbol_present, stmt_is_after_inc;
4069 comp_cost cost;
4070 double_int rat;
4071 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4072 enum machine_mode mem_mode = (address_p
4073 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4074 : VOIDmode);
4076 *depends_on = NULL;
4078 /* Only consider real candidates. */
4079 if (!cand->iv)
4080 return infinite_cost;
4082 cbase = cand->iv->base;
4083 cstep = cand->iv->step;
4084 ctype = TREE_TYPE (cbase);
4086 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4088 /* We do not have a precision to express the values of use. */
4089 return infinite_cost;
4092 if (address_p
4093 || (use->iv->base_object
4094 && cand->iv->base_object
4095 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4096 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4098 /* Do not try to express address of an object with computation based
4099 on address of a different object. This may cause problems in rtl
4100 level alias analysis (that does not expect this to be happening,
4101 as this is illegal in C), and would be unlikely to be useful
4102 anyway. */
4103 if (use->iv->base_object
4104 && cand->iv->base_object
4105 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4106 return infinite_cost;
4109 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4111 /* TODO -- add direct handling of this case. */
4112 goto fallback;
4115 /* CSTEPI is removed from the offset in case statement is after the
4116 increment. If the step is not constant, we use zero instead.
4117 This is a bit imprecise (there is the extra addition), but
4118 redundancy elimination is likely to transform the code so that
4119 it uses value of the variable before increment anyway,
4120 so it is not that much unrealistic. */
4121 if (cst_and_fits_in_hwi (cstep))
4122 cstepi = int_cst_value (cstep);
4123 else
4124 cstepi = 0;
4126 if (!constant_multiple_of (ustep, cstep, &rat))
4127 return infinite_cost;
4129 if (rat.fits_shwi ())
4130 ratio = rat.to_shwi ();
4131 else
4132 return infinite_cost;
4134 STRIP_NOPS (cbase);
4135 ctype = TREE_TYPE (cbase);
4137 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4139 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4140 or ratio == 1, it is better to handle this like
4142 ubase - ratio * cbase + ratio * var
4144 (also holds in the case ratio == -1, TODO. */
4146 if (cst_and_fits_in_hwi (cbase))
4148 offset = - ratio * int_cst_value (cbase);
4149 cost = difference_cost (data,
4150 ubase, build_int_cst (utype, 0),
4151 &symbol_present, &var_present, &offset,
4152 depends_on);
4153 cost.cost /= avg_loop_niter (data->current_loop);
4155 else if (ratio == 1)
4157 tree real_cbase = cbase;
4159 /* Check to see if any adjustment is needed. */
4160 if (cstepi == 0 && stmt_is_after_inc)
4162 aff_tree real_cbase_aff;
4163 aff_tree cstep_aff;
4165 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4166 &real_cbase_aff);
4167 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4169 aff_combination_add (&real_cbase_aff, &cstep_aff);
4170 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4173 cost = difference_cost (data,
4174 ubase, real_cbase,
4175 &symbol_present, &var_present, &offset,
4176 depends_on);
4177 cost.cost /= avg_loop_niter (data->current_loop);
4179 else if (address_p
4180 && !POINTER_TYPE_P (ctype)
4181 && multiplier_allowed_in_address_p
4182 (ratio, mem_mode,
4183 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4185 cbase
4186 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4187 cost = difference_cost (data,
4188 ubase, cbase,
4189 &symbol_present, &var_present, &offset,
4190 depends_on);
4191 cost.cost /= avg_loop_niter (data->current_loop);
4193 else
4195 cost = force_var_cost (data, cbase, depends_on);
4196 cost = add_costs (cost,
4197 difference_cost (data,
4198 ubase, build_int_cst (utype, 0),
4199 &symbol_present, &var_present,
4200 &offset, depends_on));
4201 cost.cost /= avg_loop_niter (data->current_loop);
4202 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4205 if (inv_expr_id)
4207 *inv_expr_id =
4208 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4209 /* Clear depends on. */
4210 if (*inv_expr_id != -1 && depends_on && *depends_on)
4211 bitmap_clear (*depends_on);
4214 /* If we are after the increment, the value of the candidate is higher by
4215 one iteration. */
4216 if (stmt_is_after_inc)
4217 offset -= ratio * cstepi;
4219 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4220 (symbol/var1/const parts may be omitted). If we are looking for an
4221 address, find the cost of addressing this. */
4222 if (address_p)
4223 return add_costs (cost,
4224 get_address_cost (symbol_present, var_present,
4225 offset, ratio, cstepi,
4226 mem_mode,
4227 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4228 speed, stmt_is_after_inc,
4229 can_autoinc));
4231 /* Otherwise estimate the costs for computing the expression. */
4232 if (!symbol_present && !var_present && !offset)
4234 if (ratio != 1)
4235 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4236 return cost;
4239 /* Symbol + offset should be compile-time computable so consider that they
4240 are added once to the variable, if present. */
4241 if (var_present && (symbol_present || offset))
4242 cost.cost += adjust_setup_cost (data,
4243 add_cost (speed, TYPE_MODE (ctype)));
4245 /* Having offset does not affect runtime cost in case it is added to
4246 symbol, but it increases complexity. */
4247 if (offset)
4248 cost.complexity++;
4250 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4252 aratio = ratio > 0 ? ratio : -ratio;
4253 if (aratio != 1)
4254 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4255 return cost;
4257 fallback:
4258 if (can_autoinc)
4259 *can_autoinc = false;
4262 /* Just get the expression, expand it and measure the cost. */
4263 tree comp = get_computation_at (data->current_loop, use, cand, at);
4265 if (!comp)
4266 return infinite_cost;
4268 if (address_p)
4269 comp = build_simple_mem_ref (comp);
4271 return new_cost (computation_cost (comp, speed), 0);
4275 /* Determines the cost of the computation by that USE is expressed
4276 from induction variable CAND. If ADDRESS_P is true, we just need
4277 to create an address from it, otherwise we want to get it into
4278 register. A set of invariants we depend on is stored in
4279 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4280 autoinc addressing is likely. */
4282 static comp_cost
4283 get_computation_cost (struct ivopts_data *data,
4284 struct iv_use *use, struct iv_cand *cand,
4285 bool address_p, bitmap *depends_on,
4286 bool *can_autoinc, int *inv_expr_id)
4288 return get_computation_cost_at (data,
4289 use, cand, address_p, depends_on, use->stmt,
4290 can_autoinc, inv_expr_id);
4293 /* Determines cost of basing replacement of USE on CAND in a generic
4294 expression. */
4296 static bool
4297 determine_use_iv_cost_generic (struct ivopts_data *data,
4298 struct iv_use *use, struct iv_cand *cand)
4300 bitmap depends_on;
4301 comp_cost cost;
4302 int inv_expr_id = -1;
4304 /* The simple case first -- if we need to express value of the preserved
4305 original biv, the cost is 0. This also prevents us from counting the
4306 cost of increment twice -- once at this use and once in the cost of
4307 the candidate. */
4308 if (cand->pos == IP_ORIGINAL
4309 && cand->incremented_at == use->stmt)
4311 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4312 ERROR_MARK, -1);
4313 return true;
4316 cost = get_computation_cost (data, use, cand, false, &depends_on,
4317 NULL, &inv_expr_id);
4319 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4320 inv_expr_id);
4322 return !infinite_cost_p (cost);
4325 /* Determines cost of basing replacement of USE on CAND in an address. */
4327 static bool
4328 determine_use_iv_cost_address (struct ivopts_data *data,
4329 struct iv_use *use, struct iv_cand *cand)
4331 bitmap depends_on;
4332 bool can_autoinc;
4333 int inv_expr_id = -1;
4334 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4335 &can_autoinc, &inv_expr_id);
4337 if (cand->ainc_use == use)
4339 if (can_autoinc)
4340 cost.cost -= cand->cost_step;
4341 /* If we generated the candidate solely for exploiting autoincrement
4342 opportunities, and it turns out it can't be used, set the cost to
4343 infinity to make sure we ignore it. */
4344 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4345 cost = infinite_cost;
4347 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4348 inv_expr_id);
4350 return !infinite_cost_p (cost);
4353 /* Computes value of candidate CAND at position AT in iteration NITER, and
4354 stores it to VAL. */
4356 static void
4357 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4358 aff_tree *val)
4360 aff_tree step, delta, nit;
4361 struct iv *iv = cand->iv;
4362 tree type = TREE_TYPE (iv->base);
4363 tree steptype = type;
4364 if (POINTER_TYPE_P (type))
4365 steptype = sizetype;
4366 steptype = unsigned_type_for (type);
4368 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4369 aff_combination_convert (&step, steptype);
4370 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4371 aff_combination_convert (&nit, steptype);
4372 aff_combination_mult (&nit, &step, &delta);
4373 if (stmt_after_increment (loop, cand, at))
4374 aff_combination_add (&delta, &step);
4376 tree_to_aff_combination (iv->base, type, val);
4377 if (!POINTER_TYPE_P (type))
4378 aff_combination_convert (val, steptype);
4379 aff_combination_add (val, &delta);
4382 /* Returns period of induction variable iv. */
4384 static tree
4385 iv_period (struct iv *iv)
4387 tree step = iv->step, period, type;
4388 tree pow2div;
4390 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4392 type = unsigned_type_for (TREE_TYPE (step));
4393 /* Period of the iv is lcm (step, type_range)/step -1,
4394 i.e., N*type_range/step - 1. Since type range is power
4395 of two, N == (step >> num_of_ending_zeros_binary (step),
4396 so the final result is
4398 (type_range >> num_of_ending_zeros_binary (step)) - 1
4401 pow2div = num_ending_zeros (step);
4403 period = build_low_bits_mask (type,
4404 (TYPE_PRECISION (type)
4405 - tree_to_uhwi (pow2div)));
4407 return period;
4410 /* Returns the comparison operator used when eliminating the iv USE. */
4412 static enum tree_code
4413 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4415 struct loop *loop = data->current_loop;
4416 basic_block ex_bb;
4417 edge exit;
4419 ex_bb = gimple_bb (use->stmt);
4420 exit = EDGE_SUCC (ex_bb, 0);
4421 if (flow_bb_inside_loop_p (loop, exit->dest))
4422 exit = EDGE_SUCC (ex_bb, 1);
4424 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4427 static tree
4428 strip_wrap_conserving_type_conversions (tree exp)
4430 while (tree_ssa_useless_type_conversion (exp)
4431 && (nowrap_type_p (TREE_TYPE (exp))
4432 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4433 exp = TREE_OPERAND (exp, 0);
4434 return exp;
4437 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4438 check for an exact match. */
4440 static bool
4441 expr_equal_p (tree e, tree what)
4443 gimple stmt;
4444 enum tree_code code;
4446 e = strip_wrap_conserving_type_conversions (e);
4447 what = strip_wrap_conserving_type_conversions (what);
4449 code = TREE_CODE (what);
4450 if (TREE_TYPE (e) != TREE_TYPE (what))
4451 return false;
4453 if (operand_equal_p (e, what, 0))
4454 return true;
4456 if (TREE_CODE (e) != SSA_NAME)
4457 return false;
4459 stmt = SSA_NAME_DEF_STMT (e);
4460 if (gimple_code (stmt) != GIMPLE_ASSIGN
4461 || gimple_assign_rhs_code (stmt) != code)
4462 return false;
4464 switch (get_gimple_rhs_class (code))
4466 case GIMPLE_BINARY_RHS:
4467 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4468 return false;
4469 /* Fallthru. */
4471 case GIMPLE_UNARY_RHS:
4472 case GIMPLE_SINGLE_RHS:
4473 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4474 default:
4475 return false;
4479 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4480 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4481 calculation is performed in non-wrapping type.
4483 TODO: More generally, we could test for the situation that
4484 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4485 This would require knowing the sign of OFFSET.
4487 Also, we only look for the first addition in the computation of BASE.
4488 More complex analysis would be better, but introducing it just for
4489 this optimization seems like an overkill. */
4491 static bool
4492 difference_cannot_overflow_p (tree base, tree offset)
4494 enum tree_code code;
4495 tree e1, e2;
4497 if (!nowrap_type_p (TREE_TYPE (base)))
4498 return false;
4500 base = expand_simple_operations (base);
4502 if (TREE_CODE (base) == SSA_NAME)
4504 gimple stmt = SSA_NAME_DEF_STMT (base);
4506 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4507 return false;
4509 code = gimple_assign_rhs_code (stmt);
4510 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4511 return false;
4513 e1 = gimple_assign_rhs1 (stmt);
4514 e2 = gimple_assign_rhs2 (stmt);
4516 else
4518 code = TREE_CODE (base);
4519 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4520 return false;
4521 e1 = TREE_OPERAND (base, 0);
4522 e2 = TREE_OPERAND (base, 1);
4525 /* TODO: deeper inspection may be necessary to prove the equality. */
4526 switch (code)
4528 case PLUS_EXPR:
4529 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4530 case POINTER_PLUS_EXPR:
4531 return expr_equal_p (e2, offset);
4533 default:
4534 return false;
4538 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4539 comparison with CAND. NITER describes the number of iterations of
4540 the loops. If successful, the comparison in COMP_P is altered accordingly.
4542 We aim to handle the following situation:
4544 sometype *base, *p;
4545 int a, b, i;
4547 i = a;
4548 p = p_0 = base + a;
4552 bla (*p);
4553 p++;
4554 i++;
4556 while (i < b);
4558 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4559 We aim to optimize this to
4561 p = p_0 = base + a;
4564 bla (*p);
4565 p++;
4567 while (p < p_0 - a + b);
4569 This preserves the correctness, since the pointer arithmetics does not
4570 overflow. More precisely:
4572 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4573 overflow in computing it or the values of p.
4574 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4575 overflow. To prove this, we use the fact that p_0 = base + a. */
4577 static bool
4578 iv_elimination_compare_lt (struct ivopts_data *data,
4579 struct iv_cand *cand, enum tree_code *comp_p,
4580 struct tree_niter_desc *niter)
4582 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4583 struct aff_tree nit, tmpa, tmpb;
4584 enum tree_code comp;
4585 HOST_WIDE_INT step;
4587 /* We need to know that the candidate induction variable does not overflow.
4588 While more complex analysis may be used to prove this, for now just
4589 check that the variable appears in the original program and that it
4590 is computed in a type that guarantees no overflows. */
4591 cand_type = TREE_TYPE (cand->iv->base);
4592 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4593 return false;
4595 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4596 the calculation of the BOUND could overflow, making the comparison
4597 invalid. */
4598 if (!data->loop_single_exit_p)
4599 return false;
4601 /* We need to be able to decide whether candidate is increasing or decreasing
4602 in order to choose the right comparison operator. */
4603 if (!cst_and_fits_in_hwi (cand->iv->step))
4604 return false;
4605 step = int_cst_value (cand->iv->step);
4607 /* Check that the number of iterations matches the expected pattern:
4608 a + 1 > b ? 0 : b - a - 1. */
4609 mbz = niter->may_be_zero;
4610 if (TREE_CODE (mbz) == GT_EXPR)
4612 /* Handle a + 1 > b. */
4613 tree op0 = TREE_OPERAND (mbz, 0);
4614 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4616 a = TREE_OPERAND (op0, 0);
4617 b = TREE_OPERAND (mbz, 1);
4619 else
4620 return false;
4622 else if (TREE_CODE (mbz) == LT_EXPR)
4624 tree op1 = TREE_OPERAND (mbz, 1);
4626 /* Handle b < a + 1. */
4627 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4629 a = TREE_OPERAND (op1, 0);
4630 b = TREE_OPERAND (mbz, 0);
4632 else
4633 return false;
4635 else
4636 return false;
4638 /* Expected number of iterations is B - A - 1. Check that it matches
4639 the actual number, i.e., that B - A - NITER = 1. */
4640 tree_to_aff_combination (niter->niter, nit_type, &nit);
4641 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4642 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4643 aff_combination_scale (&nit, double_int_minus_one);
4644 aff_combination_scale (&tmpa, double_int_minus_one);
4645 aff_combination_add (&tmpb, &tmpa);
4646 aff_combination_add (&tmpb, &nit);
4647 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4648 return false;
4650 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4651 overflow. */
4652 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4653 cand->iv->step,
4654 fold_convert (TREE_TYPE (cand->iv->step), a));
4655 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4656 return false;
4658 /* Determine the new comparison operator. */
4659 comp = step < 0 ? GT_EXPR : LT_EXPR;
4660 if (*comp_p == NE_EXPR)
4661 *comp_p = comp;
4662 else if (*comp_p == EQ_EXPR)
4663 *comp_p = invert_tree_comparison (comp, false);
4664 else
4665 gcc_unreachable ();
4667 return true;
4670 /* Check whether it is possible to express the condition in USE by comparison
4671 of candidate CAND. If so, store the value compared with to BOUND, and the
4672 comparison operator to COMP. */
4674 static bool
4675 may_eliminate_iv (struct ivopts_data *data,
4676 struct iv_use *use, struct iv_cand *cand, tree *bound,
4677 enum tree_code *comp)
4679 basic_block ex_bb;
4680 edge exit;
4681 tree period;
4682 struct loop *loop = data->current_loop;
4683 aff_tree bnd;
4684 struct tree_niter_desc *desc = NULL;
4686 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4687 return false;
4689 /* For now works only for exits that dominate the loop latch.
4690 TODO: extend to other conditions inside loop body. */
4691 ex_bb = gimple_bb (use->stmt);
4692 if (use->stmt != last_stmt (ex_bb)
4693 || gimple_code (use->stmt) != GIMPLE_COND
4694 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4695 return false;
4697 exit = EDGE_SUCC (ex_bb, 0);
4698 if (flow_bb_inside_loop_p (loop, exit->dest))
4699 exit = EDGE_SUCC (ex_bb, 1);
4700 if (flow_bb_inside_loop_p (loop, exit->dest))
4701 return false;
4703 desc = niter_for_exit (data, exit);
4704 if (!desc)
4705 return false;
4707 /* Determine whether we can use the variable to test the exit condition.
4708 This is the case iff the period of the induction variable is greater
4709 than the number of iterations for which the exit condition is true. */
4710 period = iv_period (cand->iv);
4712 /* If the number of iterations is constant, compare against it directly. */
4713 if (TREE_CODE (desc->niter) == INTEGER_CST)
4715 /* See cand_value_at. */
4716 if (stmt_after_increment (loop, cand, use->stmt))
4718 if (!tree_int_cst_lt (desc->niter, period))
4719 return false;
4721 else
4723 if (tree_int_cst_lt (period, desc->niter))
4724 return false;
4728 /* If not, and if this is the only possible exit of the loop, see whether
4729 we can get a conservative estimate on the number of iterations of the
4730 entire loop and compare against that instead. */
4731 else
4733 double_int period_value, max_niter;
4735 max_niter = desc->max;
4736 if (stmt_after_increment (loop, cand, use->stmt))
4737 max_niter += double_int_one;
4738 period_value = tree_to_double_int (period);
4739 if (max_niter.ugt (period_value))
4741 /* See if we can take advantage of inferred loop bound information. */
4742 if (data->loop_single_exit_p)
4744 if (!max_loop_iterations (loop, &max_niter))
4745 return false;
4746 /* The loop bound is already adjusted by adding 1. */
4747 if (max_niter.ugt (period_value))
4748 return false;
4750 else
4751 return false;
4755 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4757 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4758 aff_combination_to_tree (&bnd));
4759 *comp = iv_elimination_compare (data, use);
4761 /* It is unlikely that computing the number of iterations using division
4762 would be more profitable than keeping the original induction variable. */
4763 if (expression_expensive_p (*bound))
4764 return false;
4766 /* Sometimes, it is possible to handle the situation that the number of
4767 iterations may be zero unless additional assumtions by using <
4768 instead of != in the exit condition.
4770 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4771 base the exit condition on it. However, that is often too
4772 expensive. */
4773 if (!integer_zerop (desc->may_be_zero))
4774 return iv_elimination_compare_lt (data, cand, comp, desc);
4776 return true;
4779 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4780 be copied, if is is used in the loop body and DATA->body_includes_call. */
4782 static int
4783 parm_decl_cost (struct ivopts_data *data, tree bound)
4785 tree sbound = bound;
4786 STRIP_NOPS (sbound);
4788 if (TREE_CODE (sbound) == SSA_NAME
4789 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4790 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4791 && data->body_includes_call)
4792 return COSTS_N_INSNS (1);
4794 return 0;
4797 /* Determines cost of basing replacement of USE on CAND in a condition. */
4799 static bool
4800 determine_use_iv_cost_condition (struct ivopts_data *data,
4801 struct iv_use *use, struct iv_cand *cand)
4803 tree bound = NULL_TREE;
4804 struct iv *cmp_iv;
4805 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4806 comp_cost elim_cost, express_cost, cost, bound_cost;
4807 bool ok;
4808 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4809 tree *control_var, *bound_cst;
4810 enum tree_code comp = ERROR_MARK;
4812 /* Only consider real candidates. */
4813 if (!cand->iv)
4815 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4816 ERROR_MARK, -1);
4817 return false;
4820 /* Try iv elimination. */
4821 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4823 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4824 if (elim_cost.cost == 0)
4825 elim_cost.cost = parm_decl_cost (data, bound);
4826 else if (TREE_CODE (bound) == INTEGER_CST)
4827 elim_cost.cost = 0;
4828 /* If we replace a loop condition 'i < n' with 'p < base + n',
4829 depends_on_elim will have 'base' and 'n' set, which implies
4830 that both 'base' and 'n' will be live during the loop. More likely,
4831 'base + n' will be loop invariant, resulting in only one live value
4832 during the loop. So in that case we clear depends_on_elim and set
4833 elim_inv_expr_id instead. */
4834 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4836 elim_inv_expr_id = get_expr_id (data, bound);
4837 bitmap_clear (depends_on_elim);
4839 /* The bound is a loop invariant, so it will be only computed
4840 once. */
4841 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4843 else
4844 elim_cost = infinite_cost;
4846 /* Try expressing the original giv. If it is compared with an invariant,
4847 note that we cannot get rid of it. */
4848 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4849 NULL, &cmp_iv);
4850 gcc_assert (ok);
4852 /* When the condition is a comparison of the candidate IV against
4853 zero, prefer this IV.
4855 TODO: The constant that we're subtracting from the cost should
4856 be target-dependent. This information should be added to the
4857 target costs for each backend. */
4858 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4859 && integer_zerop (*bound_cst)
4860 && (operand_equal_p (*control_var, cand->var_after, 0)
4861 || operand_equal_p (*control_var, cand->var_before, 0)))
4862 elim_cost.cost -= 1;
4864 express_cost = get_computation_cost (data, use, cand, false,
4865 &depends_on_express, NULL,
4866 &express_inv_expr_id);
4867 fd_ivopts_data = data;
4868 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4870 /* Count the cost of the original bound as well. */
4871 bound_cost = force_var_cost (data, *bound_cst, NULL);
4872 if (bound_cost.cost == 0)
4873 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4874 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4875 bound_cost.cost = 0;
4876 express_cost.cost += bound_cost.cost;
4878 /* Choose the better approach, preferring the eliminated IV. */
4879 if (compare_costs (elim_cost, express_cost) <= 0)
4881 cost = elim_cost;
4882 depends_on = depends_on_elim;
4883 depends_on_elim = NULL;
4884 inv_expr_id = elim_inv_expr_id;
4886 else
4888 cost = express_cost;
4889 depends_on = depends_on_express;
4890 depends_on_express = NULL;
4891 bound = NULL_TREE;
4892 comp = ERROR_MARK;
4893 inv_expr_id = express_inv_expr_id;
4896 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4898 if (depends_on_elim)
4899 BITMAP_FREE (depends_on_elim);
4900 if (depends_on_express)
4901 BITMAP_FREE (depends_on_express);
4903 return !infinite_cost_p (cost);
4906 /* Determines cost of basing replacement of USE on CAND. Returns false
4907 if USE cannot be based on CAND. */
4909 static bool
4910 determine_use_iv_cost (struct ivopts_data *data,
4911 struct iv_use *use, struct iv_cand *cand)
4913 switch (use->type)
4915 case USE_NONLINEAR_EXPR:
4916 return determine_use_iv_cost_generic (data, use, cand);
4918 case USE_ADDRESS:
4919 return determine_use_iv_cost_address (data, use, cand);
4921 case USE_COMPARE:
4922 return determine_use_iv_cost_condition (data, use, cand);
4924 default:
4925 gcc_unreachable ();
4929 /* Return true if get_computation_cost indicates that autoincrement is
4930 a possibility for the pair of USE and CAND, false otherwise. */
4932 static bool
4933 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4934 struct iv_cand *cand)
4936 bitmap depends_on;
4937 bool can_autoinc;
4938 comp_cost cost;
4940 if (use->type != USE_ADDRESS)
4941 return false;
4943 cost = get_computation_cost (data, use, cand, true, &depends_on,
4944 &can_autoinc, NULL);
4946 BITMAP_FREE (depends_on);
4948 return !infinite_cost_p (cost) && can_autoinc;
4951 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4952 use that allows autoincrement, and set their AINC_USE if possible. */
4954 static void
4955 set_autoinc_for_original_candidates (struct ivopts_data *data)
4957 unsigned i, j;
4959 for (i = 0; i < n_iv_cands (data); i++)
4961 struct iv_cand *cand = iv_cand (data, i);
4962 struct iv_use *closest_before = NULL;
4963 struct iv_use *closest_after = NULL;
4964 if (cand->pos != IP_ORIGINAL)
4965 continue;
4967 for (j = 0; j < n_iv_uses (data); j++)
4969 struct iv_use *use = iv_use (data, j);
4970 unsigned uid = gimple_uid (use->stmt);
4972 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4973 continue;
4975 if (uid < gimple_uid (cand->incremented_at)
4976 && (closest_before == NULL
4977 || uid > gimple_uid (closest_before->stmt)))
4978 closest_before = use;
4980 if (uid > gimple_uid (cand->incremented_at)
4981 && (closest_after == NULL
4982 || uid < gimple_uid (closest_after->stmt)))
4983 closest_after = use;
4986 if (closest_before != NULL
4987 && autoinc_possible_for_pair (data, closest_before, cand))
4988 cand->ainc_use = closest_before;
4989 else if (closest_after != NULL
4990 && autoinc_possible_for_pair (data, closest_after, cand))
4991 cand->ainc_use = closest_after;
4995 /* Finds the candidates for the induction variables. */
4997 static void
4998 find_iv_candidates (struct ivopts_data *data)
5000 /* Add commonly used ivs. */
5001 add_standard_iv_candidates (data);
5003 /* Add old induction variables. */
5004 add_old_ivs_candidates (data);
5006 /* Add induction variables derived from uses. */
5007 add_derived_ivs_candidates (data);
5009 set_autoinc_for_original_candidates (data);
5011 /* Record the important candidates. */
5012 record_important_candidates (data);
5015 /* Determines costs of basing the use of the iv on an iv candidate. */
5017 static void
5018 determine_use_iv_costs (struct ivopts_data *data)
5020 unsigned i, j;
5021 struct iv_use *use;
5022 struct iv_cand *cand;
5023 bitmap to_clear = BITMAP_ALLOC (NULL);
5025 alloc_use_cost_map (data);
5027 for (i = 0; i < n_iv_uses (data); i++)
5029 use = iv_use (data, i);
5031 if (data->consider_all_candidates)
5033 for (j = 0; j < n_iv_cands (data); j++)
5035 cand = iv_cand (data, j);
5036 determine_use_iv_cost (data, use, cand);
5039 else
5041 bitmap_iterator bi;
5043 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5045 cand = iv_cand (data, j);
5046 if (!determine_use_iv_cost (data, use, cand))
5047 bitmap_set_bit (to_clear, j);
5050 /* Remove the candidates for that the cost is infinite from
5051 the list of related candidates. */
5052 bitmap_and_compl_into (use->related_cands, to_clear);
5053 bitmap_clear (to_clear);
5057 BITMAP_FREE (to_clear);
5059 if (dump_file && (dump_flags & TDF_DETAILS))
5061 fprintf (dump_file, "Use-candidate costs:\n");
5063 for (i = 0; i < n_iv_uses (data); i++)
5065 use = iv_use (data, i);
5067 fprintf (dump_file, "Use %d:\n", i);
5068 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5069 for (j = 0; j < use->n_map_members; j++)
5071 if (!use->cost_map[j].cand
5072 || infinite_cost_p (use->cost_map[j].cost))
5073 continue;
5075 fprintf (dump_file, " %d\t%d\t%d\t",
5076 use->cost_map[j].cand->id,
5077 use->cost_map[j].cost.cost,
5078 use->cost_map[j].cost.complexity);
5079 if (use->cost_map[j].depends_on)
5080 bitmap_print (dump_file,
5081 use->cost_map[j].depends_on, "","");
5082 if (use->cost_map[j].inv_expr_id != -1)
5083 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5084 fprintf (dump_file, "\n");
5087 fprintf (dump_file, "\n");
5089 fprintf (dump_file, "\n");
5093 /* Determines cost of the candidate CAND. */
5095 static void
5096 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5098 comp_cost cost_base;
5099 unsigned cost, cost_step;
5100 tree base;
5102 if (!cand->iv)
5104 cand->cost = 0;
5105 return;
5108 /* There are two costs associated with the candidate -- its increment
5109 and its initialization. The second is almost negligible for any loop
5110 that rolls enough, so we take it just very little into account. */
5112 base = cand->iv->base;
5113 cost_base = force_var_cost (data, base, NULL);
5114 /* It will be exceptional that the iv register happens to be initialized with
5115 the proper value at no cost. In general, there will at least be a regcopy
5116 or a const set. */
5117 if (cost_base.cost == 0)
5118 cost_base.cost = COSTS_N_INSNS (1);
5119 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5121 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5123 /* Prefer the original ivs unless we may gain something by replacing it.
5124 The reason is to make debugging simpler; so this is not relevant for
5125 artificial ivs created by other optimization passes. */
5126 if (cand->pos != IP_ORIGINAL
5127 || !SSA_NAME_VAR (cand->var_before)
5128 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5129 cost++;
5131 /* Prefer not to insert statements into latch unless there are some
5132 already (so that we do not create unnecessary jumps). */
5133 if (cand->pos == IP_END
5134 && empty_block_p (ip_end_pos (data->current_loop)))
5135 cost++;
5137 cand->cost = cost;
5138 cand->cost_step = cost_step;
5141 /* Determines costs of computation of the candidates. */
5143 static void
5144 determine_iv_costs (struct ivopts_data *data)
5146 unsigned i;
5148 if (dump_file && (dump_flags & TDF_DETAILS))
5150 fprintf (dump_file, "Candidate costs:\n");
5151 fprintf (dump_file, " cand\tcost\n");
5154 for (i = 0; i < n_iv_cands (data); i++)
5156 struct iv_cand *cand = iv_cand (data, i);
5158 determine_iv_cost (data, cand);
5160 if (dump_file && (dump_flags & TDF_DETAILS))
5161 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5164 if (dump_file && (dump_flags & TDF_DETAILS))
5165 fprintf (dump_file, "\n");
5168 /* Calculates cost for having SIZE induction variables. */
5170 static unsigned
5171 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5173 /* We add size to the cost, so that we prefer eliminating ivs
5174 if possible. */
5175 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5176 data->body_includes_call);
5179 /* For each size of the induction variable set determine the penalty. */
5181 static void
5182 determine_set_costs (struct ivopts_data *data)
5184 unsigned j, n;
5185 gimple phi;
5186 gimple_stmt_iterator psi;
5187 tree op;
5188 struct loop *loop = data->current_loop;
5189 bitmap_iterator bi;
5191 if (dump_file && (dump_flags & TDF_DETAILS))
5193 fprintf (dump_file, "Global costs:\n");
5194 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5195 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5196 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5197 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5200 n = 0;
5201 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5203 phi = gsi_stmt (psi);
5204 op = PHI_RESULT (phi);
5206 if (virtual_operand_p (op))
5207 continue;
5209 if (get_iv (data, op))
5210 continue;
5212 n++;
5215 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5217 struct version_info *info = ver_info (data, j);
5219 if (info->inv_id && info->has_nonlin_use)
5220 n++;
5223 data->regs_used = n;
5224 if (dump_file && (dump_flags & TDF_DETAILS))
5225 fprintf (dump_file, " regs_used %d\n", n);
5227 if (dump_file && (dump_flags & TDF_DETAILS))
5229 fprintf (dump_file, " cost for size:\n");
5230 fprintf (dump_file, " ivs\tcost\n");
5231 for (j = 0; j <= 2 * target_avail_regs; j++)
5232 fprintf (dump_file, " %d\t%d\n", j,
5233 ivopts_global_cost_for_size (data, j));
5234 fprintf (dump_file, "\n");
5238 /* Returns true if A is a cheaper cost pair than B. */
5240 static bool
5241 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5243 int cmp;
5245 if (!a)
5246 return false;
5248 if (!b)
5249 return true;
5251 cmp = compare_costs (a->cost, b->cost);
5252 if (cmp < 0)
5253 return true;
5255 if (cmp > 0)
5256 return false;
5258 /* In case the costs are the same, prefer the cheaper candidate. */
5259 if (a->cand->cost < b->cand->cost)
5260 return true;
5262 return false;
5266 /* Returns candidate by that USE is expressed in IVS. */
5268 static struct cost_pair *
5269 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5271 return ivs->cand_for_use[use->id];
5274 /* Computes the cost field of IVS structure. */
5276 static void
5277 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5279 comp_cost cost = ivs->cand_use_cost;
5281 cost.cost += ivs->cand_cost;
5283 cost.cost += ivopts_global_cost_for_size (data,
5284 ivs->n_regs + ivs->num_used_inv_expr);
5286 ivs->cost = cost;
5289 /* Remove invariants in set INVS to set IVS. */
5291 static void
5292 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5294 bitmap_iterator bi;
5295 unsigned iid;
5297 if (!invs)
5298 return;
5300 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5302 ivs->n_invariant_uses[iid]--;
5303 if (ivs->n_invariant_uses[iid] == 0)
5304 ivs->n_regs--;
5308 /* Set USE not to be expressed by any candidate in IVS. */
5310 static void
5311 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5312 struct iv_use *use)
5314 unsigned uid = use->id, cid;
5315 struct cost_pair *cp;
5317 cp = ivs->cand_for_use[uid];
5318 if (!cp)
5319 return;
5320 cid = cp->cand->id;
5322 ivs->bad_uses++;
5323 ivs->cand_for_use[uid] = NULL;
5324 ivs->n_cand_uses[cid]--;
5326 if (ivs->n_cand_uses[cid] == 0)
5328 bitmap_clear_bit (ivs->cands, cid);
5329 /* Do not count the pseudocandidates. */
5330 if (cp->cand->iv)
5331 ivs->n_regs--;
5332 ivs->n_cands--;
5333 ivs->cand_cost -= cp->cand->cost;
5335 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5338 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5340 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5342 if (cp->inv_expr_id != -1)
5344 ivs->used_inv_expr[cp->inv_expr_id]--;
5345 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5346 ivs->num_used_inv_expr--;
5348 iv_ca_recount_cost (data, ivs);
5351 /* Add invariants in set INVS to set IVS. */
5353 static void
5354 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5356 bitmap_iterator bi;
5357 unsigned iid;
5359 if (!invs)
5360 return;
5362 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5364 ivs->n_invariant_uses[iid]++;
5365 if (ivs->n_invariant_uses[iid] == 1)
5366 ivs->n_regs++;
5370 /* Set cost pair for USE in set IVS to CP. */
5372 static void
5373 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5374 struct iv_use *use, struct cost_pair *cp)
5376 unsigned uid = use->id, cid;
5378 if (ivs->cand_for_use[uid] == cp)
5379 return;
5381 if (ivs->cand_for_use[uid])
5382 iv_ca_set_no_cp (data, ivs, use);
5384 if (cp)
5386 cid = cp->cand->id;
5388 ivs->bad_uses--;
5389 ivs->cand_for_use[uid] = cp;
5390 ivs->n_cand_uses[cid]++;
5391 if (ivs->n_cand_uses[cid] == 1)
5393 bitmap_set_bit (ivs->cands, cid);
5394 /* Do not count the pseudocandidates. */
5395 if (cp->cand->iv)
5396 ivs->n_regs++;
5397 ivs->n_cands++;
5398 ivs->cand_cost += cp->cand->cost;
5400 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5403 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5404 iv_ca_set_add_invariants (ivs, cp->depends_on);
5406 if (cp->inv_expr_id != -1)
5408 ivs->used_inv_expr[cp->inv_expr_id]++;
5409 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5410 ivs->num_used_inv_expr++;
5412 iv_ca_recount_cost (data, ivs);
5416 /* Extend set IVS by expressing USE by some of the candidates in it
5417 if possible. All important candidates will be considered
5418 if IMPORTANT_CANDIDATES is true. */
5420 static void
5421 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5422 struct iv_use *use, bool important_candidates)
5424 struct cost_pair *best_cp = NULL, *cp;
5425 bitmap_iterator bi;
5426 bitmap cands;
5427 unsigned i;
5429 gcc_assert (ivs->upto >= use->id);
5431 if (ivs->upto == use->id)
5433 ivs->upto++;
5434 ivs->bad_uses++;
5437 cands = (important_candidates ? data->important_candidates : ivs->cands);
5438 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5440 struct iv_cand *cand = iv_cand (data, i);
5442 cp = get_use_iv_cost (data, use, cand);
5444 if (cheaper_cost_pair (cp, best_cp))
5445 best_cp = cp;
5448 iv_ca_set_cp (data, ivs, use, best_cp);
5451 /* Get cost for assignment IVS. */
5453 static comp_cost
5454 iv_ca_cost (struct iv_ca *ivs)
5456 /* This was a conditional expression but it triggered a bug in
5457 Sun C 5.5. */
5458 if (ivs->bad_uses)
5459 return infinite_cost;
5460 else
5461 return ivs->cost;
5464 /* Returns true if all dependences of CP are among invariants in IVS. */
5466 static bool
5467 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5469 unsigned i;
5470 bitmap_iterator bi;
5472 if (!cp->depends_on)
5473 return true;
5475 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5477 if (ivs->n_invariant_uses[i] == 0)
5478 return false;
5481 return true;
5484 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5485 it before NEXT_CHANGE. */
5487 static struct iv_ca_delta *
5488 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5489 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5491 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5493 change->use = use;
5494 change->old_cp = old_cp;
5495 change->new_cp = new_cp;
5496 change->next_change = next_change;
5498 return change;
5501 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5502 are rewritten. */
5504 static struct iv_ca_delta *
5505 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5507 struct iv_ca_delta *last;
5509 if (!l2)
5510 return l1;
5512 if (!l1)
5513 return l2;
5515 for (last = l1; last->next_change; last = last->next_change)
5516 continue;
5517 last->next_change = l2;
5519 return l1;
5522 /* Reverse the list of changes DELTA, forming the inverse to it. */
5524 static struct iv_ca_delta *
5525 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5527 struct iv_ca_delta *act, *next, *prev = NULL;
5528 struct cost_pair *tmp;
5530 for (act = delta; act; act = next)
5532 next = act->next_change;
5533 act->next_change = prev;
5534 prev = act;
5536 tmp = act->old_cp;
5537 act->old_cp = act->new_cp;
5538 act->new_cp = tmp;
5541 return prev;
5544 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5545 reverted instead. */
5547 static void
5548 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5549 struct iv_ca_delta *delta, bool forward)
5551 struct cost_pair *from, *to;
5552 struct iv_ca_delta *act;
5554 if (!forward)
5555 delta = iv_ca_delta_reverse (delta);
5557 for (act = delta; act; act = act->next_change)
5559 from = act->old_cp;
5560 to = act->new_cp;
5561 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5562 iv_ca_set_cp (data, ivs, act->use, to);
5565 if (!forward)
5566 iv_ca_delta_reverse (delta);
5569 /* Returns true if CAND is used in IVS. */
5571 static bool
5572 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5574 return ivs->n_cand_uses[cand->id] > 0;
5577 /* Returns number of induction variable candidates in the set IVS. */
5579 static unsigned
5580 iv_ca_n_cands (struct iv_ca *ivs)
5582 return ivs->n_cands;
5585 /* Free the list of changes DELTA. */
5587 static void
5588 iv_ca_delta_free (struct iv_ca_delta **delta)
5590 struct iv_ca_delta *act, *next;
5592 for (act = *delta; act; act = next)
5594 next = act->next_change;
5595 free (act);
5598 *delta = NULL;
5601 /* Allocates new iv candidates assignment. */
5603 static struct iv_ca *
5604 iv_ca_new (struct ivopts_data *data)
5606 struct iv_ca *nw = XNEW (struct iv_ca);
5608 nw->upto = 0;
5609 nw->bad_uses = 0;
5610 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5611 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5612 nw->cands = BITMAP_ALLOC (NULL);
5613 nw->n_cands = 0;
5614 nw->n_regs = 0;
5615 nw->cand_use_cost = no_cost;
5616 nw->cand_cost = 0;
5617 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5618 nw->cost = no_cost;
5619 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5620 nw->num_used_inv_expr = 0;
5622 return nw;
5625 /* Free memory occupied by the set IVS. */
5627 static void
5628 iv_ca_free (struct iv_ca **ivs)
5630 free ((*ivs)->cand_for_use);
5631 free ((*ivs)->n_cand_uses);
5632 BITMAP_FREE ((*ivs)->cands);
5633 free ((*ivs)->n_invariant_uses);
5634 free ((*ivs)->used_inv_expr);
5635 free (*ivs);
5636 *ivs = NULL;
5639 /* Dumps IVS to FILE. */
5641 static void
5642 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5644 const char *pref = " invariants ";
5645 unsigned i;
5646 comp_cost cost = iv_ca_cost (ivs);
5648 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5649 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5650 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5651 bitmap_print (file, ivs->cands, " candidates: ","\n");
5653 for (i = 0; i < ivs->upto; i++)
5655 struct iv_use *use = iv_use (data, i);
5656 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5657 if (cp)
5658 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5659 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5660 else
5661 fprintf (file, " use:%d --> ??\n", use->id);
5664 for (i = 1; i <= data->max_inv_id; i++)
5665 if (ivs->n_invariant_uses[i])
5667 fprintf (file, "%s%d", pref, i);
5668 pref = ", ";
5670 fprintf (file, "\n\n");
5673 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5674 new set, and store differences in DELTA. Number of induction variables
5675 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5676 the function will try to find a solution with mimimal iv candidates. */
5678 static comp_cost
5679 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5680 struct iv_cand *cand, struct iv_ca_delta **delta,
5681 unsigned *n_ivs, bool min_ncand)
5683 unsigned i;
5684 comp_cost cost;
5685 struct iv_use *use;
5686 struct cost_pair *old_cp, *new_cp;
5688 *delta = NULL;
5689 for (i = 0; i < ivs->upto; i++)
5691 use = iv_use (data, i);
5692 old_cp = iv_ca_cand_for_use (ivs, use);
5694 if (old_cp
5695 && old_cp->cand == cand)
5696 continue;
5698 new_cp = get_use_iv_cost (data, use, cand);
5699 if (!new_cp)
5700 continue;
5702 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5703 continue;
5705 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5706 continue;
5708 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5711 iv_ca_delta_commit (data, ivs, *delta, true);
5712 cost = iv_ca_cost (ivs);
5713 if (n_ivs)
5714 *n_ivs = iv_ca_n_cands (ivs);
5715 iv_ca_delta_commit (data, ivs, *delta, false);
5717 return cost;
5720 /* Try narrowing set IVS by removing CAND. Return the cost of
5721 the new set and store the differences in DELTA. START is
5722 the candidate with which we start narrowing. */
5724 static comp_cost
5725 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5726 struct iv_cand *cand, struct iv_cand *start,
5727 struct iv_ca_delta **delta)
5729 unsigned i, ci;
5730 struct iv_use *use;
5731 struct cost_pair *old_cp, *new_cp, *cp;
5732 bitmap_iterator bi;
5733 struct iv_cand *cnd;
5734 comp_cost cost, best_cost, acost;
5736 *delta = NULL;
5737 for (i = 0; i < n_iv_uses (data); i++)
5739 use = iv_use (data, i);
5741 old_cp = iv_ca_cand_for_use (ivs, use);
5742 if (old_cp->cand != cand)
5743 continue;
5745 best_cost = iv_ca_cost (ivs);
5746 /* Start narrowing with START. */
5747 new_cp = get_use_iv_cost (data, use, start);
5749 if (data->consider_all_candidates)
5751 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5753 if (ci == cand->id || (start && ci == start->id))
5754 continue;
5756 cnd = iv_cand (data, ci);
5758 cp = get_use_iv_cost (data, use, cnd);
5759 if (!cp)
5760 continue;
5762 iv_ca_set_cp (data, ivs, use, cp);
5763 acost = iv_ca_cost (ivs);
5765 if (compare_costs (acost, best_cost) < 0)
5767 best_cost = acost;
5768 new_cp = cp;
5772 else
5774 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5776 if (ci == cand->id || (start && ci == start->id))
5777 continue;
5779 cnd = iv_cand (data, ci);
5781 cp = get_use_iv_cost (data, use, cnd);
5782 if (!cp)
5783 continue;
5785 iv_ca_set_cp (data, ivs, use, cp);
5786 acost = iv_ca_cost (ivs);
5788 if (compare_costs (acost, best_cost) < 0)
5790 best_cost = acost;
5791 new_cp = cp;
5795 /* Restore to old cp for use. */
5796 iv_ca_set_cp (data, ivs, use, old_cp);
5798 if (!new_cp)
5800 iv_ca_delta_free (delta);
5801 return infinite_cost;
5804 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5807 iv_ca_delta_commit (data, ivs, *delta, true);
5808 cost = iv_ca_cost (ivs);
5809 iv_ca_delta_commit (data, ivs, *delta, false);
5811 return cost;
5814 /* Try optimizing the set of candidates IVS by removing candidates different
5815 from to EXCEPT_CAND from it. Return cost of the new set, and store
5816 differences in DELTA. */
5818 static comp_cost
5819 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5820 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5822 bitmap_iterator bi;
5823 struct iv_ca_delta *act_delta, *best_delta;
5824 unsigned i;
5825 comp_cost best_cost, acost;
5826 struct iv_cand *cand;
5828 best_delta = NULL;
5829 best_cost = iv_ca_cost (ivs);
5831 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5833 cand = iv_cand (data, i);
5835 if (cand == except_cand)
5836 continue;
5838 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5840 if (compare_costs (acost, best_cost) < 0)
5842 best_cost = acost;
5843 iv_ca_delta_free (&best_delta);
5844 best_delta = act_delta;
5846 else
5847 iv_ca_delta_free (&act_delta);
5850 if (!best_delta)
5852 *delta = NULL;
5853 return best_cost;
5856 /* Recurse to possibly remove other unnecessary ivs. */
5857 iv_ca_delta_commit (data, ivs, best_delta, true);
5858 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5859 iv_ca_delta_commit (data, ivs, best_delta, false);
5860 *delta = iv_ca_delta_join (best_delta, *delta);
5861 return best_cost;
5864 /* Tries to extend the sets IVS in the best possible way in order
5865 to express the USE. If ORIGINALP is true, prefer candidates from
5866 the original set of IVs, otherwise favor important candidates not
5867 based on any memory object. */
5869 static bool
5870 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5871 struct iv_use *use, bool originalp)
5873 comp_cost best_cost, act_cost;
5874 unsigned i;
5875 bitmap_iterator bi;
5876 struct iv_cand *cand;
5877 struct iv_ca_delta *best_delta = NULL, *act_delta;
5878 struct cost_pair *cp;
5880 iv_ca_add_use (data, ivs, use, false);
5881 best_cost = iv_ca_cost (ivs);
5883 cp = iv_ca_cand_for_use (ivs, use);
5884 if (!cp)
5886 ivs->upto--;
5887 ivs->bad_uses--;
5888 iv_ca_add_use (data, ivs, use, true);
5889 best_cost = iv_ca_cost (ivs);
5890 cp = iv_ca_cand_for_use (ivs, use);
5892 if (cp)
5894 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5895 iv_ca_set_no_cp (data, ivs, use);
5898 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5899 first try important candidates not based on any memory object. Only if
5900 this fails, try the specific ones. Rationale -- in loops with many
5901 variables the best choice often is to use just one generic biv. If we
5902 added here many ivs specific to the uses, the optimization algorithm later
5903 would be likely to get stuck in a local minimum, thus causing us to create
5904 too many ivs. The approach from few ivs to more seems more likely to be
5905 successful -- starting from few ivs, replacing an expensive use by a
5906 specific iv should always be a win. */
5907 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5909 cand = iv_cand (data, i);
5911 if (originalp && cand->pos !=IP_ORIGINAL)
5912 continue;
5914 if (!originalp && cand->iv->base_object != NULL_TREE)
5915 continue;
5917 if (iv_ca_cand_used_p (ivs, cand))
5918 continue;
5920 cp = get_use_iv_cost (data, use, cand);
5921 if (!cp)
5922 continue;
5924 iv_ca_set_cp (data, ivs, use, cp);
5925 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5926 true);
5927 iv_ca_set_no_cp (data, ivs, use);
5928 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5930 if (compare_costs (act_cost, best_cost) < 0)
5932 best_cost = act_cost;
5934 iv_ca_delta_free (&best_delta);
5935 best_delta = act_delta;
5937 else
5938 iv_ca_delta_free (&act_delta);
5941 if (infinite_cost_p (best_cost))
5943 for (i = 0; i < use->n_map_members; i++)
5945 cp = use->cost_map + i;
5946 cand = cp->cand;
5947 if (!cand)
5948 continue;
5950 /* Already tried this. */
5951 if (cand->important)
5953 if (originalp && cand->pos == IP_ORIGINAL)
5954 continue;
5955 if (!originalp && cand->iv->base_object == NULL_TREE)
5956 continue;
5959 if (iv_ca_cand_used_p (ivs, cand))
5960 continue;
5962 act_delta = NULL;
5963 iv_ca_set_cp (data, ivs, use, cp);
5964 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5965 iv_ca_set_no_cp (data, ivs, use);
5966 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5967 cp, act_delta);
5969 if (compare_costs (act_cost, best_cost) < 0)
5971 best_cost = act_cost;
5973 if (best_delta)
5974 iv_ca_delta_free (&best_delta);
5975 best_delta = act_delta;
5977 else
5978 iv_ca_delta_free (&act_delta);
5982 iv_ca_delta_commit (data, ivs, best_delta, true);
5983 iv_ca_delta_free (&best_delta);
5985 return !infinite_cost_p (best_cost);
5988 /* Finds an initial assignment of candidates to uses. */
5990 static struct iv_ca *
5991 get_initial_solution (struct ivopts_data *data, bool originalp)
5993 struct iv_ca *ivs = iv_ca_new (data);
5994 unsigned i;
5996 for (i = 0; i < n_iv_uses (data); i++)
5997 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5999 iv_ca_free (&ivs);
6000 return NULL;
6003 return ivs;
6006 /* Tries to improve set of induction variables IVS. */
6008 static bool
6009 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6011 unsigned i, n_ivs;
6012 comp_cost acost, best_cost = iv_ca_cost (ivs);
6013 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6014 struct iv_cand *cand;
6016 /* Try extending the set of induction variables by one. */
6017 for (i = 0; i < n_iv_cands (data); i++)
6019 cand = iv_cand (data, i);
6021 if (iv_ca_cand_used_p (ivs, cand))
6022 continue;
6024 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6025 if (!act_delta)
6026 continue;
6028 /* If we successfully added the candidate and the set is small enough,
6029 try optimizing it by removing other candidates. */
6030 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6032 iv_ca_delta_commit (data, ivs, act_delta, true);
6033 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6034 iv_ca_delta_commit (data, ivs, act_delta, false);
6035 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6038 if (compare_costs (acost, best_cost) < 0)
6040 best_cost = acost;
6041 iv_ca_delta_free (&best_delta);
6042 best_delta = act_delta;
6044 else
6045 iv_ca_delta_free (&act_delta);
6048 if (!best_delta)
6050 /* Try removing the candidates from the set instead. */
6051 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6053 /* Nothing more we can do. */
6054 if (!best_delta)
6055 return false;
6058 iv_ca_delta_commit (data, ivs, best_delta, true);
6059 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6060 iv_ca_delta_free (&best_delta);
6061 return true;
6064 /* Attempts to find the optimal set of induction variables. We do simple
6065 greedy heuristic -- we try to replace at most one candidate in the selected
6066 solution and remove the unused ivs while this improves the cost. */
6068 static struct iv_ca *
6069 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6071 struct iv_ca *set;
6073 /* Get the initial solution. */
6074 set = get_initial_solution (data, originalp);
6075 if (!set)
6077 if (dump_file && (dump_flags & TDF_DETAILS))
6078 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6079 return NULL;
6082 if (dump_file && (dump_flags & TDF_DETAILS))
6084 fprintf (dump_file, "Initial set of candidates:\n");
6085 iv_ca_dump (data, dump_file, set);
6088 while (try_improve_iv_set (data, set))
6090 if (dump_file && (dump_flags & TDF_DETAILS))
6092 fprintf (dump_file, "Improved to:\n");
6093 iv_ca_dump (data, dump_file, set);
6097 return set;
6100 static struct iv_ca *
6101 find_optimal_iv_set (struct ivopts_data *data)
6103 unsigned i;
6104 struct iv_ca *set, *origset;
6105 struct iv_use *use;
6106 comp_cost cost, origcost;
6108 /* Determine the cost based on a strategy that starts with original IVs,
6109 and try again using a strategy that prefers candidates not based
6110 on any IVs. */
6111 origset = find_optimal_iv_set_1 (data, true);
6112 set = find_optimal_iv_set_1 (data, false);
6114 if (!origset && !set)
6115 return NULL;
6117 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6118 cost = set ? iv_ca_cost (set) : infinite_cost;
6120 if (dump_file && (dump_flags & TDF_DETAILS))
6122 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6123 origcost.cost, origcost.complexity);
6124 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6125 cost.cost, cost.complexity);
6128 /* Choose the one with the best cost. */
6129 if (compare_costs (origcost, cost) <= 0)
6131 if (set)
6132 iv_ca_free (&set);
6133 set = origset;
6135 else if (origset)
6136 iv_ca_free (&origset);
6138 for (i = 0; i < n_iv_uses (data); i++)
6140 use = iv_use (data, i);
6141 use->selected = iv_ca_cand_for_use (set, use)->cand;
6144 return set;
6147 /* Creates a new induction variable corresponding to CAND. */
6149 static void
6150 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6152 gimple_stmt_iterator incr_pos;
6153 tree base;
6154 bool after = false;
6156 if (!cand->iv)
6157 return;
6159 switch (cand->pos)
6161 case IP_NORMAL:
6162 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6163 break;
6165 case IP_END:
6166 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6167 after = true;
6168 break;
6170 case IP_AFTER_USE:
6171 after = true;
6172 /* fall through */
6173 case IP_BEFORE_USE:
6174 incr_pos = gsi_for_stmt (cand->incremented_at);
6175 break;
6177 case IP_ORIGINAL:
6178 /* Mark that the iv is preserved. */
6179 name_info (data, cand->var_before)->preserve_biv = true;
6180 name_info (data, cand->var_after)->preserve_biv = true;
6182 /* Rewrite the increment so that it uses var_before directly. */
6183 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6184 return;
6187 gimple_add_tmp_var (cand->var_before);
6189 base = unshare_expr (cand->iv->base);
6191 create_iv (base, unshare_expr (cand->iv->step),
6192 cand->var_before, data->current_loop,
6193 &incr_pos, after, &cand->var_before, &cand->var_after);
6196 /* Creates new induction variables described in SET. */
6198 static void
6199 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6201 unsigned i;
6202 struct iv_cand *cand;
6203 bitmap_iterator bi;
6205 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6207 cand = iv_cand (data, i);
6208 create_new_iv (data, cand);
6211 if (dump_file && (dump_flags & TDF_DETAILS))
6213 fprintf (dump_file, "\nSelected IV set: \n");
6214 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6216 cand = iv_cand (data, i);
6217 dump_cand (dump_file, cand);
6219 fprintf (dump_file, "\n");
6223 /* Rewrites USE (definition of iv used in a nonlinear expression)
6224 using candidate CAND. */
6226 static void
6227 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6228 struct iv_use *use, struct iv_cand *cand)
6230 tree comp;
6231 tree op, tgt;
6232 gimple ass;
6233 gimple_stmt_iterator bsi;
6235 /* An important special case -- if we are asked to express value of
6236 the original iv by itself, just exit; there is no need to
6237 introduce a new computation (that might also need casting the
6238 variable to unsigned and back). */
6239 if (cand->pos == IP_ORIGINAL
6240 && cand->incremented_at == use->stmt)
6242 enum tree_code stmt_code;
6244 gcc_assert (is_gimple_assign (use->stmt));
6245 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6247 /* Check whether we may leave the computation unchanged.
6248 This is the case only if it does not rely on other
6249 computations in the loop -- otherwise, the computation
6250 we rely upon may be removed in remove_unused_ivs,
6251 thus leading to ICE. */
6252 stmt_code = gimple_assign_rhs_code (use->stmt);
6253 if (stmt_code == PLUS_EXPR
6254 || stmt_code == MINUS_EXPR
6255 || stmt_code == POINTER_PLUS_EXPR)
6257 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6258 op = gimple_assign_rhs2 (use->stmt);
6259 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6260 op = gimple_assign_rhs1 (use->stmt);
6261 else
6262 op = NULL_TREE;
6264 else
6265 op = NULL_TREE;
6267 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6268 return;
6271 comp = get_computation (data->current_loop, use, cand);
6272 gcc_assert (comp != NULL_TREE);
6274 switch (gimple_code (use->stmt))
6276 case GIMPLE_PHI:
6277 tgt = PHI_RESULT (use->stmt);
6279 /* If we should keep the biv, do not replace it. */
6280 if (name_info (data, tgt)->preserve_biv)
6281 return;
6283 bsi = gsi_after_labels (gimple_bb (use->stmt));
6284 break;
6286 case GIMPLE_ASSIGN:
6287 tgt = gimple_assign_lhs (use->stmt);
6288 bsi = gsi_for_stmt (use->stmt);
6289 break;
6291 default:
6292 gcc_unreachable ();
6295 if (!valid_gimple_rhs_p (comp)
6296 || (gimple_code (use->stmt) != GIMPLE_PHI
6297 /* We can't allow re-allocating the stmt as it might be pointed
6298 to still. */
6299 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6300 >= gimple_num_ops (gsi_stmt (bsi)))))
6302 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6303 true, GSI_SAME_STMT);
6304 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6306 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6307 /* As this isn't a plain copy we have to reset alignment
6308 information. */
6309 if (SSA_NAME_PTR_INFO (comp))
6310 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6314 if (gimple_code (use->stmt) == GIMPLE_PHI)
6316 ass = gimple_build_assign (tgt, comp);
6317 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6319 bsi = gsi_for_stmt (use->stmt);
6320 remove_phi_node (&bsi, false);
6322 else
6324 gimple_assign_set_rhs_from_tree (&bsi, comp);
6325 use->stmt = gsi_stmt (bsi);
6329 /* Performs a peephole optimization to reorder the iv update statement with
6330 a mem ref to enable instruction combining in later phases. The mem ref uses
6331 the iv value before the update, so the reordering transformation requires
6332 adjustment of the offset. CAND is the selected IV_CAND.
6334 Example:
6336 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6337 iv2 = iv1 + 1;
6339 if (t < val) (1)
6340 goto L;
6341 goto Head;
6344 directly propagating t over to (1) will introduce overlapping live range
6345 thus increase register pressure. This peephole transform it into:
6348 iv2 = iv1 + 1;
6349 t = MEM_REF (base, iv2, 8, 8);
6350 if (t < val)
6351 goto L;
6352 goto Head;
6355 static void
6356 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6358 tree var_after;
6359 gimple iv_update, stmt;
6360 basic_block bb;
6361 gimple_stmt_iterator gsi, gsi_iv;
6363 if (cand->pos != IP_NORMAL)
6364 return;
6366 var_after = cand->var_after;
6367 iv_update = SSA_NAME_DEF_STMT (var_after);
6369 bb = gimple_bb (iv_update);
6370 gsi = gsi_last_nondebug_bb (bb);
6371 stmt = gsi_stmt (gsi);
6373 /* Only handle conditional statement for now. */
6374 if (gimple_code (stmt) != GIMPLE_COND)
6375 return;
6377 gsi_prev_nondebug (&gsi);
6378 stmt = gsi_stmt (gsi);
6379 if (stmt != iv_update)
6380 return;
6382 gsi_prev_nondebug (&gsi);
6383 if (gsi_end_p (gsi))
6384 return;
6386 stmt = gsi_stmt (gsi);
6387 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6388 return;
6390 if (stmt != use->stmt)
6391 return;
6393 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6394 return;
6396 if (dump_file && (dump_flags & TDF_DETAILS))
6398 fprintf (dump_file, "Reordering \n");
6399 print_gimple_stmt (dump_file, iv_update, 0, 0);
6400 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6401 fprintf (dump_file, "\n");
6404 gsi = gsi_for_stmt (use->stmt);
6405 gsi_iv = gsi_for_stmt (iv_update);
6406 gsi_move_before (&gsi_iv, &gsi);
6408 cand->pos = IP_BEFORE_USE;
6409 cand->incremented_at = use->stmt;
6412 /* Rewrites USE (address that is an iv) using candidate CAND. */
6414 static void
6415 rewrite_use_address (struct ivopts_data *data,
6416 struct iv_use *use, struct iv_cand *cand)
6418 aff_tree aff;
6419 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6420 tree base_hint = NULL_TREE;
6421 tree ref, iv;
6422 bool ok;
6424 adjust_iv_update_pos (cand, use);
6425 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6426 gcc_assert (ok);
6427 unshare_aff_combination (&aff);
6429 /* To avoid undefined overflow problems, all IV candidates use unsigned
6430 integer types. The drawback is that this makes it impossible for
6431 create_mem_ref to distinguish an IV that is based on a memory object
6432 from one that represents simply an offset.
6434 To work around this problem, we pass a hint to create_mem_ref that
6435 indicates which variable (if any) in aff is an IV based on a memory
6436 object. Note that we only consider the candidate. If this is not
6437 based on an object, the base of the reference is in some subexpression
6438 of the use -- but these will use pointer types, so they are recognized
6439 by the create_mem_ref heuristics anyway. */
6440 if (cand->iv->base_object)
6441 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6443 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6444 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6445 reference_alias_ptr_type (*use->op_p),
6446 iv, base_hint, data->speed);
6447 copy_ref_info (ref, *use->op_p);
6448 *use->op_p = ref;
6451 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6452 candidate CAND. */
6454 static void
6455 rewrite_use_compare (struct ivopts_data *data,
6456 struct iv_use *use, struct iv_cand *cand)
6458 tree comp, *var_p, op, bound;
6459 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6460 enum tree_code compare;
6461 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6462 bool ok;
6464 bound = cp->value;
6465 if (bound)
6467 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6468 tree var_type = TREE_TYPE (var);
6469 gimple_seq stmts;
6471 if (dump_file && (dump_flags & TDF_DETAILS))
6473 fprintf (dump_file, "Replacing exit test: ");
6474 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6476 compare = cp->comp;
6477 bound = unshare_expr (fold_convert (var_type, bound));
6478 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6479 if (stmts)
6480 gsi_insert_seq_on_edge_immediate (
6481 loop_preheader_edge (data->current_loop),
6482 stmts);
6484 gimple_cond_set_lhs (use->stmt, var);
6485 gimple_cond_set_code (use->stmt, compare);
6486 gimple_cond_set_rhs (use->stmt, op);
6487 return;
6490 /* The induction variable elimination failed; just express the original
6491 giv. */
6492 comp = get_computation (data->current_loop, use, cand);
6493 gcc_assert (comp != NULL_TREE);
6495 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6496 gcc_assert (ok);
6498 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6499 true, GSI_SAME_STMT);
6502 /* Rewrites USE using candidate CAND. */
6504 static void
6505 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6507 switch (use->type)
6509 case USE_NONLINEAR_EXPR:
6510 rewrite_use_nonlinear_expr (data, use, cand);
6511 break;
6513 case USE_ADDRESS:
6514 rewrite_use_address (data, use, cand);
6515 break;
6517 case USE_COMPARE:
6518 rewrite_use_compare (data, use, cand);
6519 break;
6521 default:
6522 gcc_unreachable ();
6525 update_stmt (use->stmt);
6528 /* Rewrite the uses using the selected induction variables. */
6530 static void
6531 rewrite_uses (struct ivopts_data *data)
6533 unsigned i;
6534 struct iv_cand *cand;
6535 struct iv_use *use;
6537 for (i = 0; i < n_iv_uses (data); i++)
6539 use = iv_use (data, i);
6540 cand = use->selected;
6541 gcc_assert (cand);
6543 rewrite_use (data, use, cand);
6547 /* Removes the ivs that are not used after rewriting. */
6549 static void
6550 remove_unused_ivs (struct ivopts_data *data)
6552 unsigned j;
6553 bitmap_iterator bi;
6554 bitmap toremove = BITMAP_ALLOC (NULL);
6556 /* Figure out an order in which to release SSA DEFs so that we don't
6557 release something that we'd have to propagate into a debug stmt
6558 afterwards. */
6559 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6561 struct version_info *info;
6563 info = ver_info (data, j);
6564 if (info->iv
6565 && !integer_zerop (info->iv->step)
6566 && !info->inv_id
6567 && !info->iv->have_use_for
6568 && !info->preserve_biv)
6570 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6572 tree def = info->iv->ssa_name;
6574 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6576 imm_use_iterator imm_iter;
6577 use_operand_p use_p;
6578 gimple stmt;
6579 int count = 0;
6581 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6583 if (!gimple_debug_bind_p (stmt))
6584 continue;
6586 /* We just want to determine whether to do nothing
6587 (count == 0), to substitute the computed
6588 expression into a single use of the SSA DEF by
6589 itself (count == 1), or to use a debug temp
6590 because the SSA DEF is used multiple times or as
6591 part of a larger expression (count > 1). */
6592 count++;
6593 if (gimple_debug_bind_get_value (stmt) != def)
6594 count++;
6596 if (count > 1)
6597 BREAK_FROM_IMM_USE_STMT (imm_iter);
6600 if (!count)
6601 continue;
6603 struct iv_use dummy_use;
6604 struct iv_cand *best_cand = NULL, *cand;
6605 unsigned i, best_pref = 0, cand_pref;
6607 memset (&dummy_use, 0, sizeof (dummy_use));
6608 dummy_use.iv = info->iv;
6609 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6611 cand = iv_use (data, i)->selected;
6612 if (cand == best_cand)
6613 continue;
6614 cand_pref = operand_equal_p (cand->iv->step,
6615 info->iv->step, 0)
6616 ? 4 : 0;
6617 cand_pref
6618 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6619 == TYPE_MODE (TREE_TYPE (info->iv->base))
6620 ? 2 : 0;
6621 cand_pref
6622 += TREE_CODE (cand->iv->base) == INTEGER_CST
6623 ? 1 : 0;
6624 if (best_cand == NULL || best_pref < cand_pref)
6626 best_cand = cand;
6627 best_pref = cand_pref;
6631 if (!best_cand)
6632 continue;
6634 tree comp = get_computation_at (data->current_loop,
6635 &dummy_use, best_cand,
6636 SSA_NAME_DEF_STMT (def));
6637 if (!comp)
6638 continue;
6640 if (count > 1)
6642 tree vexpr = make_node (DEBUG_EXPR_DECL);
6643 DECL_ARTIFICIAL (vexpr) = 1;
6644 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6645 if (SSA_NAME_VAR (def))
6646 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6647 else
6648 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6649 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6650 gimple_stmt_iterator gsi;
6652 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6653 gsi = gsi_after_labels (gimple_bb
6654 (SSA_NAME_DEF_STMT (def)));
6655 else
6656 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6658 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6659 comp = vexpr;
6662 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6664 if (!gimple_debug_bind_p (stmt))
6665 continue;
6667 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6668 SET_USE (use_p, comp);
6670 update_stmt (stmt);
6676 release_defs_bitset (toremove);
6678 BITMAP_FREE (toremove);
6681 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6682 for pointer_map_traverse. */
6684 static bool
6685 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6686 void *data ATTRIBUTE_UNUSED)
6688 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6690 free (niter);
6691 return true;
6694 /* Frees data allocated by the optimization of a single loop. */
6696 static void
6697 free_loop_data (struct ivopts_data *data)
6699 unsigned i, j;
6700 bitmap_iterator bi;
6701 tree obj;
6703 if (data->niters)
6705 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6706 pointer_map_destroy (data->niters);
6707 data->niters = NULL;
6710 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6712 struct version_info *info;
6714 info = ver_info (data, i);
6715 free (info->iv);
6716 info->iv = NULL;
6717 info->has_nonlin_use = false;
6718 info->preserve_biv = false;
6719 info->inv_id = 0;
6721 bitmap_clear (data->relevant);
6722 bitmap_clear (data->important_candidates);
6724 for (i = 0; i < n_iv_uses (data); i++)
6726 struct iv_use *use = iv_use (data, i);
6728 free (use->iv);
6729 BITMAP_FREE (use->related_cands);
6730 for (j = 0; j < use->n_map_members; j++)
6731 if (use->cost_map[j].depends_on)
6732 BITMAP_FREE (use->cost_map[j].depends_on);
6733 free (use->cost_map);
6734 free (use);
6736 data->iv_uses.truncate (0);
6738 for (i = 0; i < n_iv_cands (data); i++)
6740 struct iv_cand *cand = iv_cand (data, i);
6742 free (cand->iv);
6743 if (cand->depends_on)
6744 BITMAP_FREE (cand->depends_on);
6745 free (cand);
6747 data->iv_candidates.truncate (0);
6749 if (data->version_info_size < num_ssa_names)
6751 data->version_info_size = 2 * num_ssa_names;
6752 free (data->version_info);
6753 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6756 data->max_inv_id = 0;
6758 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6759 SET_DECL_RTL (obj, NULL_RTX);
6761 decl_rtl_to_reset.truncate (0);
6763 data->inv_expr_tab.empty ();
6764 data->inv_expr_id = 0;
6767 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6768 loop tree. */
6770 static void
6771 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6773 free_loop_data (data);
6774 free (data->version_info);
6775 BITMAP_FREE (data->relevant);
6776 BITMAP_FREE (data->important_candidates);
6778 decl_rtl_to_reset.release ();
6779 data->iv_uses.release ();
6780 data->iv_candidates.release ();
6781 data->inv_expr_tab.dispose ();
6784 /* Returns true if the loop body BODY includes any function calls. */
6786 static bool
6787 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6789 gimple_stmt_iterator gsi;
6790 unsigned i;
6792 for (i = 0; i < num_nodes; i++)
6793 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6795 gimple stmt = gsi_stmt (gsi);
6796 if (is_gimple_call (stmt)
6797 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6798 return true;
6800 return false;
6803 /* Optimizes the LOOP. Returns true if anything changed. */
6805 static bool
6806 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6808 bool changed = false;
6809 struct iv_ca *iv_ca;
6810 edge exit = single_dom_exit (loop);
6811 basic_block *body;
6813 gcc_assert (!data->niters);
6814 data->current_loop = loop;
6815 data->speed = optimize_loop_for_speed_p (loop);
6817 if (dump_file && (dump_flags & TDF_DETAILS))
6819 fprintf (dump_file, "Processing loop %d\n", loop->num);
6821 if (exit)
6823 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6824 exit->src->index, exit->dest->index);
6825 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6826 fprintf (dump_file, "\n");
6829 fprintf (dump_file, "\n");
6832 body = get_loop_body (loop);
6833 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6834 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6835 free (body);
6837 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6839 /* For each ssa name determines whether it behaves as an induction variable
6840 in some loop. */
6841 if (!find_induction_variables (data))
6842 goto finish;
6844 /* Finds interesting uses (item 1). */
6845 find_interesting_uses (data);
6846 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6847 goto finish;
6849 /* Finds candidates for the induction variables (item 2). */
6850 find_iv_candidates (data);
6852 /* Calculates the costs (item 3, part 1). */
6853 determine_iv_costs (data);
6854 determine_use_iv_costs (data);
6855 determine_set_costs (data);
6857 /* Find the optimal set of induction variables (item 3, part 2). */
6858 iv_ca = find_optimal_iv_set (data);
6859 if (!iv_ca)
6860 goto finish;
6861 changed = true;
6863 /* Create the new induction variables (item 4, part 1). */
6864 create_new_ivs (data, iv_ca);
6865 iv_ca_free (&iv_ca);
6867 /* Rewrite the uses (item 4, part 2). */
6868 rewrite_uses (data);
6870 /* Remove the ivs that are unused after rewriting. */
6871 remove_unused_ivs (data);
6873 /* We have changed the structure of induction variables; it might happen
6874 that definitions in the scev database refer to some of them that were
6875 eliminated. */
6876 scev_reset ();
6878 finish:
6879 free_loop_data (data);
6881 return changed;
6884 /* Main entry point. Optimizes induction variables in loops. */
6886 void
6887 tree_ssa_iv_optimize (void)
6889 struct loop *loop;
6890 struct ivopts_data data;
6892 tree_ssa_iv_optimize_init (&data);
6894 /* Optimize the loops starting with the innermost ones. */
6895 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6897 if (dump_file && (dump_flags & TDF_DETAILS))
6898 flow_loop_dump (loop, dump_file, NULL, 1);
6900 tree_ssa_iv_optimize_loop (&data, loop);
6903 tree_ssa_iv_optimize_finalize (&data);