/* gcc/tree-ssa-loop-ivopts.c */
/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
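
/* As a concrete illustration (an editor's sketch, not part of the original
   sources), consider what the steps above do to a simple array walk:

     for (i = 0; i < n; i++)
       sum += a[i];

   The address a + i * sizeof (*a) is an interesting use, i is a candidate,
   and if a pointer candidate wins on cost, the loop is rewritten roughly as

     for (p = a, end = a + n; p < end; p++)
       sum += *p;

   turning the per-iteration multiply into a cheap pointer increment and
   letting the exit test compare the new induction variable directly.  */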
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimplify.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations, used when no profile data
   is available.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this many ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so that
         we do not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
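
/* For example (an illustrative note, not from the original sources), the
   pointer expression &a[0] + 4 * i is reduced to the base object &a via the
   POINTER_PLUS_EXPR and ADDR_EXPR cases above, while an integer constant
   cast to a pointer yields NULL_TREE, since it points to no known
   object.  */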

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
        forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
                                             &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
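
/* An illustrative example (not from the original sources): for the loop
   header PHI

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   determine_biv_step returns 1, so i_1 becomes a biv with base 0 and step 1;
   derived values such as 4 * i_1 are later picked up as givs by
   find_givs.  */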

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, in case it
     was not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
   obviously invariant, i.e. such that all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
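
/* A small worked example (illustrative only): for a reference a[i] with
   4-byte elements, where the index i is an iv with step 2, the ARRAY_REF
   branch above multiplies the element size by the converted iv step, so 8
   is accumulated into DTA->step per iteration.  */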

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
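
/* Worked examples (illustrative, not from the original sources): for
   TOP = i * 8 + i * 4 and BOT = i, the MULT_EXPR and PLUS_EXPR cases
   combine to give *MUL = 12; for TOP = 24 and BOT = 8, the INTEGER_CST
   case gives *MUL = 3; for TOP = 25 and BOT = 8 the division leaves a
   remainder, so the function returns false.  */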

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      {
        tree field;

        if (!inside_addr)
          return orig_expr;

        tmp = component_ref_field_offset (expr);
        field = TREE_OPERAND (expr, 1);
        if (top_compref
            && cst_and_fits_in_hwi (tmp)
            && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
          {
            HOST_WIDE_INT boffset, abs_off;

            /* Strip the component reference completely.  */
            op0 = TREE_OPERAND (expr, 0);
            op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
            boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
            abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
            if (boffset < 0)
              abs_off = -abs_off;

            *offset = off0 + int_cst_value (tmp) + abs_off;
            return op0;
          }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
2212 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2214 static tree
2215 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2217 HOST_WIDE_INT off;
2218 tree core = strip_offset_1 (expr, false, false, &off);
2219 *offset = off;
2220 return core;
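/* For illustration, a minimal sketch of what strip_offset computes on a few
   hypothetical input trees (not taken from this file):

       EXPR                  returned core        *OFFSET
       p + 12                p                    12
       &a[i + 3]             &a[i]                3 * sizeof (a[0])
       (q - 8) * 4           q * 4                -32

   The constant displacement is split off so that later cost computations
   can treat it separately from the symbolic part of the address. */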
2223 /* Returns a variant of TYPE that can be used as a base for different uses.
2224 We return an unsigned type with the same precision, which avoids problems
2225 with overflows. */
2227 static tree
2228 generic_type_for (tree type)
2230 if (POINTER_TYPE_P (type))
2231 return unsigned_type_for (type);
2233 if (TYPE_UNSIGNED (type))
2234 return type;
2236 return unsigned_type_for (type);
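/* A sketch of the mapping performed above, under common C type layouts
   (illustrative only):

       int        -> unsigned int
       unsigned   -> unsigned        (already unsigned, returned unchanged)
       int *      -> the unsigned integer type of pointer precision

   Overflow of the unsigned variant wraps instead of being undefined, so
   candidate values may safely be computed in it. */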
2239 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2240 the bitmap to which we should store them. */
2242 static struct ivopts_data *fd_ivopts_data;
2243 static tree
2244 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2246 bitmap *depends_on = (bitmap *) data;
2247 struct version_info *info;
2249 if (TREE_CODE (*expr_p) != SSA_NAME)
2250 return NULL_TREE;
2251 info = name_info (fd_ivopts_data, *expr_p);
2253 if (!info->inv_id || info->has_nonlin_use)
2254 return NULL_TREE;
2256 if (!*depends_on)
2257 *depends_on = BITMAP_ALLOC (NULL);
2258 bitmap_set_bit (*depends_on, info->inv_id);
2260 return NULL_TREE;
2263 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2264 and the position to POS. If USE is not NULL, the candidate is set as related to
2265 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2266 replacement of the final value of the iv by a direct computation. */
2268 static struct iv_cand *
2269 add_candidate_1 (struct ivopts_data *data,
2270 tree base, tree step, bool important, enum iv_position pos,
2271 struct iv_use *use, gimple incremented_at)
2273 unsigned i;
2274 struct iv_cand *cand = NULL;
2275 tree type, orig_type;
2277 /* For non-original variables, make sure their values are computed in a type
2278 that does not invoke undefined behavior on overflows (since in general,
2279 we cannot prove that these induction variables are non-wrapping). */
2280 if (pos != IP_ORIGINAL)
2282 orig_type = TREE_TYPE (base);
2283 type = generic_type_for (orig_type);
2284 if (type != orig_type)
2286 base = fold_convert (type, base);
2287 step = fold_convert (type, step);
2291 for (i = 0; i < n_iv_cands (data); i++)
2293 cand = iv_cand (data, i);
2295 if (cand->pos != pos)
2296 continue;
2298 if (cand->incremented_at != incremented_at
2299 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2300 && cand->ainc_use != use))
2301 continue;
2303 if (!cand->iv)
2305 if (!base && !step)
2306 break;
2308 continue;
2311 if (!base && !step)
2312 continue;
2314 if (operand_equal_p (base, cand->iv->base, 0)
2315 && operand_equal_p (step, cand->iv->step, 0)
2316 && (TYPE_PRECISION (TREE_TYPE (base))
2317 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2318 break;
2321 if (i == n_iv_cands (data))
2323 cand = XCNEW (struct iv_cand);
2324 cand->id = i;
2326 if (!base && !step)
2327 cand->iv = NULL;
2328 else
2329 cand->iv = alloc_iv (base, step);
2331 cand->pos = pos;
2332 if (pos != IP_ORIGINAL && cand->iv)
2334 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2335 cand->var_after = cand->var_before;
2337 cand->important = important;
2338 cand->incremented_at = incremented_at;
2339 data->iv_candidates.safe_push (cand);
2341 if (step
2342 && TREE_CODE (step) != INTEGER_CST)
2344 fd_ivopts_data = data;
2345 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2348 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2349 cand->ainc_use = use;
2350 else
2351 cand->ainc_use = NULL;
2353 if (dump_file && (dump_flags & TDF_DETAILS))
2354 dump_cand (dump_file, cand);
2357 if (important && !cand->important)
2359 cand->important = true;
2360 if (dump_file && (dump_flags & TDF_DETAILS))
2361 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2364 if (use)
2366 bitmap_set_bit (use->related_cands, i);
2367 if (dump_file && (dump_flags & TDF_DETAILS))
2368 fprintf (dump_file, "Candidate %d is related to use %d\n",
2369 cand->id, use->id);
2372 return cand;
2375 /* Returns true if incrementing the induction variable at the end of the LOOP
2376 is allowed.
2378 The purpose is to avoid splitting the latch edge with a biv increment, thus
2379 creating a jump, possibly confusing other optimization passes and leaving
2380 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2381 is not available (so we do not have a better alternative), or if the latch
2382 edge is already nonempty. */
2384 static bool
2385 allow_ip_end_pos_p (struct loop *loop)
2387 if (!ip_normal_pos (loop))
2388 return true;
2390 if (!empty_block_p (ip_end_pos (loop)))
2391 return true;
2393 return false;
2396 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2397 The important field is set to IMPORTANT. */
2399 static void
2400 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2401 bool important, struct iv_use *use)
2403 basic_block use_bb = gimple_bb (use->stmt);
2404 enum machine_mode mem_mode;
2405 unsigned HOST_WIDE_INT cstepi;
2407 /* If we insert the increment in any position other than the standard
2408 ones, we must ensure that it is incremented once per iteration.
2409 It must not be in an inner nested loop, or one side of an if
2410 statement. */
2411 if (use_bb->loop_father != data->current_loop
2412 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2413 || stmt_could_throw_p (use->stmt)
2414 || !cst_and_fits_in_hwi (step))
2415 return;
2417 cstepi = int_cst_value (step);
2419 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2420 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2421 || USE_STORE_PRE_INCREMENT (mem_mode))
2422 && GET_MODE_SIZE (mem_mode) == cstepi)
2423 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2424 || USE_STORE_PRE_DECREMENT (mem_mode))
2425 && GET_MODE_SIZE (mem_mode) == -cstepi))
2427 enum tree_code code = MINUS_EXPR;
2428 tree new_base;
2429 tree new_step = step;
2431 if (POINTER_TYPE_P (TREE_TYPE (base)))
2433 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2434 code = POINTER_PLUS_EXPR;
2436 else
2437 new_step = fold_convert (TREE_TYPE (base), new_step);
2438 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2439 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2440 use->stmt);
2442 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2443 || USE_STORE_POST_INCREMENT (mem_mode))
2444 && GET_MODE_SIZE (mem_mode) == cstepi)
2445 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2446 || USE_STORE_POST_DECREMENT (mem_mode))
2447 && GET_MODE_SIZE (mem_mode) == -cstepi))
2449 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2450 use->stmt);
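/* As a target-dependent sketch (purely illustrative): for a use *p whose
   iv steps by 1 byte, a target providing QImode post-increment satisfies
   GET_MODE_SIZE (mem_mode) == cstepi, so the candidate {base = p, step = 1}
   is added at IP_AFTER_USE -- the shape the rewrite can later turn into an
   access of the *p++ form. */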
2454 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2455 and the position to POS. If USE is not NULL, the candidate is set as related to
2456 it. The candidate computation is scheduled on all available positions. */
2458 static void
2459 add_candidate (struct ivopts_data *data,
2460 tree base, tree step, bool important, struct iv_use *use)
2462 if (ip_normal_pos (data->current_loop))
2463 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2464 if (ip_end_pos (data->current_loop)
2465 && allow_ip_end_pos_p (data->current_loop))
2466 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2468 if (use != NULL && use->type == USE_ADDRESS)
2469 add_autoinc_candidates (data, base, step, important, use);
2472 /* Adds standard iv candidates. */
2474 static void
2475 add_standard_iv_candidates (struct ivopts_data *data)
2477 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2479 /* The same for the long type if it is still fast enough. */
2480 if (TYPE_PRECISION
2481 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2482 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2483 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2484 build_int_cst (long_integer_type_node, 1), true, NULL);
2486 /* The same for the long long type if it is still fast enough. */
2487 if (TYPE_PRECISION
2488 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2489 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2490 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2491 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2495 /* Adds candidates based on the old induction variable IV. */
2497 static void
2498 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2500 gimple phi;
2501 tree def;
2502 struct iv_cand *cand;
2504 add_candidate (data, iv->base, iv->step, true, NULL);
2506 /* The same, but with initial value zero. */
2507 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2508 add_candidate (data, size_int (0), iv->step, true, NULL);
2509 else
2510 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2511 iv->step, true, NULL);
2513 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2514 if (gimple_code (phi) == GIMPLE_PHI)
2516 /* Additionally record the possibility of leaving the original iv
2517 untouched. */
2518 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2519 cand = add_candidate_1 (data,
2520 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2521 SSA_NAME_DEF_STMT (def));
2522 cand->var_before = iv->ssa_name;
2523 cand->var_after = def;
2527 /* Adds candidates based on the old induction variables. */
2529 static void
2530 add_old_ivs_candidates (struct ivopts_data *data)
2532 unsigned i;
2533 struct iv *iv;
2534 bitmap_iterator bi;
2536 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2538 iv = ver_info (data, i)->iv;
2539 if (iv && iv->biv_p && !integer_zerop (iv->step))
2540 add_old_iv_candidates (data, iv);
2544 /* Adds candidates based on the value of the induction variable IV and USE. */
2546 static void
2547 add_iv_value_candidates (struct ivopts_data *data,
2548 struct iv *iv, struct iv_use *use)
2550 unsigned HOST_WIDE_INT offset;
2551 tree base;
2552 tree basetype;
2554 add_candidate (data, iv->base, iv->step, false, use);
2556 /* The same, but with initial value zero. Make such variable important,
2557 since it is generic enough so that possibly many uses may be based
2558 on it. */
2559 basetype = TREE_TYPE (iv->base);
2560 if (POINTER_TYPE_P (basetype))
2561 basetype = sizetype;
2562 add_candidate (data, build_int_cst (basetype, 0),
2563 iv->step, true, use);
2565 /* Third, try removing the constant offset. Make sure to even
2566 add a candidate for &a[0] vs. (T *)&a. */
2567 base = strip_offset (iv->base, &offset);
2568 if (offset
2569 || base != iv->base)
2570 add_candidate (data, base, iv->step, false, use);
2573 /* Adds candidates based on the uses. */
2575 static void
2576 add_derived_ivs_candidates (struct ivopts_data *data)
2578 unsigned i;
2580 for (i = 0; i < n_iv_uses (data); i++)
2582 struct iv_use *use = iv_use (data, i);
2584 if (!use)
2585 continue;
2587 switch (use->type)
2589 case USE_NONLINEAR_EXPR:
2590 case USE_COMPARE:
2591 case USE_ADDRESS:
2592 /* Just add the ivs based on the value of the iv used here. */
2593 add_iv_value_candidates (data, use->iv, use);
2594 break;
2596 default:
2597 gcc_unreachable ();
2602 /* Record important candidates and add them to related_cands bitmaps
2603 if needed. */
2605 static void
2606 record_important_candidates (struct ivopts_data *data)
2608 unsigned i;
2609 struct iv_use *use;
2611 for (i = 0; i < n_iv_cands (data); i++)
2613 struct iv_cand *cand = iv_cand (data, i);
2615 if (cand->important)
2616 bitmap_set_bit (data->important_candidates, i);
2619 data->consider_all_candidates = (n_iv_cands (data)
2620 <= CONSIDER_ALL_CANDIDATES_BOUND);
2622 if (data->consider_all_candidates)
2624 /* We will not need "related_cands" bitmaps in this case,
2625 so release them to decrease peak memory consumption. */
2626 for (i = 0; i < n_iv_uses (data); i++)
2628 use = iv_use (data, i);
2629 BITMAP_FREE (use->related_cands);
2632 else
2634 /* Add important candidates to the related_cands bitmaps. */
2635 for (i = 0; i < n_iv_uses (data); i++)
2636 bitmap_ior_into (iv_use (data, i)->related_cands,
2637 data->important_candidates);
2641 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2642 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2643 we allocate a simple list to every use. */
2645 static void
2646 alloc_use_cost_map (struct ivopts_data *data)
2648 unsigned i, size, s;
2650 for (i = 0; i < n_iv_uses (data); i++)
2652 struct iv_use *use = iv_use (data, i);
2654 if (data->consider_all_candidates)
2655 size = n_iv_cands (data);
2656 else
2658 s = bitmap_count_bits (use->related_cands);
2660 /* Round up to a power of two, so that taking the modulus by it is fast. */
2661 size = s ? (1 << ceil_log2 (s)) : 1;
2664 use->n_map_members = size;
2665 use->cost_map = XCNEWVEC (struct cost_pair, size);
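/* A minimal sketch, with hypothetical values, of the rounding above and of
   the masking it enables in set_use_iv_cost and get_use_iv_cost below:  */
#if 0
unsigned s = 5;                               /* five related candidates */
unsigned size = s ? (1 << ceil_log2 (s)) : 1; /* rounds up to size == 8 */
/* Since size is a power of two, id % size == id & (size - 1);
   e.g. for id == 13, 13 & 7 == 5.  */
unsigned slot = 13 & (size - 1);              /* slot == 5 */
#endif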
2669 /* Returns a description of the computation cost of an expression whose
2670 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2672 static comp_cost
2673 new_cost (unsigned runtime, unsigned complexity)
2675 comp_cost cost;
2677 cost.cost = runtime;
2678 cost.complexity = complexity;
2680 return cost;
2683 /* Adds costs COST1 and COST2. */
2685 static comp_cost
2686 add_costs (comp_cost cost1, comp_cost cost2)
2688 cost1.cost += cost2.cost;
2689 cost1.complexity += cost2.complexity;
2691 return cost1;
2693 /* Subtracts cost COST2 from COST1. */
2695 static comp_cost
2696 sub_costs (comp_cost cost1, comp_cost cost2)
2698 cost1.cost -= cost2.cost;
2699 cost1.complexity -= cost2.complexity;
2701 return cost1;
2704 /* Returns a negative number if COST1 < COST2, a positive number if
2705 COST1 > COST2, and 0 if COST1 = COST2. */
2707 static int
2708 compare_costs (comp_cost cost1, comp_cost cost2)
2710 if (cost1.cost == cost2.cost)
2711 return cost1.complexity - cost2.complexity;
2713 return cost1.cost - cost2.cost;
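/* The comparison is lexicographic: runtime cost first, complexity only as a
   tie-breaker. E.g. (values illustrative)
   {cost 4, complexity 1} < {cost 4, complexity 2} < {cost 5, complexity 0}. */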
2716 /* Returns true if COST is infinite. */
2718 static bool
2719 infinite_cost_p (comp_cost cost)
2721 return cost.cost == INFTY;
2724 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2725 depends on invariants DEPENDS_ON, that the value used in expressing it
2726 is VALUE, and that, in case of iv elimination, the comparison operator is COMP. */
2728 static void
2729 set_use_iv_cost (struct ivopts_data *data,
2730 struct iv_use *use, struct iv_cand *cand,
2731 comp_cost cost, bitmap depends_on, tree value,
2732 enum tree_code comp, int inv_expr_id)
2734 unsigned i, s;
2736 if (infinite_cost_p (cost))
2738 BITMAP_FREE (depends_on);
2739 return;
2742 if (data->consider_all_candidates)
2744 use->cost_map[cand->id].cand = cand;
2745 use->cost_map[cand->id].cost = cost;
2746 use->cost_map[cand->id].depends_on = depends_on;
2747 use->cost_map[cand->id].value = value;
2748 use->cost_map[cand->id].comp = comp;
2749 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2750 return;
2753 /* n_map_members is a power of two, so this computes modulo. */
2754 s = cand->id & (use->n_map_members - 1);
2755 for (i = s; i < use->n_map_members; i++)
2756 if (!use->cost_map[i].cand)
2757 goto found;
2758 for (i = 0; i < s; i++)
2759 if (!use->cost_map[i].cand)
2760 goto found;
2762 gcc_unreachable ();
2764 found:
2765 use->cost_map[i].cand = cand;
2766 use->cost_map[i].cost = cost;
2767 use->cost_map[i].depends_on = depends_on;
2768 use->cost_map[i].value = value;
2769 use->cost_map[i].comp = comp;
2770 use->cost_map[i].inv_expr_id = inv_expr_id;
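/* A sketch of the open-addressing probe above, assuming n_map_members == 8
   and cand->id == 13: the scan starts at slot 13 & 7 == 5, tries slots
   5..7, then wraps around to 0..4; insertion stops at the first empty slot,
   and the lookup in get_use_iv_cost follows the same order. */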
2773 /* Gets cost of (USE, CANDIDATE) pair. */
2775 static struct cost_pair *
2776 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2777 struct iv_cand *cand)
2779 unsigned i, s;
2780 struct cost_pair *ret;
2782 if (!cand)
2783 return NULL;
2785 if (data->consider_all_candidates)
2787 ret = use->cost_map + cand->id;
2788 if (!ret->cand)
2789 return NULL;
2791 return ret;
2794 /* n_map_members is a power of two, so this computes modulo. */
2795 s = cand->id & (use->n_map_members - 1);
2796 for (i = s; i < use->n_map_members; i++)
2797 if (use->cost_map[i].cand == cand)
2798 return use->cost_map + i;
2799 else if (use->cost_map[i].cand == NULL)
2800 return NULL;
2801 for (i = 0; i < s; i++)
2802 if (use->cost_map[i].cand == cand)
2803 return use->cost_map + i;
2804 else if (use->cost_map[i].cand == NULL)
2805 return NULL;
2807 return NULL;
2810 /* Returns an estimate of the cost of computing SEQ. */
2812 static unsigned
2813 seq_cost (rtx seq, bool speed)
2815 unsigned cost = 0;
2816 rtx set;
2818 for (; seq; seq = NEXT_INSN (seq))
2820 set = single_set (seq);
2821 if (set)
2822 cost += set_src_cost (SET_SRC (set), speed);
2823 else
2824 cost++;
2827 return cost;
2830 /* Produces DECL_RTL for OBJ so it looks like it is stored in memory. */
2831 static rtx
2832 produce_memory_decl_rtl (tree obj, int *regno)
2834 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2835 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2836 rtx x;
2838 gcc_assert (obj);
2839 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2841 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2842 x = gen_rtx_SYMBOL_REF (address_mode, name);
2843 SET_SYMBOL_REF_DECL (x, obj);
2844 x = gen_rtx_MEM (DECL_MODE (obj), x);
2845 set_mem_addr_space (x, as);
2846 targetm.encode_section_info (obj, x, true);
2848 else
2850 x = gen_raw_REG (address_mode, (*regno)++);
2851 x = gen_rtx_MEM (DECL_MODE (obj), x);
2852 set_mem_addr_space (x, as);
2855 return x;
2858 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2859 walk_tree. DATA contains the actual fake register number. */
2861 static tree
2862 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2864 tree obj = NULL_TREE;
2865 rtx x = NULL_RTX;
2866 int *regno = (int *) data;
2868 switch (TREE_CODE (*expr_p))
2870 case ADDR_EXPR:
2871 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2872 handled_component_p (*expr_p);
2873 expr_p = &TREE_OPERAND (*expr_p, 0))
2874 continue;
2875 obj = *expr_p;
2876 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2877 x = produce_memory_decl_rtl (obj, regno);
2878 break;
2880 case SSA_NAME:
2881 *ws = 0;
2882 obj = SSA_NAME_VAR (*expr_p);
2883 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2884 if (!obj)
2885 return NULL_TREE;
2886 if (!DECL_RTL_SET_P (obj))
2887 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2888 break;
2890 case VAR_DECL:
2891 case PARM_DECL:
2892 case RESULT_DECL:
2893 *ws = 0;
2894 obj = *expr_p;
2896 if (DECL_RTL_SET_P (obj))
2897 break;
2899 if (DECL_MODE (obj) == BLKmode)
2900 x = produce_memory_decl_rtl (obj, regno);
2901 else
2902 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2904 break;
2906 default:
2907 break;
2910 if (x)
2912 decl_rtl_to_reset.safe_push (obj);
2913 SET_DECL_RTL (obj, x);
2916 return NULL_TREE;
2919 /* Determines cost of the computation of EXPR. */
2921 static unsigned
2922 computation_cost (tree expr, bool speed)
2924 rtx seq, rslt;
2925 tree type = TREE_TYPE (expr);
2926 unsigned cost;
2927 /* Avoid using hard regs in ways which may be unsupported. */
2928 int regno = LAST_VIRTUAL_REGISTER + 1;
2929 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2930 enum node_frequency real_frequency = node->frequency;
2932 node->frequency = NODE_FREQUENCY_NORMAL;
2933 crtl->maybe_hot_insn_p = speed;
2934 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2935 start_sequence ();
2936 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2937 seq = get_insns ();
2938 end_sequence ();
2939 default_rtl_profile ();
2940 node->frequency = real_frequency;
2942 cost = seq_cost (seq, speed);
2943 if (MEM_P (rslt))
2944 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2945 TYPE_ADDR_SPACE (type), speed);
2946 else if (!REG_P (rslt))
2947 cost += set_src_cost (rslt, speed);
2949 return cost;
2952 /* Returns variable containing the value of candidate CAND at statement AT. */
2954 static tree
2955 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2957 if (stmt_after_increment (loop, cand, stmt))
2958 return cand->var_after;
2959 else
2960 return cand->var_before;
2963 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2964 same precision, which is at least as wide as the precision of TYPE, stores
2965 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2966 type of A and B. */
2968 static tree
2969 determine_common_wider_type (tree *a, tree *b)
2971 tree wider_type = NULL;
2972 tree suba, subb;
2973 tree atype = TREE_TYPE (*a);
2975 if (CONVERT_EXPR_P (*a))
2977 suba = TREE_OPERAND (*a, 0);
2978 wider_type = TREE_TYPE (suba);
2979 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2980 return atype;
2982 else
2983 return atype;
2985 if (CONVERT_EXPR_P (*b))
2987 subb = TREE_OPERAND (*b, 0);
2988 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2989 return atype;
2991 else
2992 return atype;
2994 *a = suba;
2995 *b = subb;
2996 return wider_type;
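/* An illustrative case (types hypothetical): with long x, y and
   *A == (unsigned) x, *B == (unsigned) y, the function stores x to *A,
   y to *B and returns long. Computing the difference in the wider type
   folds better; the result is converted back to the narrow type later. */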
2999 /* Determines the expression by which USE is expressed from induction variable
3000 CAND at statement AT in LOOP. The expression is stored in decomposed
3001 form in AFF. Returns false if USE cannot be expressed using CAND. */
3003 static bool
3004 get_computation_aff (struct loop *loop,
3005 struct iv_use *use, struct iv_cand *cand, gimple at,
3006 struct affine_tree_combination *aff)
3008 tree ubase = use->iv->base;
3009 tree ustep = use->iv->step;
3010 tree cbase = cand->iv->base;
3011 tree cstep = cand->iv->step, cstep_common;
3012 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3013 tree common_type, var;
3014 tree uutype;
3015 aff_tree cbase_aff, var_aff;
3016 double_int rat;
3018 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3020 /* We do not have enough precision to express the values of USE. */
3021 return false;
3024 var = var_at_stmt (loop, cand, at);
3025 uutype = unsigned_type_for (utype);
3027 /* If the conversion is not noop, perform it. */
3028 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3030 cstep = fold_convert (uutype, cstep);
3031 cbase = fold_convert (uutype, cbase);
3032 var = fold_convert (uutype, var);
3035 if (!constant_multiple_of (ustep, cstep, &rat))
3036 return false;
3038 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3039 type, we achieve better folding by computing their difference in this
3040 wider type, and cast the result to UUTYPE. We do not need to worry about
3041 overflows, as all the arithmetic will in the end be performed in UUTYPE
3042 anyway. */
3043 common_type = determine_common_wider_type (&ubase, &cbase);
3045 /* use = ubase - ratio * cbase + ratio * var. */
3046 tree_to_aff_combination (ubase, common_type, aff);
3047 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3048 tree_to_aff_combination (var, uutype, &var_aff);
3050 /* We need to shift the value if we are after the increment. */
3051 if (stmt_after_increment (loop, cand, at))
3053 aff_tree cstep_aff;
3055 if (common_type != uutype)
3056 cstep_common = fold_convert (common_type, cstep);
3057 else
3058 cstep_common = cstep;
3060 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3061 aff_combination_add (&cbase_aff, &cstep_aff);
3064 aff_combination_scale (&cbase_aff, -rat);
3065 aff_combination_add (aff, &cbase_aff);
3066 if (common_type != uutype)
3067 aff_combination_convert (aff, uutype);
3069 aff_combination_scale (&var_aff, rat);
3070 aff_combination_add (aff, &var_aff);
3072 return true;
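/* A worked instance of the formula above, with illustrative values: for
   ustep == 8 and cstep == 4 we get rat == 2, hence

       use = ubase - 2 * cbase + 2 * var

   which advances by 2 * 4 == 8 whenever var advances by one cstep, as the
   use requires. */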
3075 /* Return the type of USE. */
3077 static tree
3078 get_use_type (struct iv_use *use)
3080 tree base_type = TREE_TYPE (use->iv->base);
3081 tree type;
3083 if (use->type == USE_ADDRESS)
3085 /* The base_type may be a void pointer. Create a pointer type based on
3086 the mem_ref instead. */
3087 type = build_pointer_type (TREE_TYPE (*use->op_p));
3088 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3089 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3091 else
3092 type = base_type;
3094 return type;
3097 /* Determines the expression by which USE is expressed from induction variable
3098 CAND at statement AT in LOOP. The computation is unshared. */
3100 static tree
3101 get_computation_at (struct loop *loop,
3102 struct iv_use *use, struct iv_cand *cand, gimple at)
3104 aff_tree aff;
3105 tree type = get_use_type (use);
3107 if (!get_computation_aff (loop, use, cand, at, &aff))
3108 return NULL_TREE;
3109 unshare_aff_combination (&aff);
3110 return fold_convert (type, aff_combination_to_tree (&aff));
3113 /* Determines the expression by which USE is expressed from induction variable
3114 CAND in LOOP. The computation is unshared. */
3116 static tree
3117 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3119 return get_computation_at (loop, use, cand, use->stmt);
3122 /* Adjust the cost COST for being in loop setup rather than loop body.
3123 If we're optimizing for space, the loop setup overhead is constant;
3124 if we're optimizing for speed, amortize it over the per-iteration cost. */
3125 static unsigned
3126 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3128 if (cost == INFTY)
3129 return cost;
3130 else if (optimize_loop_for_speed_p (data->current_loop))
3131 return cost / avg_loop_niter (data->current_loop);
3132 else
3133 return cost;
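/* E.g. a setup cost of 10 in a loop averaging 5 iterations counts as
   10 / 5 == 2 here when optimizing for speed; when optimizing for size the
   full 10 is kept, since the setup code is emitted exactly once. */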
3136 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3137 validity for a memory reference accessing memory of mode MODE in
3138 address space AS. */
3141 bool
3142 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3143 addr_space_t as)
3145 #define MAX_RATIO 128
3146 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3147 static vec<sbitmap> valid_mult_list;
3148 sbitmap valid_mult;
3150 if (data_index >= valid_mult_list.length ())
3151 valid_mult_list.safe_grow_cleared (data_index + 1);
3153 valid_mult = valid_mult_list[data_index];
3154 if (!valid_mult)
3156 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3157 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3158 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3159 rtx addr, scaled;
3160 HOST_WIDE_INT i;
3162 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3163 bitmap_clear (valid_mult);
3164 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3165 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3166 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3168 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3169 if (memory_address_addr_space_p (mode, addr, as)
3170 || memory_address_addr_space_p (mode, scaled, as))
3171 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3176 fprintf (dump_file, " allowed multipliers:");
3177 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3178 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3179 fprintf (dump_file, " %d", (int) i);
3180 fprintf (dump_file, "\n");
3181 fprintf (dump_file, "\n");
3184 valid_mult_list[data_index] = valid_mult;
3187 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3188 return false;
3190 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
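/* E.g. on a target with base + index * scale addressing, typically only a
   few ratios such as 1, 2, 4 and 8 end up set in VALID_MULT (this is
   target-dependent and illustrative), so an access needing index * 12 must
   instead pay for an explicit multiplication. */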
3193 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3194 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3195 variable is omitted. Compute the cost for a memory reference that accesses
3196 a memory location of mode MEM_MODE in address space AS.
3198 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3199 size of MEM_MODE / RATIO) is available. To make this determination, we
3200 look at the size of the increment to be made, which is given in CSTEP.
3201 CSTEP may be zero if the step is unknown.
3202 STMT_AFTER_INC is true iff the statement we're looking at is after the
3203 increment of the original biv.
3205 TODO -- there must be some better way. This all is quite crude. */
3207 typedef struct address_cost_data_s
3209 HOST_WIDE_INT min_offset, max_offset;
3210 unsigned costs[2][2][2][2];
3211 } *address_cost_data;
3214 static comp_cost
3215 get_address_cost (bool symbol_present, bool var_present,
3216 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3217 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3218 addr_space_t as, bool speed,
3219 bool stmt_after_inc, bool *may_autoinc)
3221 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3222 static vec<address_cost_data> address_cost_data_list;
3223 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3224 address_cost_data data;
3225 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3226 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3227 unsigned cost, acost, complexity;
3228 bool offset_p, ratio_p, autoinc;
3229 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3230 unsigned HOST_WIDE_INT mask;
3231 unsigned bits;
3233 if (data_index >= address_cost_data_list.length ())
3234 address_cost_data_list.safe_grow_cleared (data_index + 1);
3236 data = address_cost_data_list[data_index];
3237 if (!data)
3239 HOST_WIDE_INT i;
3240 HOST_WIDE_INT rat, off = 0;
3241 int old_cse_not_expected, width;
3242 unsigned sym_p, var_p, off_p, rat_p, add_c;
3243 rtx seq, addr, base;
3244 rtx reg0, reg1;
3246 data = (address_cost_data) xcalloc (1, sizeof (*data));
3248 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3250 width = GET_MODE_BITSIZE (address_mode) - 1;
3251 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3252 width = HOST_BITS_PER_WIDE_INT - 1;
3253 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3255 for (i = width; i >= 0; i--)
3257 off = -((unsigned HOST_WIDE_INT) 1 << i);
3258 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3259 if (memory_address_addr_space_p (mem_mode, addr, as))
3260 break;
3262 data->min_offset = (i == -1? 0 : off);
3264 for (i = width; i >= 0; i--)
3266 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3267 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3268 if (memory_address_addr_space_p (mem_mode, addr, as))
3269 break;
3271 if (i == -1)
3272 off = 0;
3273 data->max_offset = off;
3275 if (dump_file && (dump_flags & TDF_DETAILS))
3277 fprintf (dump_file, "get_address_cost:\n");
3278 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3279 GET_MODE_NAME (mem_mode),
3280 data->min_offset);
3281 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3282 GET_MODE_NAME (mem_mode),
3283 data->max_offset);
3286 rat = 1;
3287 for (i = 2; i <= MAX_RATIO; i++)
3288 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3290 rat = i;
3291 break;
3294 /* Compute the cost of various addressing modes. */
3295 acost = 0;
3296 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3297 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3299 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3300 || USE_STORE_PRE_DECREMENT (mem_mode))
3302 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3303 has_predec[mem_mode]
3304 = memory_address_addr_space_p (mem_mode, addr, as);
3306 if (USE_LOAD_POST_DECREMENT (mem_mode)
3307 || USE_STORE_POST_DECREMENT (mem_mode))
3309 addr = gen_rtx_POST_DEC (address_mode, reg0);
3310 has_postdec[mem_mode]
3311 = memory_address_addr_space_p (mem_mode, addr, as);
3313 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3314 || USE_STORE_PRE_INCREMENT (mem_mode))
3316 addr = gen_rtx_PRE_INC (address_mode, reg0);
3317 has_preinc[mem_mode]
3318 = memory_address_addr_space_p (mem_mode, addr, as);
3320 if (USE_LOAD_POST_INCREMENT (mem_mode)
3321 || USE_STORE_POST_INCREMENT (mem_mode))
3323 addr = gen_rtx_POST_INC (address_mode, reg0);
3324 has_postinc[mem_mode]
3325 = memory_address_addr_space_p (mem_mode, addr, as);
3327 for (i = 0; i < 16; i++)
3329 sym_p = i & 1;
3330 var_p = (i >> 1) & 1;
3331 off_p = (i >> 2) & 1;
3332 rat_p = (i >> 3) & 1;
3334 addr = reg0;
3335 if (rat_p)
3336 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3337 gen_int_mode (rat, address_mode));
3339 if (var_p)
3340 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3342 if (sym_p)
3344 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3345 /* ??? We can run into trouble with some backends by presenting
3346 it with symbols which haven't been properly passed through
3347 targetm.encode_section_info. By setting the local bit, we
3348 enhance the probability of things working. */
3349 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3351 if (off_p)
3352 base = gen_rtx_fmt_e (CONST, address_mode,
3353 gen_rtx_fmt_ee
3354 (PLUS, address_mode, base,
3355 gen_int_mode (off, address_mode)));
3357 else if (off_p)
3358 base = gen_int_mode (off, address_mode);
3359 else
3360 base = NULL_RTX;
3362 if (base)
3363 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3365 start_sequence ();
3366 /* To avoid splitting addressing modes, pretend that no cse will
3367 follow. */
3368 old_cse_not_expected = cse_not_expected;
3369 cse_not_expected = true;
3370 addr = memory_address_addr_space (mem_mode, addr, as);
3371 cse_not_expected = old_cse_not_expected;
3372 seq = get_insns ();
3373 end_sequence ();
3375 acost = seq_cost (seq, speed);
3376 acost += address_cost (addr, mem_mode, as, speed);
3378 if (!acost)
3379 acost = 1;
3380 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3383 /* On some targets, it is quite expensive to load symbol to a register,
3384 which makes addresses that contain symbols look much more expensive.
3385 However, the symbol will have to be loaded in any case before the
3386 loop (and quite likely we have it in register already), so it does not
3387 make much sense to penalize them too heavily. So make some final
3388 tweaks for the SYMBOL_PRESENT modes:
3390 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3391 var is cheaper, use this mode with small penalty.
3392 If VAR_PRESENT is true, try whether the mode with
3393 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3394 if this is the case, use it. */
3395 add_c = add_cost (speed, address_mode);
3396 for (i = 0; i < 8; i++)
3398 var_p = i & 1;
3399 off_p = (i >> 1) & 1;
3400 rat_p = (i >> 2) & 1;
3402 acost = data->costs[0][1][off_p][rat_p] + 1;
3403 if (var_p)
3404 acost += add_c;
3406 if (acost < data->costs[1][var_p][off_p][rat_p])
3407 data->costs[1][var_p][off_p][rat_p] = acost;
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3412 fprintf (dump_file, "Address costs:\n");
3414 for (i = 0; i < 16; i++)
3416 sym_p = i & 1;
3417 var_p = (i >> 1) & 1;
3418 off_p = (i >> 2) & 1;
3419 rat_p = (i >> 3) & 1;
3421 fprintf (dump_file, " ");
3422 if (sym_p)
3423 fprintf (dump_file, "sym + ");
3424 if (var_p)
3425 fprintf (dump_file, "var + ");
3426 if (off_p)
3427 fprintf (dump_file, "cst + ");
3428 if (rat_p)
3429 fprintf (dump_file, "rat * ");
3431 acost = data->costs[sym_p][var_p][off_p][rat_p];
3432 fprintf (dump_file, "index costs %d\n", acost);
3434 if (has_predec[mem_mode] || has_postdec[mem_mode]
3435 || has_preinc[mem_mode] || has_postinc[mem_mode])
3436 fprintf (dump_file, " May include autoinc/dec\n");
3437 fprintf (dump_file, "\n");
3440 address_cost_data_list[data_index] = data;
3443 bits = GET_MODE_BITSIZE (address_mode);
3444 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3445 offset &= mask;
3446 if ((offset >> (bits - 1) & 1))
3447 offset |= ~mask;
3448 s_offset = offset;
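/* A worked instance of the sign extension above, assuming bits == 32 and a
   64-bit HOST_WIDE_INT: mask == 0xffffffff, so for offset == 0xfffffff0
   bit 31 is set, ~mask is or-ed in, and s_offset ends up as -16. */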
3450 autoinc = false;
3451 msize = GET_MODE_SIZE (mem_mode);
3452 autoinc_offset = offset;
3453 if (stmt_after_inc)
3454 autoinc_offset += ratio * cstep;
3455 if (symbol_present || var_present || ratio != 1)
3456 autoinc = false;
3457 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3458 && msize == cstep)
3459 || (has_postdec[mem_mode] && autoinc_offset == 0
3460 && msize == -cstep)
3461 || (has_preinc[mem_mode] && autoinc_offset == msize
3462 && msize == cstep)
3463 || (has_predec[mem_mode] && autoinc_offset == -msize
3464 && msize == -cstep))
3465 autoinc = true;
3467 cost = 0;
3468 offset_p = (s_offset != 0
3469 && data->min_offset <= s_offset
3470 && s_offset <= data->max_offset);
3471 ratio_p = (ratio != 1
3472 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3474 if (ratio != 1 && !ratio_p)
3475 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3477 if (s_offset && !offset_p && !symbol_present)
3478 cost += add_cost (speed, address_mode);
3480 if (may_autoinc)
3481 *may_autoinc = autoinc;
3482 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3483 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3484 return new_cost (cost + acost, complexity);
3487 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3488 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3489 calculating the operands of EXPR. Returns true if successful, and returns
3490 the cost in COST. */
3492 static bool
3493 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3494 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3496 comp_cost res;
3497 tree op1 = TREE_OPERAND (expr, 1);
3498 tree cst = TREE_OPERAND (mult, 1);
3499 tree multop = TREE_OPERAND (mult, 0);
3500 int m = exact_log2 (int_cst_value (cst));
3501 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3502 int sa_cost;
3503 bool equal_p = false;
3505 if (!(m >= 0 && m < maxm))
3506 return false;
3508 if (operand_equal_p (op1, mult, 0))
3509 equal_p = true;
3511 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3512 ? shiftadd_cost (speed, mode, m)
3513 : (equal_p
3514 ? shiftsub1_cost (speed, mode, m)
3515 : shiftsub0_cost (speed, mode, m)));
3516 res = new_cost (sa_cost, 0);
3517 res = add_costs (res, equal_p ? cost0 : cost1);
3519 STRIP_NOPS (multop);
3520 if (!is_gimple_val (multop))
3521 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3523 *cost = res;
3524 return true;
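/* A small sketch of what qualifies (values illustrative): for
   expr == a + b * 8, mult is b * 8 and m == exact_log2 (8) == 3, so the
   returned cost is that of the shift-add form (b << 3) + a; for
   expr == a - b * 8 the corresponding shift-subtract cost is used. */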
3527 /* Estimates cost of forcing expression EXPR into a variable. */
3529 static comp_cost
3530 force_expr_to_var_cost (tree expr, bool speed)
3532 static bool costs_initialized = false;
3533 static unsigned integer_cost [2];
3534 static unsigned symbol_cost [2];
3535 static unsigned address_cost [2];
3536 tree op0, op1;
3537 comp_cost cost0, cost1, cost;
3538 enum machine_mode mode;
3540 if (!costs_initialized)
3542 tree type = build_pointer_type (integer_type_node);
3543 tree var, addr;
3544 rtx x;
3545 int i;
3547 var = create_tmp_var_raw (integer_type_node, "test_var");
3548 TREE_STATIC (var) = 1;
3549 x = produce_memory_decl_rtl (var, NULL);
3550 SET_DECL_RTL (var, x);
3552 addr = build1 (ADDR_EXPR, type, var);
3555 for (i = 0; i < 2; i++)
3557 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3558 2000), i);
3560 symbol_cost[i] = computation_cost (addr, i) + 1;
3562 address_cost[i]
3563 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3564 if (dump_file && (dump_flags & TDF_DETAILS))
3566 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3567 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3568 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3569 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3570 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3571 fprintf (dump_file, "\n");
3575 costs_initialized = true;
3578 STRIP_NOPS (expr);
3580 if (SSA_VAR_P (expr))
3581 return no_cost;
3583 if (is_gimple_min_invariant (expr))
3585 if (TREE_CODE (expr) == INTEGER_CST)
3586 return new_cost (integer_cost [speed], 0);
3588 if (TREE_CODE (expr) == ADDR_EXPR)
3590 tree obj = TREE_OPERAND (expr, 0);
3592 if (TREE_CODE (obj) == VAR_DECL
3593 || TREE_CODE (obj) == PARM_DECL
3594 || TREE_CODE (obj) == RESULT_DECL)
3595 return new_cost (symbol_cost [speed], 0);
3598 return new_cost (address_cost [speed], 0);
3601 switch (TREE_CODE (expr))
3603 case POINTER_PLUS_EXPR:
3604 case PLUS_EXPR:
3605 case MINUS_EXPR:
3606 case MULT_EXPR:
3607 op0 = TREE_OPERAND (expr, 0);
3608 op1 = TREE_OPERAND (expr, 1);
3609 STRIP_NOPS (op0);
3610 STRIP_NOPS (op1);
3611 break;
3613 CASE_CONVERT:
3614 case NEGATE_EXPR:
3615 op0 = TREE_OPERAND (expr, 0);
3616 STRIP_NOPS (op0);
3617 op1 = NULL_TREE;
3618 break;
3620 default:
3621 /* Just an arbitrary value, FIXME. */
3622 return new_cost (target_spill_cost[speed], 0);
3625 if (op0 == NULL_TREE
3626 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3627 cost0 = no_cost;
3628 else
3629 cost0 = force_expr_to_var_cost (op0, speed);
3631 if (op1 == NULL_TREE
3632 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3633 cost1 = no_cost;
3634 else
3635 cost1 = force_expr_to_var_cost (op1, speed);
3637 mode = TYPE_MODE (TREE_TYPE (expr));
3638 switch (TREE_CODE (expr))
3640 case POINTER_PLUS_EXPR:
3641 case PLUS_EXPR:
3642 case MINUS_EXPR:
3643 case NEGATE_EXPR:
3644 cost = new_cost (add_cost (speed, mode), 0);
3645 if (TREE_CODE (expr) != NEGATE_EXPR)
3647 tree mult = NULL_TREE;
3648 comp_cost sa_cost;
3649 if (TREE_CODE (op1) == MULT_EXPR)
3650 mult = op1;
3651 else if (TREE_CODE (op0) == MULT_EXPR)
3652 mult = op0;
3654 if (mult != NULL_TREE
3655 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3656 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3657 speed, &sa_cost))
3658 return sa_cost;
3660 break;
3662 CASE_CONVERT:
3664 tree inner_mode, outer_mode;
3665 outer_mode = TREE_TYPE (expr);
3666 inner_mode = TREE_TYPE (op0);
3667 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3668 TYPE_MODE (inner_mode), speed), 0);
3670 break;
3672 case MULT_EXPR:
3673 if (cst_and_fits_in_hwi (op0))
3674 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3675 mode, speed), 0);
3676 else if (cst_and_fits_in_hwi (op1))
3677 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3678 mode, speed), 0);
3679 else
3680 return new_cost (target_spill_cost [speed], 0);
3681 break;
3683 default:
3684 gcc_unreachable ();
3687 cost = add_costs (cost, cost0);
3688 cost = add_costs (cost, cost1);
3690 /* Bound the cost by target_spill_cost. The parts of complicated
3691 computations are often either loop invariant or at least can
3692 be shared between several iv uses, so letting this grow without
3693 limit would not give reasonable results. */
3694 if (cost.cost > (int) target_spill_cost [speed])
3695 cost.cost = target_spill_cost [speed];
3697 return cost;
3700 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3701 invariants the computation depends on. */
3703 static comp_cost
3704 force_var_cost (struct ivopts_data *data,
3705 tree expr, bitmap *depends_on)
3707 if (depends_on)
3709 fd_ivopts_data = data;
3710 walk_tree (&expr, find_depends, depends_on, NULL);
3713 return force_expr_to_var_cost (expr, data->speed);
3716 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3717 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3718 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3719 invariants the computation depends on. */
3721 static comp_cost
3722 split_address_cost (struct ivopts_data *data,
3723 tree addr, bool *symbol_present, bool *var_present,
3724 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3726 tree core;
3727 HOST_WIDE_INT bitsize;
3728 HOST_WIDE_INT bitpos;
3729 tree toffset;
3730 enum machine_mode mode;
3731 int unsignedp, volatilep;
3733 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3734 &unsignedp, &volatilep, false);
3736 if (toffset != 0
3737 || bitpos % BITS_PER_UNIT != 0
3738 || TREE_CODE (core) != VAR_DECL)
3740 *symbol_present = false;
3741 *var_present = true;
3742 fd_ivopts_data = data;
3743 walk_tree (&addr, find_depends, depends_on, NULL);
3744 return new_cost (target_spill_cost[data->speed], 0);
3747 *offset += bitpos / BITS_PER_UNIT;
3748 if (TREE_STATIC (core)
3749 || DECL_EXTERNAL (core))
3751 *symbol_present = true;
3752 *var_present = false;
3753 return no_cost;
3756 *symbol_present = false;
3757 *var_present = true;
3758 return no_cost;
3761 /* Estimates cost of expressing difference of addresses E1 - E2 as
3762 var + symbol + offset. The value of offset is added to OFFSET,
3763 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3764 part is missing. DEPENDS_ON is a set of the invariants the computation
3765 depends on. */
3767 static comp_cost
3768 ptr_difference_cost (struct ivopts_data *data,
3769 tree e1, tree e2, bool *symbol_present, bool *var_present,
3770 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3772 HOST_WIDE_INT diff = 0;
3773 aff_tree aff_e1, aff_e2;
3774 tree type;
3776 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3778 if (ptr_difference_const (e1, e2, &diff))
3780 *offset += diff;
3781 *symbol_present = false;
3782 *var_present = false;
3783 return no_cost;
3786 if (integer_zerop (e2))
3787 return split_address_cost (data, TREE_OPERAND (e1, 0),
3788 symbol_present, var_present, offset, depends_on);
3790 *symbol_present = false;
3791 *var_present = true;
3793 type = signed_type_for (TREE_TYPE (e1));
3794 tree_to_aff_combination (e1, type, &aff_e1);
3795 tree_to_aff_combination (e2, type, &aff_e2);
3796 aff_combination_scale (&aff_e2, double_int_minus_one);
3797 aff_combination_add (&aff_e1, &aff_e2);
3799 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
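/* E.g. (illustrative) for e1 == &a[4] and e2 == &a[1] with 4-byte elements,
   ptr_difference_const succeeds with diff == 12, the whole difference goes
   into *OFFSET, and the cost is zero. */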
3802 /* Estimates cost of expressing difference E1 - E2 as
3803 var + symbol + offset. The value of offset is added to OFFSET,
3804 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3805 part is missing. DEPENDS_ON is a set of the invariants the computation
3806 depends on. */
3808 static comp_cost
3809 difference_cost (struct ivopts_data *data,
3810 tree e1, tree e2, bool *symbol_present, bool *var_present,
3811 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3813 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3814 unsigned HOST_WIDE_INT off1, off2;
3815 aff_tree aff_e1, aff_e2;
3816 tree type;
3818 e1 = strip_offset (e1, &off1);
3819 e2 = strip_offset (e2, &off2);
3820 *offset += off1 - off2;
3822 STRIP_NOPS (e1);
3823 STRIP_NOPS (e2);
3825 if (TREE_CODE (e1) == ADDR_EXPR)
3826 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3827 offset, depends_on);
3828 *symbol_present = false;
3830 if (operand_equal_p (e1, e2, 0))
3832 *var_present = false;
3833 return no_cost;
3836 *var_present = true;
3838 if (integer_zerop (e2))
3839 return force_var_cost (data, e1, depends_on);
3841 if (integer_zerop (e1))
3843 comp_cost cost = force_var_cost (data, e2, depends_on);
3844 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3845 return cost;
3848 type = signed_type_for (TREE_TYPE (e1));
3849 tree_to_aff_combination (e1, type, &aff_e1);
3850 tree_to_aff_combination (e2, type, &aff_e2);
3851 aff_combination_scale (&aff_e2, double_int_minus_one);
3852 aff_combination_add (&aff_e1, &aff_e2);
3854 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3857 /* Returns true if AFF1 and AFF2 are identical. */
3859 static bool
3860 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3862 unsigned i;
3864 if (aff1->n != aff2->n)
3865 return false;
3867 for (i = 0; i < aff1->n; i++)
3869 if (aff1->elts[i].coef != aff2->elts[i].coef)
3870 return false;
3872 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3873 return false;
3875 return true;
3878 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3880 static int
3881 get_expr_id (struct ivopts_data *data, tree expr)
3883 struct iv_inv_expr_ent ent;
3884 struct iv_inv_expr_ent **slot;
3886 ent.expr = expr;
3887 ent.hash = iterative_hash_expr (expr, 0);
3888 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3889 if (*slot)
3890 return (*slot)->id;
3892 *slot = XNEW (struct iv_inv_expr_ent);
3893 (*slot)->expr = expr;
3894 (*slot)->hash = ent.hash;
3895 (*slot)->id = data->inv_expr_id++;
3896 return (*slot)->id;
3899 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3900 requires a new compiler-generated temporary. Returns -1 otherwise.
3901 ADDRESS_P is a flag indicating if the expression is for address
3902 computation. */
3904 static int
3905 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3906 tree cbase, HOST_WIDE_INT ratio,
3907 bool address_p)
3909 aff_tree ubase_aff, cbase_aff;
3910 tree expr, ub, cb;
3912 STRIP_NOPS (ubase);
3913 STRIP_NOPS (cbase);
3914 ub = ubase;
3915 cb = cbase;
3917 if ((TREE_CODE (ubase) == INTEGER_CST)
3918 && (TREE_CODE (cbase) == INTEGER_CST))
3919 return -1;
3921 /* Strips the constant part. */
3922 if (TREE_CODE (ubase) == PLUS_EXPR
3923 || TREE_CODE (ubase) == MINUS_EXPR
3924 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3926 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3927 ubase = TREE_OPERAND (ubase, 0);
3930 /* Strips the constant part. */
3931 if (TREE_CODE (cbase) == PLUS_EXPR
3932 || TREE_CODE (cbase) == MINUS_EXPR
3933 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3935 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3936 cbase = TREE_OPERAND (cbase, 0);
3939 if (address_p)
3941 if (((TREE_CODE (ubase) == SSA_NAME)
3942 || (TREE_CODE (ubase) == ADDR_EXPR
3943 && is_gimple_min_invariant (ubase)))
3944 && (TREE_CODE (cbase) == INTEGER_CST))
3945 return -1;
3947 if (((TREE_CODE (cbase) == SSA_NAME)
3948 || (TREE_CODE (cbase) == ADDR_EXPR
3949 && is_gimple_min_invariant (cbase)))
3950 && (TREE_CODE (ubase) == INTEGER_CST))
3951 return -1;
3954 if (ratio == 1)
3956 if (operand_equal_p (ubase, cbase, 0))
3957 return -1;
3959 if (TREE_CODE (ubase) == ADDR_EXPR
3960 && TREE_CODE (cbase) == ADDR_EXPR)
3962 tree usym, csym;
3964 usym = TREE_OPERAND (ubase, 0);
3965 csym = TREE_OPERAND (cbase, 0);
3966 if (TREE_CODE (usym) == ARRAY_REF)
3968 tree ind = TREE_OPERAND (usym, 1);
3969 if (TREE_CODE (ind) == INTEGER_CST
3970 && host_integerp (ind, 0)
3971 && TREE_INT_CST_LOW (ind) == 0)
3972 usym = TREE_OPERAND (usym, 0);
3974 if (TREE_CODE (csym) == ARRAY_REF)
3976 tree ind = TREE_OPERAND (csym, 1);
3977 if (TREE_CODE (ind) == INTEGER_CST
3978 && host_integerp (ind, 0)
3979 && TREE_INT_CST_LOW (ind) == 0)
3980 csym = TREE_OPERAND (csym, 0);
3982 if (operand_equal_p (usym, csym, 0))
3983 return -1;
3985 /* Now do a more complex comparison. */
3986 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3987 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3988 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3989 return -1;
3992 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3993 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3995 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3996 aff_combination_add (&ubase_aff, &cbase_aff);
3997 expr = aff_combination_to_tree (&ubase_aff);
3998 return get_expr_id (data, expr);
4003 /* Determines the cost of the computation by which USE is expressed
4004 from induction variable CAND. If ADDRESS_P is true, we just need
4005 to create an address from it, otherwise we want to get it into a
4006 register. A set of invariants we depend on is stored in
4007 DEPENDS_ON. AT is the statement at that the value is computed.
4008 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4009 addressing is likely. */
4011 static comp_cost
4012 get_computation_cost_at (struct ivopts_data *data,
4013 struct iv_use *use, struct iv_cand *cand,
4014 bool address_p, bitmap *depends_on, gimple at,
4015 bool *can_autoinc,
4016 int *inv_expr_id)
4018 tree ubase = use->iv->base, ustep = use->iv->step;
4019 tree cbase, cstep;
4020 tree utype = TREE_TYPE (ubase), ctype;
4021 unsigned HOST_WIDE_INT cstepi, offset = 0;
4022 HOST_WIDE_INT ratio, aratio;
4023 bool var_present, symbol_present, stmt_is_after_inc;
4024 comp_cost cost;
4025 double_int rat;
4026 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4027 enum machine_mode mem_mode = (address_p
4028 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4029 : VOIDmode);
4031 *depends_on = NULL;
4033 /* Only consider real candidates. */
4034 if (!cand->iv)
4035 return infinite_cost;
4037 cbase = cand->iv->base;
4038 cstep = cand->iv->step;
4039 ctype = TREE_TYPE (cbase);
4041 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4043 /* We do not have enough precision to express the values of USE. */
4044 return infinite_cost;
4047 if (address_p
4048 || (use->iv->base_object
4049 && cand->iv->base_object
4050 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4051 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4053 /* Do not try to express address of an object with computation based
4054 on address of a different object. This may cause problems in rtl
4055 level alias analysis (that does not expect this to be happening,
4056 as this is illegal in C), and would be unlikely to be useful
4057 anyway. */
4058 if (use->iv->base_object
4059 && cand->iv->base_object
4060 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4061 return infinite_cost;
4064 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4066 /* TODO -- add direct handling of this case. */
4067 goto fallback;
4070 /* CSTEPI is removed from the offset in case the statement is after the
4071 increment. If the step is not constant, we use zero instead.
4072 This is a bit imprecise (there is the extra addition), but
4073 redundancy elimination is likely to transform the code so that
4074 it uses the value of the variable before the increment anyway,
4075 so it is not too unrealistic. */
4076 if (cst_and_fits_in_hwi (cstep))
4077 cstepi = int_cst_value (cstep);
4078 else
4079 cstepi = 0;
4081 if (!constant_multiple_of (ustep, cstep, &rat))
4082 return infinite_cost;
4084 if (rat.fits_shwi ())
4085 ratio = rat.to_shwi ();
4086 else
4087 return infinite_cost;
4089 STRIP_NOPS (cbase);
4090 ctype = TREE_TYPE (cbase);
4092 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4094 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4095 or ratio == 1, it is better to handle this like
4097 ubase - ratio * cbase + ratio * var
4099 (this also holds in the case ratio == -1, TODO). */
4101 if (cst_and_fits_in_hwi (cbase))
4103 offset = - ratio * int_cst_value (cbase);
4104 cost = difference_cost (data,
4105 ubase, build_int_cst (utype, 0),
4106 &symbol_present, &var_present, &offset,
4107 depends_on);
4108 cost.cost /= avg_loop_niter (data->current_loop);
4110 else if (ratio == 1)
4112 tree real_cbase = cbase;
4114 /* Check to see if any adjustment is needed. */
4115 if (cstepi == 0 && stmt_is_after_inc)
4117 aff_tree real_cbase_aff;
4118 aff_tree cstep_aff;
4120 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4121 &real_cbase_aff);
4122 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4124 aff_combination_add (&real_cbase_aff, &cstep_aff);
4125 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4128 cost = difference_cost (data,
4129 ubase, real_cbase,
4130 &symbol_present, &var_present, &offset,
4131 depends_on);
4132 cost.cost /= avg_loop_niter (data->current_loop);
4134 else if (address_p
4135 && !POINTER_TYPE_P (ctype)
4136 && multiplier_allowed_in_address_p
4137 (ratio, mem_mode,
4138 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4140 cbase
4141 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4142 cost = difference_cost (data,
4143 ubase, cbase,
4144 &symbol_present, &var_present, &offset,
4145 depends_on);
4146 cost.cost /= avg_loop_niter (data->current_loop);
4148 else
4150 cost = force_var_cost (data, cbase, depends_on);
4151 cost = add_costs (cost,
4152 difference_cost (data,
4153 ubase, build_int_cst (utype, 0),
4154 &symbol_present, &var_present,
4155 &offset, depends_on));
4156 cost.cost /= avg_loop_niter (data->current_loop);
4157 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4160 if (inv_expr_id)
4162 *inv_expr_id =
4163 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4164 /* Clear depends on. */
4165 if (*inv_expr_id != -1 && depends_on && *depends_on)
4166 bitmap_clear (*depends_on);
4169 /* If we are after the increment, the value of the candidate is higher by
4170 one iteration. */
4171 if (stmt_is_after_inc)
4172 offset -= ratio * cstepi;
4174 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4175 (symbol/var1/const parts may be omitted). If we are looking for an
4176 address, find the cost of addressing this. */
4177 if (address_p)
4178 return add_costs (cost,
4179 get_address_cost (symbol_present, var_present,
4180 offset, ratio, cstepi,
4181 mem_mode,
4182 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4183 speed, stmt_is_after_inc,
4184 can_autoinc));
4186 /* Otherwise estimate the costs for computing the expression. */
4187 if (!symbol_present && !var_present && !offset)
4189 if (ratio != 1)
4190 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4191 return cost;
4194 /* Symbol + offset should be compile-time computable so consider that they
4195 are added once to the variable, if present. */
4196 if (var_present && (symbol_present || offset))
4197 cost.cost += adjust_setup_cost (data,
4198 add_cost (speed, TYPE_MODE (ctype)));
4200 /* Having an offset does not affect the runtime cost in case it is added
4201 to the symbol, but it increases complexity. */
4202 if (offset)
4203 cost.complexity++;
4205 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4207 aratio = ratio > 0 ? ratio : -ratio;
4208 if (aratio != 1)
4209 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4210 return cost;
4212 fallback:
4213 if (can_autoinc)
4214 *can_autoinc = false;
4217 /* Just get the expression, expand it and measure the cost. */
4218 tree comp = get_computation_at (data->current_loop, use, cand, at);
4220 if (!comp)
4221 return infinite_cost;
4223 if (address_p)
4224 comp = build_simple_mem_ref (comp);
4226 return new_cost (computation_cost (comp, speed), 0);
4230 /* Determines the cost of the computation by which USE is expressed
4231 from induction variable CAND. If ADDRESS_P is true, we just need
4232 to create an address from it, otherwise we want to get it into a
4233 register. A set of invariants we depend on is stored in
4234 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4235 autoinc addressing is likely. */
4237 static comp_cost
4238 get_computation_cost (struct ivopts_data *data,
4239 struct iv_use *use, struct iv_cand *cand,
4240 bool address_p, bitmap *depends_on,
4241 bool *can_autoinc, int *inv_expr_id)
4243 return get_computation_cost_at (data,
4244 use, cand, address_p, depends_on, use->stmt,
4245 can_autoinc, inv_expr_id);
4248 /* Determines cost of basing replacement of USE on CAND in a generic
4249 expression. */
4251 static bool
4252 determine_use_iv_cost_generic (struct ivopts_data *data,
4253 struct iv_use *use, struct iv_cand *cand)
4255 bitmap depends_on;
4256 comp_cost cost;
4257 int inv_expr_id = -1;
4259 /* The simple case first -- if we need to express the value of the
4260 preserved original biv, the cost is 0. This also prevents us from
4261 counting the cost of the increment twice -- once at this use and once
4262 in the cost of the candidate. */
4263 if (cand->pos == IP_ORIGINAL
4264 && cand->incremented_at == use->stmt)
4266 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4267 ERROR_MARK, -1);
4268 return true;
4271 cost = get_computation_cost (data, use, cand, false, &depends_on,
4272 NULL, &inv_expr_id);
4274 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4275 inv_expr_id);
4277 return !infinite_cost_p (cost);
4280 /* Determines cost of basing replacement of USE on CAND in an address. */
4282 static bool
4283 determine_use_iv_cost_address (struct ivopts_data *data,
4284 struct iv_use *use, struct iv_cand *cand)
4286 bitmap depends_on;
4287 bool can_autoinc;
4288 int inv_expr_id = -1;
4289 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4290 &can_autoinc, &inv_expr_id);
4292 if (cand->ainc_use == use)
4294 if (can_autoinc)
4295 cost.cost -= cand->cost_step;
4296 /* If we generated the candidate solely for exploiting autoincrement
4297 opportunities, and it turns out it can't be used, set the cost to
4298 infinity to make sure we ignore it. */
4299 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4300 cost = infinite_cost;
4302 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4303 inv_expr_id);
4305 return !infinite_cost_p (cost);
4308 /* Computes value of candidate CAND at position AT in iteration NITER, and
4309 stores it to VAL. */
4311 static void
4312 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4313 aff_tree *val)
4315 aff_tree step, delta, nit;
4316 struct iv *iv = cand->iv;
4317 tree type = TREE_TYPE (iv->base);
4318 tree steptype = type;
4319 if (POINTER_TYPE_P (type))
4320 steptype = sizetype;
4322 tree_to_aff_combination (iv->step, steptype, &step);
4323 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4324 aff_combination_convert (&nit, steptype);
4325 aff_combination_mult (&nit, &step, &delta);
4326 if (stmt_after_increment (loop, cand, at))
4327 aff_combination_add (&delta, &step);
4329 tree_to_aff_combination (iv->base, type, val);
4330 aff_combination_add (val, &delta);
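/* The value computed above is BASE + NITER * STEP, plus one more STEP
when the candidate is incremented before position AT. E.g., with
base 16 and step 4 (illustrative values), the value at iteration n
is 16 + 4 * n, or 16 + 4 * (n + 1) after the increment. */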
4333 /* Returns the period of induction variable IV. */
4335 static tree
4336 iv_period (struct iv *iv)
4338 tree step = iv->step, period, type;
4339 tree pow2div;
4341 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4343 type = unsigned_type_for (TREE_TYPE (step));
4344 /* Period of the iv is lcm (step, type_range)/step - 1,
4345 i.e., N*type_range/step - 1. Since the type range is a power
4346 of two, N == step >> num_of_ending_zeros_binary (step),
4347 so the final result is
4349 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4352 pow2div = num_ending_zeros (step);
4354 period = build_low_bits_mask (type,
4355 (TYPE_PRECISION (type)
4356 - tree_low_cst (pow2div, 1)));
4358 return period;
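/* A worked instance of the formula above (numbers chosen for
illustration): for a 16-bit unsigned type (type_range == 65536) and
step == 12, step has two ending zero bits (12 == 0b1100), so the
period is (65536 >> 2) - 1 == 16383. */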
4361 /* Returns the comparison operator used when eliminating the iv USE. */
4363 static enum tree_code
4364 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4366 struct loop *loop = data->current_loop;
4367 basic_block ex_bb;
4368 edge exit;
4370 ex_bb = gimple_bb (use->stmt);
4371 exit = EDGE_SUCC (ex_bb, 0);
4372 if (flow_bb_inside_loop_p (loop, exit->dest))
4373 exit = EDGE_SUCC (ex_bb, 1);
4375 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4378 static tree
4379 strip_wrap_conserving_type_conversions (tree exp)
4381 while (tree_ssa_useless_type_conversion (exp)
4382 && (nowrap_type_p (TREE_TYPE (exp))
4383 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4384 exp = TREE_OPERAND (exp, 0);
4385 return exp;
4388 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4389 check for an exact match. */
4391 static bool
4392 expr_equal_p (tree e, tree what)
4394 gimple stmt;
4395 enum tree_code code;
4397 e = strip_wrap_conserving_type_conversions (e);
4398 what = strip_wrap_conserving_type_conversions (what);
4400 code = TREE_CODE (what);
4401 if (TREE_TYPE (e) != TREE_TYPE (what))
4402 return false;
4404 if (operand_equal_p (e, what, 0))
4405 return true;
4407 if (TREE_CODE (e) != SSA_NAME)
4408 return false;
4410 stmt = SSA_NAME_DEF_STMT (e);
4411 if (gimple_code (stmt) != GIMPLE_ASSIGN
4412 || gimple_assign_rhs_code (stmt) != code)
4413 return false;
4415 switch (get_gimple_rhs_class (code))
4417 case GIMPLE_BINARY_RHS:
4418 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4419 return false;
4420 /* Fallthru. */
4422 case GIMPLE_UNARY_RHS:
4423 case GIMPLE_SINGLE_RHS:
4424 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4425 default:
4426 return false;
4430 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4431 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4432 calculation is performed in non-wrapping type.
4434 TODO: More generally, we could test for the situation that
4435 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4436 This would require knowing the sign of OFFSET.
4438 Also, we only look for the first addition in the computation of BASE.
4439 More complex analysis would be better, but introducing it just for
4440 this optimization seems like overkill. */
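/* For illustration (hypothetical names): if BASE is computed as
q = p + n (POINTER_PLUS_EXPR in a non-wrapping type) and OFFSET is n,
then BASE - OFFSET is just p and therefore cannot overflow. */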
4442 static bool
4443 difference_cannot_overflow_p (tree base, tree offset)
4445 enum tree_code code;
4446 tree e1, e2;
4448 if (!nowrap_type_p (TREE_TYPE (base)))
4449 return false;
4451 base = expand_simple_operations (base);
4453 if (TREE_CODE (base) == SSA_NAME)
4455 gimple stmt = SSA_NAME_DEF_STMT (base);
4457 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4458 return false;
4460 code = gimple_assign_rhs_code (stmt);
4461 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4462 return false;
4464 e1 = gimple_assign_rhs1 (stmt);
4465 e2 = gimple_assign_rhs2 (stmt);
4467 else
4469 code = TREE_CODE (base);
4470 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4471 return false;
4472 e1 = TREE_OPERAND (base, 0);
4473 e2 = TREE_OPERAND (base, 1);
4476 /* TODO: deeper inspection may be necessary to prove the equality. */
4477 switch (code)
4479 case PLUS_EXPR:
4480 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4481 case POINTER_PLUS_EXPR:
4482 return expr_equal_p (e2, offset);
4484 default:
4485 return false;
4489 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4490 comparison with CAND. NITER describes the number of iterations of
4491 the loops. If successful, the comparison in COMP_P is altered accordingly.
4493 We aim to handle the following situation:
4495 sometype *base, *p;
4496 int a, b, i;
4498 i = a;
4499 p = p_0 = base + a;
4501 do
4502 {
4503 bla (*p);
4504 p++;
4505 i++;
4506 }
4507 while (i < b);
4509 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4510 We aim to optimize this to
4512 p = p_0 = base + a;
4513 do
4514 {
4515 bla (*p);
4516 p++;
4517 }
4518 while (p < p_0 - a + b);
4520 This preserves correctness, since the pointer arithmetic does not
4521 overflow. More precisely:
4523 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4524 overflow in computing it or the values of p.
4525 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4526 overflow. To prove this, we use the fact that p_0 = base + a. */
4528 static bool
4529 iv_elimination_compare_lt (struct ivopts_data *data,
4530 struct iv_cand *cand, enum tree_code *comp_p,
4531 struct tree_niter_desc *niter)
4533 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4534 struct affine_tree_combination nit, tmpa, tmpb;
4535 enum tree_code comp;
4536 HOST_WIDE_INT step;
4538 /* We need to know that the candidate induction variable does not overflow.
4539 While more complex analysis may be used to prove this, for now just
4540 check that the variable appears in the original program and that it
4541 is computed in a type that guarantees no overflows. */
4542 cand_type = TREE_TYPE (cand->iv->base);
4543 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4544 return false;
4546 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4547 the calculation of the BOUND could overflow, making the comparison
4548 invalid. */
4549 if (!data->loop_single_exit_p)
4550 return false;
4552 /* We need to be able to decide whether the candidate is increasing or
4553 decreasing in order to choose the right comparison operator. */
4554 if (!cst_and_fits_in_hwi (cand->iv->step))
4555 return false;
4556 step = int_cst_value (cand->iv->step);
4558 /* Check that the number of iterations matches the expected pattern:
4559 a + 1 > b ? 0 : b - a - 1. */
4560 mbz = niter->may_be_zero;
4561 if (TREE_CODE (mbz) == GT_EXPR)
4563 /* Handle a + 1 > b. */
4564 tree op0 = TREE_OPERAND (mbz, 0);
4565 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4567 a = TREE_OPERAND (op0, 0);
4568 b = TREE_OPERAND (mbz, 1);
4570 else
4571 return false;
4573 else if (TREE_CODE (mbz) == LT_EXPR)
4575 tree op1 = TREE_OPERAND (mbz, 1);
4577 /* Handle b < a + 1. */
4578 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4580 a = TREE_OPERAND (op1, 0);
4581 b = TREE_OPERAND (mbz, 0);
4583 else
4584 return false;
4586 else
4587 return false;
4589 /* Expected number of iterations is B - A - 1. Check that it matches
4590 the actual number, i.e., that B - A - NITER = 1. */
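/* E.g., for the loop in the comment above with a == 2 and b == 10
(illustrative values): NITER == b - a - 1 == 7, and indeed
B - A - NITER == 10 - 2 - 7 == 1. */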
4591 tree_to_aff_combination (niter->niter, nit_type, &nit);
4592 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4593 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4594 aff_combination_scale (&nit, double_int_minus_one);
4595 aff_combination_scale (&tmpa, double_int_minus_one);
4596 aff_combination_add (&tmpb, &tmpa);
4597 aff_combination_add (&tmpb, &nit);
4598 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4599 return false;
4601 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4602 overflow. */
4603 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4604 cand->iv->step,
4605 fold_convert (TREE_TYPE (cand->iv->step), a));
4606 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4607 return false;
4609 /* Determine the new comparison operator. */
4610 comp = step < 0 ? GT_EXPR : LT_EXPR;
4611 if (*comp_p == NE_EXPR)
4612 *comp_p = comp;
4613 else if (*comp_p == EQ_EXPR)
4614 *comp_p = invert_tree_comparison (comp, false);
4615 else
4616 gcc_unreachable ();
4618 return true;
4621 /* Check whether it is possible to express the condition in USE by comparison
4622 of candidate CAND. If so, store the value compared with to BOUND, and the
4623 comparison operator to COMP. */
4625 static bool
4626 may_eliminate_iv (struct ivopts_data *data,
4627 struct iv_use *use, struct iv_cand *cand, tree *bound,
4628 enum tree_code *comp)
4630 basic_block ex_bb;
4631 edge exit;
4632 tree period;
4633 struct loop *loop = data->current_loop;
4634 aff_tree bnd;
4635 struct tree_niter_desc *desc = NULL;
4637 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4638 return false;
4640 /* For now this works only for exits that dominate the loop latch.
4641 TODO: extend to other conditions inside the loop body. */
4642 ex_bb = gimple_bb (use->stmt);
4643 if (use->stmt != last_stmt (ex_bb)
4644 || gimple_code (use->stmt) != GIMPLE_COND
4645 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4646 return false;
4648 exit = EDGE_SUCC (ex_bb, 0);
4649 if (flow_bb_inside_loop_p (loop, exit->dest))
4650 exit = EDGE_SUCC (ex_bb, 1);
4651 if (flow_bb_inside_loop_p (loop, exit->dest))
4652 return false;
4654 desc = niter_for_exit (data, exit);
4655 if (!desc)
4656 return false;
4658 /* Determine whether we can use the variable to test the exit condition.
4659 This is the case iff the period of the induction variable is greater
4660 than the number of iterations for which the exit condition is true. */
4661 period = iv_period (cand->iv);
4663 /* If the number of iterations is constant, compare against it directly. */
4664 if (TREE_CODE (desc->niter) == INTEGER_CST)
4666 /* See cand_value_at. */
4667 if (stmt_after_increment (loop, cand, use->stmt))
4669 if (!tree_int_cst_lt (desc->niter, period))
4670 return false;
4672 else
4674 if (tree_int_cst_lt (period, desc->niter))
4675 return false;
4679 /* If not, and if this is the only possible exit of the loop, see whether
4680 we can get a conservative estimate on the number of iterations of the
4681 entire loop and compare against that instead. */
4682 else
4684 double_int period_value, max_niter;
4686 max_niter = desc->max;
4687 if (stmt_after_increment (loop, cand, use->stmt))
4688 max_niter += double_int_one;
4689 period_value = tree_to_double_int (period);
4690 if (max_niter.ugt (period_value))
4692 /* See if we can take advantage of inferred loop bound information. */
4693 if (data->loop_single_exit_p)
4695 if (!max_loop_iterations (loop, &max_niter))
4696 return false;
4697 /* The loop bound is already adjusted by adding 1. */
4698 if (max_niter.ugt (period_value))
4699 return false;
4701 else
4702 return false;
4706 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4708 *bound = aff_combination_to_tree (&bnd);
4709 *comp = iv_elimination_compare (data, use);
4711 /* It is unlikely that computing the number of iterations using division
4712 would be more profitable than keeping the original induction variable. */
4713 if (expression_expensive_p (*bound))
4714 return false;
4716 /* Sometimes, it is possible to handle the situation that the number of
4717 iterations may be zero unless additional assumptions hold, by using <
4718 instead of != in the exit condition.
4720 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4721 base the exit condition on it. However, that is often too
4722 expensive. */
4723 if (!integer_zerop (desc->may_be_zero))
4724 return iv_elimination_compare_lt (data, cand, comp, desc);
4726 return true;
4729 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4730 be copied, if it is used in the loop body and DATA->body_includes_call. */
4732 static int
4733 parm_decl_cost (struct ivopts_data *data, tree bound)
4735 tree sbound = bound;
4736 STRIP_NOPS (sbound);
4738 if (TREE_CODE (sbound) == SSA_NAME
4739 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4740 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4741 && data->body_includes_call)
4742 return COSTS_N_INSNS (1);
4744 return 0;
4747 /* Determines cost of basing replacement of USE on CAND in a condition. */
4749 static bool
4750 determine_use_iv_cost_condition (struct ivopts_data *data,
4751 struct iv_use *use, struct iv_cand *cand)
4753 tree bound = NULL_TREE;
4754 struct iv *cmp_iv;
4755 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4756 comp_cost elim_cost, express_cost, cost, bound_cost;
4757 bool ok;
4758 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4759 tree *control_var, *bound_cst;
4760 enum tree_code comp = ERROR_MARK;
4762 /* Only consider real candidates. */
4763 if (!cand->iv)
4765 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4766 ERROR_MARK, -1);
4767 return false;
4770 /* Try iv elimination. */
4771 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4773 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4774 if (elim_cost.cost == 0)
4775 elim_cost.cost = parm_decl_cost (data, bound);
4776 else if (TREE_CODE (bound) == INTEGER_CST)
4777 elim_cost.cost = 0;
4778 /* If we replace a loop condition 'i < n' with 'p < base + n',
4779 depends_on_elim will have 'base' and 'n' set, which implies
4780 that both 'base' and 'n' will be live during the loop. More likely,
4781 'base + n' will be loop invariant, resulting in only one live value
4782 during the loop. So in that case we clear depends_on_elim and set
4783 elim_inv_expr_id instead. */
4784 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4786 elim_inv_expr_id = get_expr_id (data, bound);
4787 bitmap_clear (depends_on_elim);
4789 /* The bound is a loop invariant, so it will be computed
4790 only once. */
4791 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4793 else
4794 elim_cost = infinite_cost;
4796 /* Try expressing the original giv. If it is compared with an invariant,
4797 note that we cannot get rid of it. */
4798 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4799 NULL, &cmp_iv);
4800 gcc_assert (ok);
4802 /* When the condition is a comparison of the candidate IV against
4803 zero, prefer this IV.
4805 TODO: The constant that we're subtracting from the cost should
4806 be target-dependent. This information should be added to the
4807 target costs for each backend. */
4808 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4809 && integer_zerop (*bound_cst)
4810 && (operand_equal_p (*control_var, cand->var_after, 0)
4811 || operand_equal_p (*control_var, cand->var_before, 0)))
4812 elim_cost.cost -= 1;
4814 express_cost = get_computation_cost (data, use, cand, false,
4815 &depends_on_express, NULL,
4816 &express_inv_expr_id);
4817 fd_ivopts_data = data;
4818 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4820 /* Count the cost of the original bound as well. */
4821 bound_cost = force_var_cost (data, *bound_cst, NULL);
4822 if (bound_cost.cost == 0)
4823 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4824 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4825 bound_cost.cost = 0;
4826 express_cost.cost += bound_cost.cost;
4828 /* Choose the better approach, preferring the eliminated IV. */
4829 if (compare_costs (elim_cost, express_cost) <= 0)
4831 cost = elim_cost;
4832 depends_on = depends_on_elim;
4833 depends_on_elim = NULL;
4834 inv_expr_id = elim_inv_expr_id;
4836 else
4838 cost = express_cost;
4839 depends_on = depends_on_express;
4840 depends_on_express = NULL;
4841 bound = NULL_TREE;
4842 comp = ERROR_MARK;
4843 inv_expr_id = express_inv_expr_id;
4846 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4848 if (depends_on_elim)
4849 BITMAP_FREE (depends_on_elim);
4850 if (depends_on_express)
4851 BITMAP_FREE (depends_on_express);
4853 return !infinite_cost_p (cost);
4856 /* Determines cost of basing replacement of USE on CAND. Returns false
4857 if USE cannot be based on CAND. */
4859 static bool
4860 determine_use_iv_cost (struct ivopts_data *data,
4861 struct iv_use *use, struct iv_cand *cand)
4863 switch (use->type)
4865 case USE_NONLINEAR_EXPR:
4866 return determine_use_iv_cost_generic (data, use, cand);
4868 case USE_ADDRESS:
4869 return determine_use_iv_cost_address (data, use, cand);
4871 case USE_COMPARE:
4872 return determine_use_iv_cost_condition (data, use, cand);
4874 default:
4875 gcc_unreachable ();
4879 /* Return true if get_computation_cost indicates that autoincrement is
4880 a possibility for the pair of USE and CAND, false otherwise. */
4882 static bool
4883 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4884 struct iv_cand *cand)
4886 bitmap depends_on;
4887 bool can_autoinc;
4888 comp_cost cost;
4890 if (use->type != USE_ADDRESS)
4891 return false;
4893 cost = get_computation_cost (data, use, cand, true, &depends_on,
4894 &can_autoinc, NULL);
4896 BITMAP_FREE (depends_on);
4898 return !infinite_cost_p (cost) && can_autoinc;
4901 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4902 use that allows autoincrement, and set their AINC_USE if possible. */
4904 static void
4905 set_autoinc_for_original_candidates (struct ivopts_data *data)
4907 unsigned i, j;
4909 for (i = 0; i < n_iv_cands (data); i++)
4911 struct iv_cand *cand = iv_cand (data, i);
4912 struct iv_use *closest_before = NULL;
4913 struct iv_use *closest_after = NULL;
4914 if (cand->pos != IP_ORIGINAL)
4915 continue;
4917 for (j = 0; j < n_iv_uses (data); j++)
4919 struct iv_use *use = iv_use (data, j);
4920 unsigned uid = gimple_uid (use->stmt);
4922 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4923 continue;
4925 if (uid < gimple_uid (cand->incremented_at)
4926 && (closest_before == NULL
4927 || uid > gimple_uid (closest_before->stmt)))
4928 closest_before = use;
4930 if (uid > gimple_uid (cand->incremented_at)
4931 && (closest_after == NULL
4932 || uid < gimple_uid (closest_after->stmt)))
4933 closest_after = use;
4936 if (closest_before != NULL
4937 && autoinc_possible_for_pair (data, closest_before, cand))
4938 cand->ainc_use = closest_before;
4939 else if (closest_after != NULL
4940 && autoinc_possible_for_pair (data, closest_after, cand))
4941 cand->ainc_use = closest_after;
4945 /* Finds the candidates for the induction variables. */
4947 static void
4948 find_iv_candidates (struct ivopts_data *data)
4950 /* Add commonly used ivs. */
4951 add_standard_iv_candidates (data);
4953 /* Add old induction variables. */
4954 add_old_ivs_candidates (data);
4956 /* Add induction variables derived from uses. */
4957 add_derived_ivs_candidates (data);
4959 set_autoinc_for_original_candidates (data);
4961 /* Record the important candidates. */
4962 record_important_candidates (data);
4965 /* Determines costs of basing the use of the iv on an iv candidate. */
4967 static void
4968 determine_use_iv_costs (struct ivopts_data *data)
4970 unsigned i, j;
4971 struct iv_use *use;
4972 struct iv_cand *cand;
4973 bitmap to_clear = BITMAP_ALLOC (NULL);
4975 alloc_use_cost_map (data);
4977 for (i = 0; i < n_iv_uses (data); i++)
4979 use = iv_use (data, i);
4981 if (data->consider_all_candidates)
4983 for (j = 0; j < n_iv_cands (data); j++)
4985 cand = iv_cand (data, j);
4986 determine_use_iv_cost (data, use, cand);
4989 else
4991 bitmap_iterator bi;
4993 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4995 cand = iv_cand (data, j);
4996 if (!determine_use_iv_cost (data, use, cand))
4997 bitmap_set_bit (to_clear, j);
5000 /* Remove the candidates for which the cost is infinite from
5001 the list of related candidates. */
5002 bitmap_and_compl_into (use->related_cands, to_clear);
5003 bitmap_clear (to_clear);
5007 BITMAP_FREE (to_clear);
5009 if (dump_file && (dump_flags & TDF_DETAILS))
5011 fprintf (dump_file, "Use-candidate costs:\n");
5013 for (i = 0; i < n_iv_uses (data); i++)
5015 use = iv_use (data, i);
5017 fprintf (dump_file, "Use %d:\n", i);
5018 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5019 for (j = 0; j < use->n_map_members; j++)
5021 if (!use->cost_map[j].cand
5022 || infinite_cost_p (use->cost_map[j].cost))
5023 continue;
5025 fprintf (dump_file, " %d\t%d\t%d\t",
5026 use->cost_map[j].cand->id,
5027 use->cost_map[j].cost.cost,
5028 use->cost_map[j].cost.complexity);
5029 if (use->cost_map[j].depends_on)
5030 bitmap_print (dump_file,
5031 use->cost_map[j].depends_on, "","");
5032 if (use->cost_map[j].inv_expr_id != -1)
5033 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5034 fprintf (dump_file, "\n");
5037 fprintf (dump_file, "\n");
5039 fprintf (dump_file, "\n");
5043 /* Determines cost of the candidate CAND. */
5045 static void
5046 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5048 comp_cost cost_base;
5049 unsigned cost, cost_step;
5050 tree base;
5052 if (!cand->iv)
5054 cand->cost = 0;
5055 return;
5058 /* There are two costs associated with the candidate -- its increment
5059 and its initialization. The second is almost negligible for any loop
5060 that rolls enough, so we take it into account only very little. */
5062 base = cand->iv->base;
5063 cost_base = force_var_cost (data, base, NULL);
5064 /* It is rare for the iv register to be initialized with the proper
5065 value at no cost. In general, there will at least be a regcopy
5066 or a const set. */
5067 if (cost_base.cost == 0)
5068 cost_base.cost = COSTS_N_INSNS (1);
5069 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5071 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5073 /* Prefer the original ivs unless we may gain something by replacing it.
5074 The reason is to make debugging simpler; so this is not relevant for
5075 artificial ivs created by other optimization passes. */
5076 if (cand->pos != IP_ORIGINAL
5077 || !SSA_NAME_VAR (cand->var_before)
5078 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5079 cost++;
5081 /* Prefer not to insert statements into latch unless there are some
5082 already (so that we do not create unnecessary jumps). */
5083 if (cand->pos == IP_END
5084 && empty_block_p (ip_end_pos (data->current_loop)))
5085 cost++;
5087 cand->cost = cost;
5088 cand->cost_step = cost_step;
5091 /* Determines costs of computation of the candidates. */
5093 static void
5094 determine_iv_costs (struct ivopts_data *data)
5096 unsigned i;
5098 if (dump_file && (dump_flags & TDF_DETAILS))
5100 fprintf (dump_file, "Candidate costs:\n");
5101 fprintf (dump_file, " cand\tcost\n");
5104 for (i = 0; i < n_iv_cands (data); i++)
5106 struct iv_cand *cand = iv_cand (data, i);
5108 determine_iv_cost (data, cand);
5110 if (dump_file && (dump_flags & TDF_DETAILS))
5111 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5114 if (dump_file && (dump_flags & TDF_DETAILS))
5115 fprintf (dump_file, "\n");
5118 /* Calculates cost for having SIZE induction variables. */
5120 static unsigned
5121 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5123 /* We add size to the cost, so that we prefer eliminating ivs
5124 if possible. */
5125 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5126 data->body_includes_call);
5129 /* For each size of the induction variable set determine the penalty. */
5131 static void
5132 determine_set_costs (struct ivopts_data *data)
5134 unsigned j, n;
5135 gimple phi;
5136 gimple_stmt_iterator psi;
5137 tree op;
5138 struct loop *loop = data->current_loop;
5139 bitmap_iterator bi;
5141 if (dump_file && (dump_flags & TDF_DETAILS))
5143 fprintf (dump_file, "Global costs:\n");
5144 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5145 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5146 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5147 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5150 n = 0;
5151 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5153 phi = gsi_stmt (psi);
5154 op = PHI_RESULT (phi);
5156 if (virtual_operand_p (op))
5157 continue;
5159 if (get_iv (data, op))
5160 continue;
5162 n++;
5165 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5167 struct version_info *info = ver_info (data, j);
5169 if (info->inv_id && info->has_nonlin_use)
5170 n++;
5173 data->regs_used = n;
5174 if (dump_file && (dump_flags & TDF_DETAILS))
5175 fprintf (dump_file, " regs_used %d\n", n);
5177 if (dump_file && (dump_flags & TDF_DETAILS))
5179 fprintf (dump_file, " cost for size:\n");
5180 fprintf (dump_file, " ivs\tcost\n");
5181 for (j = 0; j <= 2 * target_avail_regs; j++)
5182 fprintf (dump_file, " %d\t%d\n", j,
5183 ivopts_global_cost_for_size (data, j));
5184 fprintf (dump_file, "\n");
5188 /* Returns true if A is a cheaper cost pair than B. */
5190 static bool
5191 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5193 int cmp;
5195 if (!a)
5196 return false;
5198 if (!b)
5199 return true;
5201 cmp = compare_costs (a->cost, b->cost);
5202 if (cmp < 0)
5203 return true;
5205 if (cmp > 0)
5206 return false;
5208 /* In case the costs are the same, prefer the cheaper candidate. */
5209 if (a->cand->cost < b->cand->cost)
5210 return true;
5212 return false;
5216 /* Returns the candidate by which USE is expressed in IVS. */
5218 static struct cost_pair *
5219 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5221 return ivs->cand_for_use[use->id];
5224 /* Computes the cost field of IVS structure. */
5226 static void
5227 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5229 comp_cost cost = ivs->cand_use_cost;
5231 cost.cost += ivs->cand_cost;
5233 cost.cost += ivopts_global_cost_for_size (data,
5234 ivs->n_regs + ivs->num_used_inv_expr);
5236 ivs->cost = cost;
5239 /* Remove invariants in set INVS from set IVS. */
5241 static void
5242 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5244 bitmap_iterator bi;
5245 unsigned iid;
5247 if (!invs)
5248 return;
5250 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5252 ivs->n_invariant_uses[iid]--;
5253 if (ivs->n_invariant_uses[iid] == 0)
5254 ivs->n_regs--;
5258 /* Set USE not to be expressed by any candidate in IVS. */
5260 static void
5261 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5262 struct iv_use *use)
5264 unsigned uid = use->id, cid;
5265 struct cost_pair *cp;
5267 cp = ivs->cand_for_use[uid];
5268 if (!cp)
5269 return;
5270 cid = cp->cand->id;
5272 ivs->bad_uses++;
5273 ivs->cand_for_use[uid] = NULL;
5274 ivs->n_cand_uses[cid]--;
5276 if (ivs->n_cand_uses[cid] == 0)
5278 bitmap_clear_bit (ivs->cands, cid);
5279 /* Do not count the pseudocandidates. */
5280 if (cp->cand->iv)
5281 ivs->n_regs--;
5282 ivs->n_cands--;
5283 ivs->cand_cost -= cp->cand->cost;
5285 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5288 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5290 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5292 if (cp->inv_expr_id != -1)
5294 ivs->used_inv_expr[cp->inv_expr_id]--;
5295 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5296 ivs->num_used_inv_expr--;
5298 iv_ca_recount_cost (data, ivs);
5301 /* Add invariants in set INVS to set IVS. */
5303 static void
5304 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5306 bitmap_iterator bi;
5307 unsigned iid;
5309 if (!invs)
5310 return;
5312 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5314 ivs->n_invariant_uses[iid]++;
5315 if (ivs->n_invariant_uses[iid] == 1)
5316 ivs->n_regs++;
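/* Together with iv_ca_set_remove_invariants above, this maintains a
reference count per invariant: the invariant is accounted as occupying
a register (ivs->n_regs) from the moment the first selected cost pair
depends on it until the last such pair is removed. */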
5320 /* Set cost pair for USE in set IVS to CP. */
5322 static void
5323 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5324 struct iv_use *use, struct cost_pair *cp)
5326 unsigned uid = use->id, cid;
5328 if (ivs->cand_for_use[uid] == cp)
5329 return;
5331 if (ivs->cand_for_use[uid])
5332 iv_ca_set_no_cp (data, ivs, use);
5334 if (cp)
5336 cid = cp->cand->id;
5338 ivs->bad_uses--;
5339 ivs->cand_for_use[uid] = cp;
5340 ivs->n_cand_uses[cid]++;
5341 if (ivs->n_cand_uses[cid] == 1)
5343 bitmap_set_bit (ivs->cands, cid);
5344 /* Do not count the pseudocandidates. */
5345 if (cp->cand->iv)
5346 ivs->n_regs++;
5347 ivs->n_cands++;
5348 ivs->cand_cost += cp->cand->cost;
5350 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5353 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5354 iv_ca_set_add_invariants (ivs, cp->depends_on);
5356 if (cp->inv_expr_id != -1)
5358 ivs->used_inv_expr[cp->inv_expr_id]++;
5359 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5360 ivs->num_used_inv_expr++;
5362 iv_ca_recount_cost (data, ivs);
5366 /* Extend set IVS by expressing USE by some of the candidates in it
5367 if possible. All important candidates will be considered
5368 if IMPORTANT_CANDIDATES is true. */
5370 static void
5371 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5372 struct iv_use *use, bool important_candidates)
5374 struct cost_pair *best_cp = NULL, *cp;
5375 bitmap_iterator bi;
5376 bitmap cands;
5377 unsigned i;
5379 gcc_assert (ivs->upto >= use->id);
5381 if (ivs->upto == use->id)
5383 ivs->upto++;
5384 ivs->bad_uses++;
5387 cands = (important_candidates ? data->important_candidates : ivs->cands);
5388 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5390 struct iv_cand *cand = iv_cand (data, i);
5392 cp = get_use_iv_cost (data, use, cand);
5394 if (cheaper_cost_pair (cp, best_cp))
5395 best_cp = cp;
5398 iv_ca_set_cp (data, ivs, use, best_cp);
5401 /* Get cost for assignment IVS. */
5403 static comp_cost
5404 iv_ca_cost (struct iv_ca *ivs)
5406 /* This was a conditional expression but it triggered a bug in
5407 Sun C 5.5. */
5408 if (ivs->bad_uses)
5409 return infinite_cost;
5410 else
5411 return ivs->cost;
5414 /* Returns true if all dependences of CP are among invariants in IVS. */
5416 static bool
5417 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5419 unsigned i;
5420 bitmap_iterator bi;
5422 if (!cp->depends_on)
5423 return true;
5425 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5427 if (ivs->n_invariant_uses[i] == 0)
5428 return false;
5431 return true;
5434 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5435 it before NEXT_CHANGE. */
5437 static struct iv_ca_delta *
5438 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5439 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5441 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5443 change->use = use;
5444 change->old_cp = old_cp;
5445 change->new_cp = new_cp;
5446 change->next_change = next_change;
5448 return change;
5451 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5452 are rewritten. */
5454 static struct iv_ca_delta *
5455 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5457 struct iv_ca_delta *last;
5459 if (!l2)
5460 return l1;
5462 if (!l1)
5463 return l2;
5465 for (last = l1; last->next_change; last = last->next_change)
5466 continue;
5467 last->next_change = l2;
5469 return l1;
5472 /* Reverse the list of changes DELTA, forming the inverse to it. */
5474 static struct iv_ca_delta *
5475 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5477 struct iv_ca_delta *act, *next, *prev = NULL;
5478 struct cost_pair *tmp;
5480 for (act = delta; act; act = next)
5482 next = act->next_change;
5483 act->next_change = prev;
5484 prev = act;
5486 tmp = act->old_cp;
5487 act->old_cp = act->new_cp;
5488 act->new_cp = tmp;
5491 return prev;
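/* E.g., reversing a delta list (illustrative)
[use1: A -> B] -> [use2: C -> D]
yields
[use2: D -> C] -> [use1: B -> A],
which, when committed, exactly undoes the original changes. */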
5494 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5495 reverted instead. */
5497 static void
5498 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5499 struct iv_ca_delta *delta, bool forward)
5501 struct cost_pair *from, *to;
5502 struct iv_ca_delta *act;
5504 if (!forward)
5505 delta = iv_ca_delta_reverse (delta);
5507 for (act = delta; act; act = act->next_change)
5509 from = act->old_cp;
5510 to = act->new_cp;
5511 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5512 iv_ca_set_cp (data, ivs, act->use, to);
5515 if (!forward)
5516 iv_ca_delta_reverse (delta);
5519 /* Returns true if CAND is used in IVS. */
5521 static bool
5522 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5524 return ivs->n_cand_uses[cand->id] > 0;
5527 /* Returns number of induction variable candidates in the set IVS. */
5529 static unsigned
5530 iv_ca_n_cands (struct iv_ca *ivs)
5532 return ivs->n_cands;
5535 /* Free the list of changes DELTA. */
5537 static void
5538 iv_ca_delta_free (struct iv_ca_delta **delta)
5540 struct iv_ca_delta *act, *next;
5542 for (act = *delta; act; act = next)
5544 next = act->next_change;
5545 free (act);
5548 *delta = NULL;
5551 /* Allocates a new iv candidate assignment. */
5553 static struct iv_ca *
5554 iv_ca_new (struct ivopts_data *data)
5556 struct iv_ca *nw = XNEW (struct iv_ca);
5558 nw->upto = 0;
5559 nw->bad_uses = 0;
5560 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5561 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5562 nw->cands = BITMAP_ALLOC (NULL);
5563 nw->n_cands = 0;
5564 nw->n_regs = 0;
5565 nw->cand_use_cost = no_cost;
5566 nw->cand_cost = 0;
5567 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5568 nw->cost = no_cost;
5569 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5570 nw->num_used_inv_expr = 0;
5572 return nw;
5575 /* Free memory occupied by the set IVS. */
5577 static void
5578 iv_ca_free (struct iv_ca **ivs)
5580 free ((*ivs)->cand_for_use);
5581 free ((*ivs)->n_cand_uses);
5582 BITMAP_FREE ((*ivs)->cands);
5583 free ((*ivs)->n_invariant_uses);
5584 free ((*ivs)->used_inv_expr);
5585 free (*ivs);
5586 *ivs = NULL;
5589 /* Dumps IVS to FILE. */
5591 static void
5592 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5594 const char *pref = " invariants ";
5595 unsigned i;
5596 comp_cost cost = iv_ca_cost (ivs);
5598 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5599 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5600 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5601 bitmap_print (file, ivs->cands, " candidates: ","\n");
5603 for (i = 0; i < ivs->upto; i++)
5605 struct iv_use *use = iv_use (data, i);
5606 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5607 if (cp)
5608 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5609 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5610 else
5611 fprintf (file, " use:%d --> ??\n", use->id);
5614 for (i = 1; i <= data->max_inv_id; i++)
5615 if (ivs->n_invariant_uses[i])
5617 fprintf (file, "%s%d", pref, i);
5618 pref = ", ";
5620 fprintf (file, "\n\n");
5623 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5624 new set, and store differences in DELTA. Number of induction variables
5625 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5626 the function will try to find a solution with minimal iv candidates. */
5628 static comp_cost
5629 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5630 struct iv_cand *cand, struct iv_ca_delta **delta,
5631 unsigned *n_ivs, bool min_ncand)
5633 unsigned i;
5634 comp_cost cost;
5635 struct iv_use *use;
5636 struct cost_pair *old_cp, *new_cp;
5638 *delta = NULL;
5639 for (i = 0; i < ivs->upto; i++)
5641 use = iv_use (data, i);
5642 old_cp = iv_ca_cand_for_use (ivs, use);
5644 if (old_cp
5645 && old_cp->cand == cand)
5646 continue;
5648 new_cp = get_use_iv_cost (data, use, cand);
5649 if (!new_cp)
5650 continue;
5652 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5653 continue;
5655 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5656 continue;
5658 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5661 iv_ca_delta_commit (data, ivs, *delta, true);
5662 cost = iv_ca_cost (ivs);
5663 if (n_ivs)
5664 *n_ivs = iv_ca_n_cands (ivs);
5665 iv_ca_delta_commit (data, ivs, *delta, false);
5667 return cost;
5670 /* Try narrowing set IVS by removing CAND. Return the cost of
5671 the new set and store the differences in DELTA. */
5673 static comp_cost
5674 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5675 struct iv_cand *cand, struct iv_ca_delta **delta)
5677 unsigned i, ci;
5678 struct iv_use *use;
5679 struct cost_pair *old_cp, *new_cp, *cp;
5680 bitmap_iterator bi;
5681 struct iv_cand *cnd;
5682 comp_cost cost;
5684 *delta = NULL;
5685 for (i = 0; i < n_iv_uses (data); i++)
5687 use = iv_use (data, i);
5689 old_cp = iv_ca_cand_for_use (ivs, use);
5690 if (old_cp->cand != cand)
5691 continue;
5693 new_cp = NULL;
5695 if (data->consider_all_candidates)
5697 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5699 if (ci == cand->id)
5700 continue;
5702 cnd = iv_cand (data, ci);
5704 cp = get_use_iv_cost (data, use, cnd);
5705 if (!cp)
5706 continue;
5708 if (!iv_ca_has_deps (ivs, cp))
5709 continue;
5711 if (!cheaper_cost_pair (cp, new_cp))
5712 continue;
5714 new_cp = cp;
5717 else
5719 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5721 if (ci == cand->id)
5722 continue;
5724 cnd = iv_cand (data, ci);
5726 cp = get_use_iv_cost (data, use, cnd);
5727 if (!cp)
5728 continue;
5729 if (!iv_ca_has_deps (ivs, cp))
5730 continue;
5732 if (!cheaper_cost_pair (cp, new_cp))
5733 continue;
5735 new_cp = cp;
5739 if (!new_cp)
5741 iv_ca_delta_free (delta);
5742 return infinite_cost;
5745 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5748 iv_ca_delta_commit (data, ivs, *delta, true);
5749 cost = iv_ca_cost (ivs);
5750 iv_ca_delta_commit (data, ivs, *delta, false);
5752 return cost;
5755 /* Try optimizing the set of candidates IVS by removing candidates other
5756 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5757 differences in DELTA. */
5759 static comp_cost
5760 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5761 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5763 bitmap_iterator bi;
5764 struct iv_ca_delta *act_delta, *best_delta;
5765 unsigned i;
5766 comp_cost best_cost, acost;
5767 struct iv_cand *cand;
5769 best_delta = NULL;
5770 best_cost = iv_ca_cost (ivs);
5772 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5774 cand = iv_cand (data, i);
5776 if (cand == except_cand)
5777 continue;
5779 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5781 if (compare_costs (acost, best_cost) < 0)
5783 best_cost = acost;
5784 iv_ca_delta_free (&best_delta);
5785 best_delta = act_delta;
5787 else
5788 iv_ca_delta_free (&act_delta);
5791 if (!best_delta)
5793 *delta = NULL;
5794 return best_cost;
5797 /* Recurse to possibly remove other unnecessary ivs. */
5798 iv_ca_delta_commit (data, ivs, best_delta, true);
5799 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5800 iv_ca_delta_commit (data, ivs, best_delta, false);
5801 *delta = iv_ca_delta_join (best_delta, *delta);
5802 return best_cost;
5805 /* Tries to extend the set IVS in the best possible way in order
5806 to express the USE. If ORIGINALP is true, prefer candidates from
5807 the original set of IVs, otherwise favor important candidates not
5808 based on any memory object. */
5810 static bool
5811 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5812 struct iv_use *use, bool originalp)
5814 comp_cost best_cost, act_cost;
5815 unsigned i;
5816 bitmap_iterator bi;
5817 struct iv_cand *cand;
5818 struct iv_ca_delta *best_delta = NULL, *act_delta;
5819 struct cost_pair *cp;
5821 iv_ca_add_use (data, ivs, use, false);
5822 best_cost = iv_ca_cost (ivs);
5824 cp = iv_ca_cand_for_use (ivs, use);
5825 if (!cp)
5827 ivs->upto--;
5828 ivs->bad_uses--;
5829 iv_ca_add_use (data, ivs, use, true);
5830 best_cost = iv_ca_cost (ivs);
5831 cp = iv_ca_cand_for_use (ivs, use);
5833 if (cp)
5835 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5836 iv_ca_set_no_cp (data, ivs, use);
5839 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5840 first try important candidates not based on any memory object. Only if
5841 this fails, try the specific ones. Rationale -- in loops with many
5842 variables the best choice often is to use just one generic biv. If we
5843 added here many ivs specific to the uses, the optimization algorithm later
5844 would be likely to get stuck in a local minimum, thus causing us to create
5845 too many ivs. The approach from few ivs to more seems more likely to be
5846 successful -- starting from few ivs, replacing an expensive use by a
5847 specific iv should always be a win. */
5848 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5850 cand = iv_cand (data, i);
5852 if (originalp && cand->pos != IP_ORIGINAL)
5853 continue;
5855 if (!originalp && cand->iv->base_object != NULL_TREE)
5856 continue;
5858 if (iv_ca_cand_used_p (ivs, cand))
5859 continue;
5861 cp = get_use_iv_cost (data, use, cand);
5862 if (!cp)
5863 continue;
5865 iv_ca_set_cp (data, ivs, use, cp);
5866 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5867 true);
5868 iv_ca_set_no_cp (data, ivs, use);
5869 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5871 if (compare_costs (act_cost, best_cost) < 0)
5873 best_cost = act_cost;
5875 iv_ca_delta_free (&best_delta);
5876 best_delta = act_delta;
5878 else
5879 iv_ca_delta_free (&act_delta);
5882 if (infinite_cost_p (best_cost))
5884 for (i = 0; i < use->n_map_members; i++)
5886 cp = use->cost_map + i;
5887 cand = cp->cand;
5888 if (!cand)
5889 continue;
5891 /* Already tried this. */
5892 if (cand->important)
5894 if (originalp && cand->pos == IP_ORIGINAL)
5895 continue;
5896 if (!originalp && cand->iv->base_object == NULL_TREE)
5897 continue;
5900 if (iv_ca_cand_used_p (ivs, cand))
5901 continue;
5903 act_delta = NULL;
5904 iv_ca_set_cp (data, ivs, use, cp);
5905 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5906 iv_ca_set_no_cp (data, ivs, use);
5907 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5908 cp, act_delta);
5910 if (compare_costs (act_cost, best_cost) < 0)
5912 best_cost = act_cost;
5914 if (best_delta)
5915 iv_ca_delta_free (&best_delta);
5916 best_delta = act_delta;
5918 else
5919 iv_ca_delta_free (&act_delta);
5923 iv_ca_delta_commit (data, ivs, best_delta, true);
5924 iv_ca_delta_free (&best_delta);
5926 return !infinite_cost_p (best_cost);
5929 /* Finds an initial assignment of candidates to uses. */
5931 static struct iv_ca *
5932 get_initial_solution (struct ivopts_data *data, bool originalp)
5934 struct iv_ca *ivs = iv_ca_new (data);
5935 unsigned i;
5937 for (i = 0; i < n_iv_uses (data); i++)
5938 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5940 iv_ca_free (&ivs);
5941 return NULL;
5944 return ivs;
5947 /* Tries to improve set of induction variables IVS. */
5949 static bool
5950 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5952 unsigned i, n_ivs;
5953 comp_cost acost, best_cost = iv_ca_cost (ivs);
5954 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5955 struct iv_cand *cand;
5957 /* Try extending the set of induction variables by one. */
5958 for (i = 0; i < n_iv_cands (data); i++)
5960 cand = iv_cand (data, i);
5962 if (iv_ca_cand_used_p (ivs, cand))
5963 continue;
5965 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5966 if (!act_delta)
5967 continue;
5969 /* If we successfully added the candidate and the set is small enough,
5970 try optimizing it by removing other candidates. */
5971 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5973 iv_ca_delta_commit (data, ivs, act_delta, true);
5974 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5975 iv_ca_delta_commit (data, ivs, act_delta, false);
5976 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5979 if (compare_costs (acost, best_cost) < 0)
5981 best_cost = acost;
5982 iv_ca_delta_free (&best_delta);
5983 best_delta = act_delta;
5985 else
5986 iv_ca_delta_free (&act_delta);
5989 if (!best_delta)
5991 /* Try removing the candidates from the set instead. */
5992 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5994 /* Nothing more we can do. */
5995 if (!best_delta)
5996 return false;
5999 iv_ca_delta_commit (data, ivs, best_delta, true);
6000 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6001 iv_ca_delta_free (&best_delta);
6002 return true;
6005 /* Attempts to find the optimal set of induction variables. We use a simple
6006 greedy heuristic -- we try to replace at most one candidate in the selected
6007 solution and remove the unused ivs while this improves the cost. */
6009 static struct iv_ca *
6010 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6012 struct iv_ca *set;
6014 /* Get the initial solution. */
6015 set = get_initial_solution (data, originalp);
6016 if (!set)
6018 if (dump_file && (dump_flags & TDF_DETAILS))
6019 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6020 return NULL;
6023 if (dump_file && (dump_flags & TDF_DETAILS))
6025 fprintf (dump_file, "Initial set of candidates:\n");
6026 iv_ca_dump (data, dump_file, set);
6029 while (try_improve_iv_set (data, set))
6031 if (dump_file && (dump_flags & TDF_DETAILS))
6033 fprintf (dump_file, "Improved to:\n");
6034 iv_ca_dump (data, dump_file, set);
6038 return set;
6041 static struct iv_ca *
6042 find_optimal_iv_set (struct ivopts_data *data)
6044 unsigned i;
6045 struct iv_ca *set, *origset;
6046 struct iv_use *use;
6047 comp_cost cost, origcost;
6049 /* Determine the cost based on a strategy that starts with original IVs,
6050 and try again using a strategy that prefers candidates not based
6051 on any IVs. */
6052 origset = find_optimal_iv_set_1 (data, true);
6053 set = find_optimal_iv_set_1 (data, false);
6055 if (!origset && !set)
6056 return NULL;
6058 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6059 cost = set ? iv_ca_cost (set) : infinite_cost;
6061 if (dump_file && (dump_flags & TDF_DETAILS))
6063 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6064 origcost.cost, origcost.complexity);
6065 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6066 cost.cost, cost.complexity);
6069 /* Choose the one with the best cost. */
6070 if (compare_costs (origcost, cost) <= 0)
6072 if (set)
6073 iv_ca_free (&set);
6074 set = origset;
6076 else if (origset)
6077 iv_ca_free (&origset);
6079 for (i = 0; i < n_iv_uses (data); i++)
6081 use = iv_use (data, i);
6082 use->selected = iv_ca_cand_for_use (set, use)->cand;
6085 return set;
6088 /* Creates a new induction variable corresponding to CAND. */
6090 static void
6091 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6093 gimple_stmt_iterator incr_pos;
6094 tree base;
6095 bool after = false;
6097 if (!cand->iv)
6098 return;
6100 switch (cand->pos)
6102 case IP_NORMAL:
6103 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6104 break;
6106 case IP_END:
6107 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6108 after = true;
6109 break;
6111 case IP_AFTER_USE:
6112 after = true;
6113 /* fall through */
6114 case IP_BEFORE_USE:
6115 incr_pos = gsi_for_stmt (cand->incremented_at);
6116 break;
6118 case IP_ORIGINAL:
6119 /* Mark that the iv is preserved. */
6120 name_info (data, cand->var_before)->preserve_biv = true;
6121 name_info (data, cand->var_after)->preserve_biv = true;
6123 /* Rewrite the increment so that it uses var_before directly. */
6124 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6125 return;
6128 gimple_add_tmp_var (cand->var_before);
6130 base = unshare_expr (cand->iv->base);
6132 create_iv (base, unshare_expr (cand->iv->step),
6133 cand->var_before, data->current_loop,
6134 &incr_pos, after, &cand->var_before, &cand->var_after);
6137 /* Creates new induction variables described in SET. */
6139 static void
6140 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6142 unsigned i;
6143 struct iv_cand *cand;
6144 bitmap_iterator bi;
6146 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6148 cand = iv_cand (data, i);
6149 create_new_iv (data, cand);
6152 if (dump_file && (dump_flags & TDF_DETAILS))
6154 fprintf (dump_file, "\nSelected IV set: \n");
6155 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6157 cand = iv_cand (data, i);
6158 dump_cand (dump_file, cand);
6160 fprintf (dump_file, "\n");
6164 /* Rewrites USE (definition of iv used in a nonlinear expression)
6165 using candidate CAND. */
6167 static void
6168 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6169 struct iv_use *use, struct iv_cand *cand)
6171 tree comp;
6172 tree op, tgt;
6173 gimple ass;
6174 gimple_stmt_iterator bsi;
6176 /* An important special case -- if we are asked to express value of
6177 the original iv by itself, just exit; there is no need to
6178 introduce a new computation (that might also need casting the
6179 variable to unsigned and back). */
6180 if (cand->pos == IP_ORIGINAL
6181 && cand->incremented_at == use->stmt)
6183 enum tree_code stmt_code;
6185 gcc_assert (is_gimple_assign (use->stmt));
6186 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6188 /* Check whether we may leave the computation unchanged.
6189 This is the case only if it does not rely on other
6190 computations in the loop -- otherwise, the computation
6191 we rely upon may be removed in remove_unused_ivs,
6192 thus leading to ICE. */
6193 stmt_code = gimple_assign_rhs_code (use->stmt);
6194 if (stmt_code == PLUS_EXPR
6195 || stmt_code == MINUS_EXPR
6196 || stmt_code == POINTER_PLUS_EXPR)
6198 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6199 op = gimple_assign_rhs2 (use->stmt);
6200 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6201 op = gimple_assign_rhs1 (use->stmt);
6202 else
6203 op = NULL_TREE;
6205 else
6206 op = NULL_TREE;
6208 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6209 return;
6212 comp = get_computation (data->current_loop, use, cand);
6213 gcc_assert (comp != NULL_TREE);
6215 switch (gimple_code (use->stmt))
6217 case GIMPLE_PHI:
6218 tgt = PHI_RESULT (use->stmt);
6220 /* If we should keep the biv, do not replace it. */
6221 if (name_info (data, tgt)->preserve_biv)
6222 return;
6224 bsi = gsi_after_labels (gimple_bb (use->stmt));
6225 break;
6227 case GIMPLE_ASSIGN:
6228 tgt = gimple_assign_lhs (use->stmt);
6229 bsi = gsi_for_stmt (use->stmt);
6230 break;
6232 default:
6233 gcc_unreachable ();
6236 if (!valid_gimple_rhs_p (comp)
6237 || (gimple_code (use->stmt) != GIMPLE_PHI
6238 /* We can't allow re-allocating the stmt as it might be pointed
6239 to still. */
6240 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6241 >= gimple_num_ops (gsi_stmt (bsi)))))
6243 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6244 true, GSI_SAME_STMT);
6245 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6247 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6248 /* As this isn't a plain copy we have to reset alignment
6249 information. */
6250 if (SSA_NAME_PTR_INFO (comp))
6251 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6255 if (gimple_code (use->stmt) == GIMPLE_PHI)
6257 ass = gimple_build_assign (tgt, comp);
6258 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6260 bsi = gsi_for_stmt (use->stmt);
6261 remove_phi_node (&bsi, false);
6263 else
6265 gimple_assign_set_rhs_from_tree (&bsi, comp);
6266 use->stmt = gsi_stmt (bsi);
6270 /* Performs a peephole optimization to reorder the iv update statement with
6271 a mem ref to enable instruction combining in later phases. The mem ref uses
6272 the iv value before the update, so the reordering transformation requires
6273 adjustment of the offset. CAND is the selected IV_CAND.
6275 Example:
6277 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6278 iv2 = iv1 + 1;
6280 if (t < val) (1)
6281 goto L;
6282 goto Head;
6285 directly propagating t over to (1) would introduce an overlapping live
6286 range and thus increase register pressure. This peephole transforms it into:
6289 iv2 = iv1 + 1;
6290 t = MEM_REF (base, iv2, 8, 8);
6291 if (t < val)
6292 goto L;
6293 goto Head;
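/* Note on the example above: the MEM_REF offset shrinks from 16 to 8,
   i.e. by exactly one stride, to compensate for indexing with
   iv2 = iv1 + 1 instead of iv1.  */
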
static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;
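
  /* The expected shape is USE->stmt, then the iv update, then the exit
     condition, as the last non-debug statements of BB; walk backwards
     from the condition to verify this.  */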
  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
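  /* Illustrative example: for a use such as *p_7, where the chosen
     candidate's iv is based on the object p, the candidate's SSA name
     at USE->stmt is passed as the hint, so that create_mem_ref can
     treat it as the base pointer of the reference.  */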
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }
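
  /* Illustrative example of the branch above: an exit test such as
     "if (i_4 < n_6)" may become "if (ivtmp_23 != bound_8)", where
     bound_8 is computed on the preheader edge from the number of
     iterations.  */
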
  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
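                  /* For example (illustrative): a bind such as
                     "# DEBUG i => i_4" contributes 1 to COUNT, while
                     "# DEBUG x => i_4 + 1" uses the DEF inside a larger
                     expression and contributes 2.  */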
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
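              /* Scan the selected candidates of the first 64 uses and
                 pick the one whose iv most closely resembles INFO->iv:
                 a matching step is preferred most (weight 4), then a
                 base of the same mode (weight 2), then a constant base
                 (weight 1).  */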
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY (NUM_NODES basic blocks) includes any
   function calls other than inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* Determine for each ssa name whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}