/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
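
/* For illustration (sketch only, not from the original sources): given a
   C loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may strength-reduce the address computation &a[i] into a
   pointer induction variable, producing code equivalent to

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   and eliminating the counter i if it has no other uses.  The actual
   choice depends on the target costs described above.  */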

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
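
/* Illustrative note (the comparison logic itself appears later in this
   file, not in this excerpt): when two costs are compared, the lower
   COST is preferred and COMPLEXITY serves only as a tie breaker, so
   e.g. {4, 1} beats {4, 2}, while {3, 5} beats {4, 0}.  */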

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
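
/* Example (illustrative, not from the original sources): for EXPR of
   the form &a[i] + 4, the POINTER_PLUS_EXPR and ADDR_EXPR cases lead to
   get_base_address returning the declaration `a', so the result is &a
   converted to ptr_type_node; for an integer constant cast to a pointer
   the result is NULL_TREE, i.e. no base object.  */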

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
                                             &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
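
/* Example (illustrative, not from the original sources): for the
   loop-header phi

     i_1 = PHI <0(preheader), i_2(latch)>

   where the latch computes i_2 = i_1 + 1, simple_iv describes i_1 as
   {base 0, step 1} and determine_biv_step returns the constant 1.  A phi
   whose value does not change in the loop has step 0 and yields
   NULL_TREE.  */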

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not already reset by the caller, so
     that it is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
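
/* Example (illustrative, not from the original sources): for an exit
   test

     if (i_1 < n_2)

   where i_1 is an induction variable with nonzero step and n_2 is loop
   invariant, *CONTROL_VAR is set to point to i_1 and *BOUND to n_2, and
   true is returned.  If both operands are invariant (or both are ivs),
   false is returned instead.  */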

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
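
/* Example (illustrative, not from the original sources): when
   for_each_index walks a reference a[i] whose index i is an iv with
   step 1 and whose elements are 4 bytes, *IDX is replaced by the initial
   value of i and 4 * 1 is added to DTA->step, i.e. the whole reference
   advances by 4 bytes per iteration.  */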

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
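
/* Example (illustrative, not from the original sources):
   constant_multiple_of stores 1 when TOP and BOT are equal; for
   TOP = n * 8 and BOT = n the MULT_EXPR case stores 8; for TOP = 12 and
   BOT = 4 the INTEGER_CST case stores 3; for TOP = 13 and BOT = 4 the
   division leaves a remainder, so false is returned.  */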

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
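
/* Example (illustrative, not from the original sources): on a
   strict-alignment target, a reference in SImode (requiring 32-bit
   alignment) is reported as possibly unaligned if its base is only
   byte-aligned, if BITPOS is not a multiple of the mode alignment, or
   if STEP is not a multiple of 4 bytes, since some iteration would then
   access a misaligned address.  */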

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2039 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2040 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2041 we are at the top-level of the processed address. */
2043 static tree
2044 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2045 HOST_WIDE_INT *offset)
2047 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2048 enum tree_code code;
2049 tree type, orig_type = TREE_TYPE (expr);
2050 HOST_WIDE_INT off0, off1, st;
2051 tree orig_expr = expr;
2053 STRIP_NOPS (expr);
2055 type = TREE_TYPE (expr);
2056 code = TREE_CODE (expr);
2057 *offset = 0;
2059 switch (code)
2061 case INTEGER_CST:
2062 if (!cst_and_fits_in_hwi (expr)
2063 || integer_zerop (expr))
2064 return orig_expr;
2066 *offset = int_cst_value (expr);
2067 return build_int_cst (orig_type, 0);
2069 case POINTER_PLUS_EXPR:
2070 case PLUS_EXPR:
2071 case MINUS_EXPR:
2072 op0 = TREE_OPERAND (expr, 0);
2073 op1 = TREE_OPERAND (expr, 1);
2075 op0 = strip_offset_1 (op0, false, false, &off0);
2076 op1 = strip_offset_1 (op1, false, false, &off1);
2078 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2079 if (op0 == TREE_OPERAND (expr, 0)
2080 && op1 == TREE_OPERAND (expr, 1))
2081 return orig_expr;
2083 if (integer_zerop (op1))
2084 expr = op0;
2085 else if (integer_zerop (op0))
2087 if (code == MINUS_EXPR)
2088 expr = fold_build1 (NEGATE_EXPR, type, op1);
2089 else
2090 expr = op1;
2092 else
2093 expr = fold_build2 (code, type, op0, op1);
2095 return fold_convert (orig_type, expr);
2097 case MULT_EXPR:
2098 op1 = TREE_OPERAND (expr, 1);
2099 if (!cst_and_fits_in_hwi (op1))
2100 return orig_expr;
2102 op0 = TREE_OPERAND (expr, 0);
2103 op0 = strip_offset_1 (op0, false, false, &off0);
2104 if (op0 == TREE_OPERAND (expr, 0))
2105 return orig_expr;
2107 *offset = off0 * int_cst_value (op1);
2108 if (integer_zerop (op0))
2109 expr = op0;
2110 else
2111 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2113 return fold_convert (orig_type, expr);
2115 case ARRAY_REF:
2116 case ARRAY_RANGE_REF:
2117 if (!inside_addr)
2118 return orig_expr;
2120 step = array_ref_element_size (expr);
2121 if (!cst_and_fits_in_hwi (step))
2122 break;
2124 st = int_cst_value (step);
2125 op1 = TREE_OPERAND (expr, 1);
2126 op1 = strip_offset_1 (op1, false, false, &off1);
2127 *offset = off1 * st;
2129 if (top_compref
2130 && integer_zerop (op1))
2132 /* Strip the component reference completely. */
2133 op0 = TREE_OPERAND (expr, 0);
2134 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2135 *offset += off0;
2136 return op0;
2138 break;
2140 case COMPONENT_REF:
2142 tree field;
2144 if (!inside_addr)
2145 return orig_expr;
2147 tmp = component_ref_field_offset (expr);
2148 field = TREE_OPERAND (expr, 1);
2149 if (top_compref
2150 && cst_and_fits_in_hwi (tmp)
2151 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2153 HOST_WIDE_INT boffset, abs_off;
2155 /* Strip the component reference completely. */
2156 op0 = TREE_OPERAND (expr, 0);
2157 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2158 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2159 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2160 if (boffset < 0)
2161 abs_off = -abs_off;
2163 *offset = off0 + int_cst_value (tmp) + abs_off;
2164 return op0;
2167 break;
2169 case ADDR_EXPR:
2170 op0 = TREE_OPERAND (expr, 0);
2171 op0 = strip_offset_1 (op0, true, true, &off0);
2172 *offset += off0;
2174 if (op0 == TREE_OPERAND (expr, 0))
2175 return orig_expr;
2177 expr = build_fold_addr_expr (op0);
2178 return fold_convert (orig_type, expr);
2180 case MEM_REF:
2181 /* ??? Offset operand? */
2182 inside_addr = false;
2183 break;
2185 default:
2186 return orig_expr;
2189 /* Default handling of expressions for that we want to recurse into
2190 the first operand. */
2191 op0 = TREE_OPERAND (expr, 0);
2192 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2193 *offset += off0;
2195 if (op0 == TREE_OPERAND (expr, 0)
2196 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2197 return orig_expr;
2199 expr = copy_node (expr);
2200 TREE_OPERAND (expr, 0) = op0;
2201 if (op1)
2202 TREE_OPERAND (expr, 1) = op1;
2204 /* Inside an address, we might strip the top-level component references,
2205 thus changing the type of the expression. Handling of ADDR_EXPR
2206 will fix that. */
2207 expr = fold_convert (orig_type, expr);
2209 return expr;
2212 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2214 static tree
2215 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2217 HOST_WIDE_INT off;
2218 tree core = strip_offset_1 (expr, false, false, &off);
2219 *offset = off;
2220 return core;
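/* Illustrative sketch (added here, not part of the pass): what the
   recursion above computes for a simple affine expression.  For
   (i + 3) * 4, strip_offset_1 first strips 3 from the inner PLUS_EXPR;
   the MULT_EXPR case then scales the accumulated offset by
   int_cst_value (op1), so the returned core is i * 4 and *offset
   becomes 12.  The toy program below just mirrors that arithmetic.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long off0 = 3;                /* offset stripped from the inner PLUS_EXPR  */
  long scale = 4;               /* constant operand of the MULT_EXPR  */
  long offset = off0 * scale;   /* *offset = off0 * int_cst_value (op1)  */
  printf ("core: i * %ld, offset: %ld\n", scale, offset);  /* offset: 12  */
  return 0;
}
#endif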
2223 /* Returns a variant of TYPE that can be used as a base for different uses.
2224 We return an unsigned type with the same precision, which avoids problems
2225 with overflows. */
2227 static tree
2228 generic_type_for (tree type)
2230 if (POINTER_TYPE_P (type))
2231 return unsigned_type_for (type);
2233 if (TYPE_UNSIGNED (type))
2234 return type;
2236 return unsigned_type_for (type);
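/* A minimal aside on why the unsigned variant is used: signed overflow
   is undefined behavior in C, while unsigned arithmetic is defined to
   wrap modulo 2^precision, so candidate ivs computed in the unsigned
   type may wrap safely.  Toy demonstration (not part of the pass):  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int iv = ~0u;   /* UINT_MAX  */
  iv += 1;                 /* well defined: wraps around to 0  */
  printf ("%u\n", iv);     /* prints 0  */
  return 0;
}
#endif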
2239 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2240 the bitmap in which we should store them. */
2242 static struct ivopts_data *fd_ivopts_data;
2243 static tree
2244 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2246 bitmap *depends_on = (bitmap *) data;
2247 struct version_info *info;
2249 if (TREE_CODE (*expr_p) != SSA_NAME)
2250 return NULL_TREE;
2251 info = name_info (fd_ivopts_data, *expr_p);
2253 if (!info->inv_id || info->has_nonlin_use)
2254 return NULL_TREE;
2256 if (!*depends_on)
2257 *depends_on = BITMAP_ALLOC (NULL);
2258 bitmap_set_bit (*depends_on, info->inv_id);
2260 return NULL_TREE;
2263 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2264 position to POS. If USE is not NULL, the candidate is set as related to
2265 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2266 replacement of the final value of the iv by a direct computation. */
2268 static struct iv_cand *
2269 add_candidate_1 (struct ivopts_data *data,
2270 tree base, tree step, bool important, enum iv_position pos,
2271 struct iv_use *use, gimple incremented_at)
2273 unsigned i;
2274 struct iv_cand *cand = NULL;
2275 tree type, orig_type;
2277 /* For non-original variables, make sure their values are computed in a type
2278 that does not invoke undefined behavior on overflows (since in general,
2279 we cannot prove that these induction variables are non-wrapping). */
2280 if (pos != IP_ORIGINAL)
2282 orig_type = TREE_TYPE (base);
2283 type = generic_type_for (orig_type);
2284 if (type != orig_type)
2286 base = fold_convert (type, base);
2287 step = fold_convert (type, step);
2291 for (i = 0; i < n_iv_cands (data); i++)
2293 cand = iv_cand (data, i);
2295 if (cand->pos != pos)
2296 continue;
2298 if (cand->incremented_at != incremented_at
2299 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2300 && cand->ainc_use != use))
2301 continue;
2303 if (!cand->iv)
2305 if (!base && !step)
2306 break;
2308 continue;
2311 if (!base && !step)
2312 continue;
2314 if (operand_equal_p (base, cand->iv->base, 0)
2315 && operand_equal_p (step, cand->iv->step, 0)
2316 && (TYPE_PRECISION (TREE_TYPE (base))
2317 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2318 break;
2321 if (i == n_iv_cands (data))
2323 cand = XCNEW (struct iv_cand);
2324 cand->id = i;
2326 if (!base && !step)
2327 cand->iv = NULL;
2328 else
2329 cand->iv = alloc_iv (base, step);
2331 cand->pos = pos;
2332 if (pos != IP_ORIGINAL && cand->iv)
2334 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2335 cand->var_after = cand->var_before;
2337 cand->important = important;
2338 cand->incremented_at = incremented_at;
2339 data->iv_candidates.safe_push (cand);
2341 if (step
2342 && TREE_CODE (step) != INTEGER_CST)
2344 fd_ivopts_data = data;
2345 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2348 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2349 cand->ainc_use = use;
2350 else
2351 cand->ainc_use = NULL;
2353 if (dump_file && (dump_flags & TDF_DETAILS))
2354 dump_cand (dump_file, cand);
2357 if (important && !cand->important)
2359 cand->important = true;
2360 if (dump_file && (dump_flags & TDF_DETAILS))
2361 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2364 if (use)
2366 bitmap_set_bit (use->related_cands, i);
2367 if (dump_file && (dump_flags & TDF_DETAILS))
2368 fprintf (dump_file, "Candidate %d is related to use %d\n",
2369 cand->id, use->id);
2372 return cand;
2375 /* Returns true if incrementing the induction variable at the end of the LOOP
2376 is allowed.
2378 The purpose is to avoid splitting the latch edge with a biv increment, thus
2379 creating a jump, possibly confusing other optimization passes and leaving
2380 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2381 is not available (so we do not have a better alternative), or if the latch
2382 edge is already nonempty. */
2384 static bool
2385 allow_ip_end_pos_p (struct loop *loop)
2387 if (!ip_normal_pos (loop))
2388 return true;
2390 if (!empty_block_p (ip_end_pos (loop)))
2391 return true;
2393 return false;
2396 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2397 Important field is set to IMPORTANT. */
2399 static void
2400 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2401 bool important, struct iv_use *use)
2403 basic_block use_bb = gimple_bb (use->stmt);
2404 enum machine_mode mem_mode;
2405 unsigned HOST_WIDE_INT cstepi;
2407 /* If we insert the increment in any position other than the standard
2408 ones, we must ensure that it is incremented once per iteration.
2409 It must not be in an inner nested loop, or one side of an if
2410 statement. */
2411 if (use_bb->loop_father != data->current_loop
2412 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2413 || stmt_could_throw_p (use->stmt)
2414 || !cst_and_fits_in_hwi (step))
2415 return;
2417 cstepi = int_cst_value (step);
2419 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2420 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2421 || USE_STORE_PRE_INCREMENT (mem_mode))
2422 && GET_MODE_SIZE (mem_mode) == cstepi)
2423 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2424 || USE_STORE_PRE_DECREMENT (mem_mode))
2425 && GET_MODE_SIZE (mem_mode) == -cstepi))
2427 enum tree_code code = MINUS_EXPR;
2428 tree new_base;
2429 tree new_step = step;
2431 if (POINTER_TYPE_P (TREE_TYPE (base)))
2433 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2434 code = POINTER_PLUS_EXPR;
2436 else
2437 new_step = fold_convert (TREE_TYPE (base), new_step);
2438 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2439 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2440 use->stmt);
2442 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2443 || USE_STORE_POST_INCREMENT (mem_mode))
2444 && GET_MODE_SIZE (mem_mode) == cstepi)
2445 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2446 || USE_STORE_POST_DECREMENT (mem_mode))
2447 && GET_MODE_SIZE (mem_mode) == -cstepi))
2449 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2450 use->stmt);
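/* A minimal sketch of the bases involved (toy values, not from the
   pass): with a 4-byte mem_mode and step +4, the pre-increment block
   above subtracts the step from the base for the IP_BEFORE_USE
   candidate, while the post-increment block reuses the original base
   for the IP_AFTER_USE candidate.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long base = 1000, step = 4;   /* GET_MODE_SIZE (mem_mode) == step  */
  long pre_base = base - step;  /* base of the IP_BEFORE_USE candidate  */
  printf ("pre-inc candidate:  %ld + 4*i\n", pre_base);
  printf ("post-inc candidate: %ld + 4*i\n", base);
  return 0;
}
#endif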
2454 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2455 position to POS. If USE is not NULL, the candidate is set as related to
2456 it. The candidate computation is scheduled on all available positions. */
2458 static void
2459 add_candidate (struct ivopts_data *data,
2460 tree base, tree step, bool important, struct iv_use *use)
2462 if (ip_normal_pos (data->current_loop))
2463 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2464 if (ip_end_pos (data->current_loop)
2465 && allow_ip_end_pos_p (data->current_loop))
2466 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2468 if (use != NULL && use->type == USE_ADDRESS)
2469 add_autoinc_candidates (data, base, step, important, use);
2472 /* Adds standard iv candidates. */
2474 static void
2475 add_standard_iv_candidates (struct ivopts_data *data)
2477 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2479 /* The same for a double-integer type if it is still fast enough. */
2480 if (TYPE_PRECISION
2481 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2482 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2483 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2484 build_int_cst (long_integer_type_node, 1), true, NULL);
2486 /* The same for a double-integer type if it is still fast enough. */
2487 if (TYPE_PRECISION
2488 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2489 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2490 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2491 build_int_cst (long_long_integer_type_node, 1), true, NULL);
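/* Worked example (assuming a typical LP64 target, which the source does
   not state): int is 32 bits, long and long long are 64 bits, and
   BITS_PER_WORD is 64.  The first condition holds (64 > 32 and
   64 <= 64), so a {0, +, 1} candidate in long is added; the second
   fails (64 > 64 is false), so no long long candidate is added.  */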
2495 /* Adds candidates based on the old induction variable IV. */
2497 static void
2498 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2500 gimple phi;
2501 tree def;
2502 struct iv_cand *cand;
2504 add_candidate (data, iv->base, iv->step, true, NULL);
2506 /* The same, but with initial value zero. */
2507 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2508 add_candidate (data, size_int (0), iv->step, true, NULL);
2509 else
2510 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2511 iv->step, true, NULL);
2513 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2514 if (gimple_code (phi) == GIMPLE_PHI)
2516 /* Additionally record the possibility of leaving the original iv
2517 untouched. */
2518 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2519 cand = add_candidate_1 (data,
2520 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2521 SSA_NAME_DEF_STMT (def));
2522 cand->var_before = iv->ssa_name;
2523 cand->var_after = def;
2527 /* Adds candidates based on the old induction variables. */
2529 static void
2530 add_old_ivs_candidates (struct ivopts_data *data)
2532 unsigned i;
2533 struct iv *iv;
2534 bitmap_iterator bi;
2536 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2538 iv = ver_info (data, i)->iv;
2539 if (iv && iv->biv_p && !integer_zerop (iv->step))
2540 add_old_iv_candidates (data, iv);
2544 /* Adds candidates based on the value of the induction variable IV and USE. */
2546 static void
2547 add_iv_value_candidates (struct ivopts_data *data,
2548 struct iv *iv, struct iv_use *use)
2550 unsigned HOST_WIDE_INT offset;
2551 tree base;
2552 tree basetype;
2554 add_candidate (data, iv->base, iv->step, false, use);
2556 /* The same, but with initial value zero. Make such variable important,
2557 since it is generic enough so that possibly many uses may be based
2558 on it. */
2559 basetype = TREE_TYPE (iv->base);
2560 if (POINTER_TYPE_P (basetype))
2561 basetype = sizetype;
2562 add_candidate (data, build_int_cst (basetype, 0),
2563 iv->step, true, use);
2565 /* Third, try removing the constant offset. Make sure to even
2566 add a candidate for &a[0] vs. (T *)&a. */
2567 base = strip_offset (iv->base, &offset);
2568 if (offset
2569 || base != iv->base)
2570 add_candidate (data, base, iv->step, false, use);
2573 /* Adds candidates based on the uses. */
2575 static void
2576 add_derived_ivs_candidates (struct ivopts_data *data)
2578 unsigned i;
2580 for (i = 0; i < n_iv_uses (data); i++)
2582 struct iv_use *use = iv_use (data, i);
2584 if (!use)
2585 continue;
2587 switch (use->type)
2589 case USE_NONLINEAR_EXPR:
2590 case USE_COMPARE:
2591 case USE_ADDRESS:
2592 /* Just add the ivs based on the value of the iv used here. */
2593 add_iv_value_candidates (data, use->iv, use);
2594 break;
2596 default:
2597 gcc_unreachable ();
2602 /* Record important candidates and add them to related_cands bitmaps
2603 if needed. */
2605 static void
2606 record_important_candidates (struct ivopts_data *data)
2608 unsigned i;
2609 struct iv_use *use;
2611 for (i = 0; i < n_iv_cands (data); i++)
2613 struct iv_cand *cand = iv_cand (data, i);
2615 if (cand->important)
2616 bitmap_set_bit (data->important_candidates, i);
2619 data->consider_all_candidates = (n_iv_cands (data)
2620 <= CONSIDER_ALL_CANDIDATES_BOUND);
2622 if (data->consider_all_candidates)
2624 /* We will not need "related_cands" bitmaps in this case,
2625 so release them to decrease peak memory consumption. */
2626 for (i = 0; i < n_iv_uses (data); i++)
2628 use = iv_use (data, i);
2629 BITMAP_FREE (use->related_cands);
2632 else
2634 /* Add important candidates to the related_cands bitmaps. */
2635 for (i = 0; i < n_iv_uses (data); i++)
2636 bitmap_ior_into (iv_use (data, i)->related_cands,
2637 data->important_candidates);
2641 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2642 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2643 we allocate a simple list to every use. */
2645 static void
2646 alloc_use_cost_map (struct ivopts_data *data)
2648 unsigned i, size, s;
2650 for (i = 0; i < n_iv_uses (data); i++)
2652 struct iv_use *use = iv_use (data, i);
2654 if (data->consider_all_candidates)
2655 size = n_iv_cands (data);
2656 else
2658 s = bitmap_count_bits (use->related_cands);
2660 /* Round up to a power of two, so that taking the modulus by it is fast. */
2661 size = s ? (1 << ceil_log2 (s)) : 1;
2664 use->n_map_members = size;
2665 use->cost_map = XCNEWVEC (struct cost_pair, size);
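/* A minimal sketch (toy code, not from the pass) of why the size is
   rounded up to a power of two: the probing in set_use_iv_cost and
   get_use_iv_cost below can then reduce a candidate id modulo the size
   with a single AND, id & (size - 1).  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned s = 5;          /* number of related candidates  */
  unsigned size = 1;
  while (size < s)         /* stand-in for 1 << ceil_log2 (s)  */
    size <<= 1;            /* size becomes 8  */
  printf ("13 mod %u == %u\n", size, 13u & (size - 1));   /* prints 5  */
  return 0;
}
#endif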
2669 /* Returns a description of the computation cost of an expression whose
2670 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2672 static comp_cost
2673 new_cost (unsigned runtime, unsigned complexity)
2675 comp_cost cost;
2677 cost.cost = runtime;
2678 cost.complexity = complexity;
2680 return cost;
2683 /* Adds costs COST1 and COST2. */
2685 static comp_cost
2686 add_costs (comp_cost cost1, comp_cost cost2)
2688 cost1.cost += cost2.cost;
2689 cost1.complexity += cost2.complexity;
2691 return cost1;
2693 /* Subtracts cost COST2 from COST1. */
2695 static comp_cost
2696 sub_costs (comp_cost cost1, comp_cost cost2)
2698 cost1.cost -= cost2.cost;
2699 cost1.complexity -= cost2.complexity;
2701 return cost1;
2704 /* Returns a negative number if COST1 < COST2, a positive number if
2705 COST1 > COST2, and 0 if COST1 = COST2. */
2707 static int
2708 compare_costs (comp_cost cost1, comp_cost cost2)
2710 if (cost1.cost == cost2.cost)
2711 return cost1.complexity - cost2.complexity;
2713 return cost1.cost - cost2.cost;
2716 /* Returns true if COST is infinite. */
2718 static bool
2719 infinite_cost_p (comp_cost cost)
2721 return cost.cost == INFTY;
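/* A minimal sketch (toy types, not from the pass) of the ordering that
   compare_costs implements: runtime cost is compared first and
   complexity only breaks ties, so {cost 4, complexity 9} is still
   considered cheaper than {cost 5, complexity 0}.  */
#if 0
#include <stdio.h>

struct toy_cost { int cost, complexity; };

static int
toy_compare (struct toy_cost c1, struct toy_cost c2)
{
  if (c1.cost == c2.cost)
    return c1.complexity - c2.complexity;
  return c1.cost - c2.cost;
}

int
main (void)
{
  struct toy_cost a = { 4, 9 }, b = { 5, 0 };
  printf ("%d\n", toy_compare (a, b));   /* negative: a is cheaper  */
  return 0;
}
#endif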
2724 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2725 on invariants DEPENDS_ON and that the value used in expressing it
2726 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2728 static void
2729 set_use_iv_cost (struct ivopts_data *data,
2730 struct iv_use *use, struct iv_cand *cand,
2731 comp_cost cost, bitmap depends_on, tree value,
2732 enum tree_code comp, int inv_expr_id)
2734 unsigned i, s;
2736 if (infinite_cost_p (cost))
2738 BITMAP_FREE (depends_on);
2739 return;
2742 if (data->consider_all_candidates)
2744 use->cost_map[cand->id].cand = cand;
2745 use->cost_map[cand->id].cost = cost;
2746 use->cost_map[cand->id].depends_on = depends_on;
2747 use->cost_map[cand->id].value = value;
2748 use->cost_map[cand->id].comp = comp;
2749 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2750 return;
2753 /* n_map_members is a power of two, so this computes modulo. */
2754 s = cand->id & (use->n_map_members - 1);
2755 for (i = s; i < use->n_map_members; i++)
2756 if (!use->cost_map[i].cand)
2757 goto found;
2758 for (i = 0; i < s; i++)
2759 if (!use->cost_map[i].cand)
2760 goto found;
2762 gcc_unreachable ();
2764 found:
2765 use->cost_map[i].cand = cand;
2766 use->cost_map[i].cost = cost;
2767 use->cost_map[i].depends_on = depends_on;
2768 use->cost_map[i].value = value;
2769 use->cost_map[i].comp = comp;
2770 use->cost_map[i].inv_expr_id = inv_expr_id;
2773 /* Gets cost of (USE, CANDIDATE) pair. */
2775 static struct cost_pair *
2776 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2777 struct iv_cand *cand)
2779 unsigned i, s;
2780 struct cost_pair *ret;
2782 if (!cand)
2783 return NULL;
2785 if (data->consider_all_candidates)
2787 ret = use->cost_map + cand->id;
2788 if (!ret->cand)
2789 return NULL;
2791 return ret;
2794 /* n_map_members is a power of two, so this computes modulo. */
2795 s = cand->id & (use->n_map_members - 1);
2796 for (i = s; i < use->n_map_members; i++)
2797 if (use->cost_map[i].cand == cand)
2798 return use->cost_map + i;
2799 else if (use->cost_map[i].cand == NULL)
2800 return NULL;
2801 for (i = 0; i < s; i++)
2802 if (use->cost_map[i].cand == cand)
2803 return use->cost_map + i;
2804 else if (use->cost_map[i].cand == NULL)
2805 return NULL;
2807 return NULL;
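/* A minimal sketch (toy table, not from the pass) of the lookup scheme
   used above: open addressing with linear probing, scanning from the
   hashed slot to the end of the table and then wrapping to the
   beginning; an empty slot ends the search.  */
#if 0
static int
toy_probe (const int *tab, unsigned n /* power of two */, int key)
{
  unsigned s = (unsigned) key & (n - 1), i;
  for (i = s; i < n; i++)
    if (tab[i] == key)
      return (int) i;
    else if (tab[i] == 0)   /* 0 marks an empty slot  */
      return -1;
  for (i = 0; i < s; i++)
    if (tab[i] == key)
      return (int) i;
    else if (tab[i] == 0)
      return -1;
  return -1;
}
#endif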
2810 /* Returns an estimate of the cost of computing SEQ. */
2812 static unsigned
2813 seq_cost (rtx seq, bool speed)
2815 unsigned cost = 0;
2816 rtx set;
2818 for (; seq; seq = NEXT_INSN (seq))
2820 set = single_set (seq);
2821 if (set)
2822 cost += set_src_cost (SET_SRC (set), speed);
2823 else
2824 cost++;
2827 return cost;
2830 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2831 static rtx
2832 produce_memory_decl_rtl (tree obj, int *regno)
2834 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2835 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2836 rtx x;
2838 gcc_assert (obj);
2839 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2841 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2842 x = gen_rtx_SYMBOL_REF (address_mode, name);
2843 SET_SYMBOL_REF_DECL (x, obj);
2844 x = gen_rtx_MEM (DECL_MODE (obj), x);
2845 set_mem_addr_space (x, as);
2846 targetm.encode_section_info (obj, x, true);
2848 else
2850 x = gen_raw_REG (address_mode, (*regno)++);
2851 x = gen_rtx_MEM (DECL_MODE (obj), x);
2852 set_mem_addr_space (x, as);
2855 return x;
2858 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2859 walk_tree. DATA contains the actual fake register number. */
2861 static tree
2862 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2864 tree obj = NULL_TREE;
2865 rtx x = NULL_RTX;
2866 int *regno = (int *) data;
2868 switch (TREE_CODE (*expr_p))
2870 case ADDR_EXPR:
2871 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2872 handled_component_p (*expr_p);
2873 expr_p = &TREE_OPERAND (*expr_p, 0))
2874 continue;
2875 obj = *expr_p;
2876 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2877 x = produce_memory_decl_rtl (obj, regno);
2878 break;
2880 case SSA_NAME:
2881 *ws = 0;
2882 obj = SSA_NAME_VAR (*expr_p);
2883 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2884 if (!obj)
2885 return NULL_TREE;
2886 if (!DECL_RTL_SET_P (obj))
2887 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2888 break;
2890 case VAR_DECL:
2891 case PARM_DECL:
2892 case RESULT_DECL:
2893 *ws = 0;
2894 obj = *expr_p;
2896 if (DECL_RTL_SET_P (obj))
2897 break;
2899 if (DECL_MODE (obj) == BLKmode)
2900 x = produce_memory_decl_rtl (obj, regno);
2901 else
2902 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2904 break;
2906 default:
2907 break;
2910 if (x)
2912 decl_rtl_to_reset.safe_push (obj);
2913 SET_DECL_RTL (obj, x);
2916 return NULL_TREE;
2919 /* Determines cost of the computation of EXPR. */
2921 static unsigned
2922 computation_cost (tree expr, bool speed)
2924 rtx seq, rslt;
2925 tree type = TREE_TYPE (expr);
2926 unsigned cost;
2927 /* Avoid using hard regs in ways which may be unsupported. */
2928 int regno = LAST_VIRTUAL_REGISTER + 1;
2929 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2930 enum node_frequency real_frequency = node->frequency;
2932 node->frequency = NODE_FREQUENCY_NORMAL;
2933 crtl->maybe_hot_insn_p = speed;
2934 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2935 start_sequence ();
2936 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2937 seq = get_insns ();
2938 end_sequence ();
2939 default_rtl_profile ();
2940 node->frequency = real_frequency;
2942 cost = seq_cost (seq, speed);
2943 if (MEM_P (rslt))
2944 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2945 TYPE_ADDR_SPACE (type), speed);
2946 else if (!REG_P (rslt))
2947 cost += set_src_cost (rslt, speed);
2949 return cost;
2952 /* Returns variable containing the value of candidate CAND at statement AT. */
2954 static tree
2955 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2957 if (stmt_after_increment (loop, cand, stmt))
2958 return cand->var_after;
2959 else
2960 return cand->var_before;
2963 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2964 same precision, which is at least as wide as the precision of TYPE, stores
2965 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2966 type of A and B. */
2968 static tree
2969 determine_common_wider_type (tree *a, tree *b)
2971 tree wider_type = NULL;
2972 tree suba, subb;
2973 tree atype = TREE_TYPE (*a);
2975 if (CONVERT_EXPR_P (*a))
2977 suba = TREE_OPERAND (*a, 0);
2978 wider_type = TREE_TYPE (suba);
2979 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2980 return atype;
2982 else
2983 return atype;
2985 if (CONVERT_EXPR_P (*b))
2987 subb = TREE_OPERAND (*b, 0);
2988 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2989 return atype;
2991 else
2992 return atype;
2994 *a = suba;
2995 *b = subb;
2996 return wider_type;
2999 /* Determines the expression by which USE is expressed from induction variable
3000 CAND at statement AT in LOOP. The expression is stored in decomposed
3001 form in AFF. Returns false if USE cannot be expressed using CAND. */
3003 static bool
3004 get_computation_aff (struct loop *loop,
3005 struct iv_use *use, struct iv_cand *cand, gimple at,
3006 struct affine_tree_combination *aff)
3008 tree ubase = use->iv->base;
3009 tree ustep = use->iv->step;
3010 tree cbase = cand->iv->base;
3011 tree cstep = cand->iv->step, cstep_common;
3012 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3013 tree common_type, var;
3014 tree uutype;
3015 aff_tree cbase_aff, var_aff;
3016 double_int rat;
3018 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3020 /* We do not have enough precision to express the values of USE. */
3021 return false;
3024 var = var_at_stmt (loop, cand, at);
3025 uutype = unsigned_type_for (utype);
3027 /* If the conversion is not noop, perform it. */
3028 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3030 cstep = fold_convert (uutype, cstep);
3031 cbase = fold_convert (uutype, cbase);
3032 var = fold_convert (uutype, var);
3035 if (!constant_multiple_of (ustep, cstep, &rat))
3036 return false;
3038 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3039 type, we achieve better folding by computing their difference in this
3040 wider type, and casting the result to UUTYPE. We do not need to worry about
3041 overflows, as all the arithmetic will in the end be performed in UUTYPE
3042 anyway. */
3043 common_type = determine_common_wider_type (&ubase, &cbase);
3045 /* use = ubase - ratio * cbase + ratio * var. */
3046 tree_to_aff_combination (ubase, common_type, aff);
3047 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3048 tree_to_aff_combination (var, uutype, &var_aff);
3050 /* We need to shift the value if we are after the increment. */
3051 if (stmt_after_increment (loop, cand, at))
3053 aff_tree cstep_aff;
3055 if (common_type != uutype)
3056 cstep_common = fold_convert (common_type, cstep);
3057 else
3058 cstep_common = cstep;
3060 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3061 aff_combination_add (&cbase_aff, &cstep_aff);
3064 aff_combination_scale (&cbase_aff, -rat);
3065 aff_combination_add (aff, &cbase_aff);
3066 if (common_type != uutype)
3067 aff_combination_convert (aff, uutype);
3069 aff_combination_scale (&var_aff, rat);
3070 aff_combination_add (aff, &var_aff);
3072 return true;
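/* A worked check (toy values, not from the pass) of the decomposition
   used above, use = ubase - ratio * cbase + ratio * var, assuming ustep
   is a constant multiple of cstep:  */
#if 0
#include <assert.h>

int
main (void)
{
  long ubase = 7, ustep = 8;      /* use iv:       use = ubase + i * ustep  */
  long cbase = 100, cstep = 4;    /* candidate iv: var = cbase + i * cstep  */
  long ratio = ustep / cstep;     /* what constant_multiple_of computes  */
  long i;
  for (i = 0; i < 16; i++)
    {
      long var = cbase + i * cstep;
      long use = ubase + i * ustep;
      assert (use == ubase - ratio * cbase + ratio * var);
    }
  return 0;
}
#endif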
3075 /* Return the type of USE. */
3077 static tree
3078 get_use_type (struct iv_use *use)
3080 tree base_type = TREE_TYPE (use->iv->base);
3081 tree type;
3083 if (use->type == USE_ADDRESS)
3085 /* The base_type may be a void pointer. Create a pointer type based on
3086 the mem_ref instead. */
3087 type = build_pointer_type (TREE_TYPE (*use->op_p));
3088 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3089 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3091 else
3092 type = base_type;
3094 return type;
3097 /* Determines the expression by which USE is expressed from induction variable
3098 CAND at statement AT in LOOP. The computation is unshared. */
3100 static tree
3101 get_computation_at (struct loop *loop,
3102 struct iv_use *use, struct iv_cand *cand, gimple at)
3104 aff_tree aff;
3105 tree type = get_use_type (use);
3107 if (!get_computation_aff (loop, use, cand, at, &aff))
3108 return NULL_TREE;
3109 unshare_aff_combination (&aff);
3110 return fold_convert (type, aff_combination_to_tree (&aff));
3113 /* Determines the expression by which USE is expressed from induction variable
3114 CAND in LOOP. The computation is unshared. */
3116 static tree
3117 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3119 return get_computation_at (loop, use, cand, use->stmt);
3122 /* Adjust the cost COST for being in loop setup rather than loop body.
3123 If we're optimizing for space, the loop setup overhead is constant;
3124 if we're optimizing for speed, amortize it over the per-iteration cost. */
3125 static unsigned
3126 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3128 if (cost == INFTY)
3129 return cost;
3130 else if (optimize_loop_for_speed_p (data->current_loop))
3131 return cost / avg_loop_niter (data->current_loop);
3132 else
3133 return cost;
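/* Worked example (toy numbers): with a setup cost of 20 and an average
   of 10 iterations, optimizing for speed amortizes the setup to
   20 / 10 == 2 per iteration, while optimizing for size keeps the full
   constant cost of 20.  */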
3136 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3137 validity for a memory reference accessing memory of mode MODE in
3138 address space AS. */
3141 bool
3142 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3143 addr_space_t as)
3145 #define MAX_RATIO 128
3146 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3147 static vec<sbitmap> valid_mult_list;
3148 sbitmap valid_mult;
3150 if (data_index >= valid_mult_list.length ())
3151 valid_mult_list.safe_grow_cleared (data_index + 1);
3153 valid_mult = valid_mult_list[data_index];
3154 if (!valid_mult)
3156 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3157 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3158 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3159 rtx addr, scaled;
3160 HOST_WIDE_INT i;
3162 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3163 bitmap_clear (valid_mult);
3164 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3165 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3166 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3168 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3169 if (memory_address_addr_space_p (mode, addr, as)
3170 || memory_address_addr_space_p (mode, scaled, as))
3171 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3176 fprintf (dump_file, " allowed multipliers:");
3177 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3178 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3179 fprintf (dump_file, " %d", (int) i);
3180 fprintf (dump_file, "\n");
3181 fprintf (dump_file, "\n");
3184 valid_mult_list[data_index] = valid_mult;
3187 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3188 return false;
3190 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
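/* A minimal sketch (toy values) of the bitmap indexing used above: one
   bit per ratio in [-MAX_RATIO, MAX_RATIO], biased by MAX_RATIO so that
   negative ratios map to nonnegative bit positions.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int max_ratio = 128;          /* MAX_RATIO above  */
  int ratio = -4;
  printf ("ratio %d -> bit %d of %d\n",
          ratio, ratio + max_ratio, 2 * max_ratio + 1);  /* bit 124 of 257  */
  return 0;
}
#endif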
3193 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3194 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3195 variable is omitted. Compute the cost for a memory reference that accesses
3196 a memory location of mode MEM_MODE in address space AS.
3198 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3199 size of MEM_MODE / RATIO) is available. To make this determination, we
3200 look at the size of the increment to be made, which is given in CSTEP.
3201 CSTEP may be zero if the step is unknown.
3202 STMT_AFTER_INC is true iff the statement we're looking at is after the
3203 increment of the original biv.
3205 TODO -- there must be some better way. This all is quite crude. */
3207 typedef struct address_cost_data_s
3209 HOST_WIDE_INT min_offset, max_offset;
3210 unsigned costs[2][2][2][2];
3211 } *address_cost_data;
3214 static comp_cost
3215 get_address_cost (bool symbol_present, bool var_present,
3216 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3217 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3218 addr_space_t as, bool speed,
3219 bool stmt_after_inc, bool *may_autoinc)
3221 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3222 static vec<address_cost_data> address_cost_data_list;
3223 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3224 address_cost_data data;
3225 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3226 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3227 unsigned cost, acost, complexity;
3228 bool offset_p, ratio_p, autoinc;
3229 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3230 unsigned HOST_WIDE_INT mask;
3231 unsigned bits;
3233 if (data_index >= address_cost_data_list.length ())
3234 address_cost_data_list.safe_grow_cleared (data_index + 1);
3236 data = address_cost_data_list[data_index];
3237 if (!data)
3239 HOST_WIDE_INT i;
3240 HOST_WIDE_INT rat, off = 0;
3241 int old_cse_not_expected, width;
3242 unsigned sym_p, var_p, off_p, rat_p, add_c;
3243 rtx seq, addr, base;
3244 rtx reg0, reg1;
3246 data = (address_cost_data) xcalloc (1, sizeof (*data));
3248 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3250 width = GET_MODE_BITSIZE (address_mode) - 1;
3251 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3252 width = HOST_BITS_PER_WIDE_INT - 1;
3253 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3255 for (i = width; i >= 0; i--)
3257 off = -((unsigned HOST_WIDE_INT) 1 << i);
3258 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3259 if (memory_address_addr_space_p (mem_mode, addr, as))
3260 break;
3262 data->min_offset = (i == -1? 0 : off);
3264 for (i = width; i >= 0; i--)
3266 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3267 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3268 if (memory_address_addr_space_p (mem_mode, addr, as))
3269 break;
3271 if (i == -1)
3272 off = 0;
3273 data->max_offset = off;
3275 if (dump_file && (dump_flags & TDF_DETAILS))
3277 fprintf (dump_file, "get_address_cost:\n");
3278 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3279 GET_MODE_NAME (mem_mode),
3280 data->min_offset);
3281 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3282 GET_MODE_NAME (mem_mode),
3283 data->max_offset);
3286 rat = 1;
3287 for (i = 2; i <= MAX_RATIO; i++)
3288 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3290 rat = i;
3291 break;
3294 /* Compute the cost of various addressing modes. */
3295 acost = 0;
3296 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3297 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3299 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3300 || USE_STORE_PRE_DECREMENT (mem_mode))
3302 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3303 has_predec[mem_mode]
3304 = memory_address_addr_space_p (mem_mode, addr, as);
3306 if (USE_LOAD_POST_DECREMENT (mem_mode)
3307 || USE_STORE_POST_DECREMENT (mem_mode))
3309 addr = gen_rtx_POST_DEC (address_mode, reg0);
3310 has_postdec[mem_mode]
3311 = memory_address_addr_space_p (mem_mode, addr, as);
3313 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3314 || USE_STORE_PRE_INCREMENT (mem_mode))
3316 addr = gen_rtx_PRE_INC (address_mode, reg0);
3317 has_preinc[mem_mode]
3318 = memory_address_addr_space_p (mem_mode, addr, as);
3320 if (USE_LOAD_POST_INCREMENT (mem_mode)
3321 || USE_STORE_POST_INCREMENT (mem_mode))
3323 addr = gen_rtx_POST_INC (address_mode, reg0);
3324 has_postinc[mem_mode]
3325 = memory_address_addr_space_p (mem_mode, addr, as);
3327 for (i = 0; i < 16; i++)
3329 sym_p = i & 1;
3330 var_p = (i >> 1) & 1;
3331 off_p = (i >> 2) & 1;
3332 rat_p = (i >> 3) & 1;
3334 addr = reg0;
3335 if (rat_p)
3336 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3337 gen_int_mode (rat, address_mode));
3339 if (var_p)
3340 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3342 if (sym_p)
3344 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3345 /* ??? We can run into trouble with some backends by presenting
3346 it with symbols which haven't been properly passed through
3347 targetm.encode_section_info. By setting the local bit, we
3348 enhance the probability of things working. */
3349 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3351 if (off_p)
3352 base = gen_rtx_fmt_e (CONST, address_mode,
3353 gen_rtx_fmt_ee
3354 (PLUS, address_mode, base,
3355 gen_int_mode (off, address_mode)));
3357 else if (off_p)
3358 base = gen_int_mode (off, address_mode);
3359 else
3360 base = NULL_RTX;
3362 if (base)
3363 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3365 start_sequence ();
3366 /* To avoid splitting addressing modes, pretend that no cse will
3367 follow. */
3368 old_cse_not_expected = cse_not_expected;
3369 cse_not_expected = true;
3370 addr = memory_address_addr_space (mem_mode, addr, as);
3371 cse_not_expected = old_cse_not_expected;
3372 seq = get_insns ();
3373 end_sequence ();
3375 acost = seq_cost (seq, speed);
3376 acost += address_cost (addr, mem_mode, as, speed);
3378 if (!acost)
3379 acost = 1;
3380 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3383 /* On some targets, it is quite expensive to load a symbol into a register,
3384 which makes addresses that contain symbols look much more expensive.
3385 However, the symbol will have to be loaded in any case before the
3386 loop (and quite likely we have it in register already), so it does not
3387 make much sense to penalize them too heavily. So make some final
3388 tweaks for the SYMBOL_PRESENT modes:
3390 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3391 var is cheaper, use this mode with small penalty.
3392 If VAR_PRESENT is true, try whether the mode with
3393 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3394 if this is the case, use it. */
3395 add_c = add_cost (speed, address_mode);
3396 for (i = 0; i < 8; i++)
3398 var_p = i & 1;
3399 off_p = (i >> 1) & 1;
3400 rat_p = (i >> 2) & 1;
3402 acost = data->costs[0][1][off_p][rat_p] + 1;
3403 if (var_p)
3404 acost += add_c;
3406 if (acost < data->costs[1][var_p][off_p][rat_p])
3407 data->costs[1][var_p][off_p][rat_p] = acost;
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3412 fprintf (dump_file, "Address costs:\n");
3414 for (i = 0; i < 16; i++)
3416 sym_p = i & 1;
3417 var_p = (i >> 1) & 1;
3418 off_p = (i >> 2) & 1;
3419 rat_p = (i >> 3) & 1;
3421 fprintf (dump_file, " ");
3422 if (sym_p)
3423 fprintf (dump_file, "sym + ");
3424 if (var_p)
3425 fprintf (dump_file, "var + ");
3426 if (off_p)
3427 fprintf (dump_file, "cst + ");
3428 if (rat_p)
3429 fprintf (dump_file, "rat * ");
3431 acost = data->costs[sym_p][var_p][off_p][rat_p];
3432 fprintf (dump_file, "index costs %d\n", acost);
3434 if (has_predec[mem_mode] || has_postdec[mem_mode]
3435 || has_preinc[mem_mode] || has_postinc[mem_mode])
3436 fprintf (dump_file, " May include autoinc/dec\n");
3437 fprintf (dump_file, "\n");
3440 address_cost_data_list[data_index] = data;
3443 bits = GET_MODE_BITSIZE (address_mode);
3444 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3445 offset &= mask;
3446 if ((offset >> (bits - 1) & 1))
3447 offset |= ~mask;
3448 s_offset = offset;
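/* The masking above sign-extends OFFSET from the width of the address
   mode to the full width of the HOST_WIDE_INT: the value is truncated
   to BITS bits and, if its sign bit is set, the high bits are filled
   with ones.  */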
3450 autoinc = false;
3451 msize = GET_MODE_SIZE (mem_mode);
3452 autoinc_offset = offset;
3453 if (stmt_after_inc)
3454 autoinc_offset += ratio * cstep;
3455 if (symbol_present || var_present || ratio != 1)
3456 autoinc = false;
3457 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3458 && msize == cstep)
3459 || (has_postdec[mem_mode] && autoinc_offset == 0
3460 && msize == -cstep)
3461 || (has_preinc[mem_mode] && autoinc_offset == msize
3462 && msize == cstep)
3463 || (has_predec[mem_mode] && autoinc_offset == -msize
3464 && msize == -cstep))
3465 autoinc = true;
3467 cost = 0;
3468 offset_p = (s_offset != 0
3469 && data->min_offset <= s_offset
3470 && s_offset <= data->max_offset);
3471 ratio_p = (ratio != 1
3472 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3474 if (ratio != 1 && !ratio_p)
3475 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3477 if (s_offset && !offset_p && !symbol_present)
3478 cost += add_cost (speed, address_mode);
3480 if (may_autoinc)
3481 *may_autoinc = autoinc;
3482 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3483 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3484 return new_cost (cost + acost, complexity);
3487 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3488 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3489 calculating the operands of EXPR. Returns true if successful, and returns
3490 the cost in COST. */
3492 static bool
3493 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3494 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3496 comp_cost res;
3497 tree op1 = TREE_OPERAND (expr, 1);
3498 tree cst = TREE_OPERAND (mult, 1);
3499 tree multop = TREE_OPERAND (mult, 0);
3500 int m = exact_log2 (int_cst_value (cst));
3501 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3502 int sa_cost;
3503 bool equal_p = false;
3505 if (!(m >= 0 && m < maxm))
3506 return false;
3508 if (operand_equal_p (op1, mult, 0))
3509 equal_p = true;
3511 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3512 ? shiftadd_cost (speed, mode, m)
3513 : (equal_p
3514 ? shiftsub1_cost (speed, mode, m)
3515 : shiftsub0_cost (speed, mode, m)));
3516 res = new_cost (sa_cost, 0);
3517 res = add_costs (res, equal_p ? cost0 : cost1);
3519 STRIP_NOPS (multop);
3520 if (!is_gimple_val (multop))
3521 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3523 *cost = res;
3524 return true;
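/* A minimal sketch (toy values) of the expressions a shiftadd covers:
   a + b * 8 becomes a + (b << 3) since exact_log2 (8) == 3; the
   MINUS_EXPR forms correspond to the shiftsub costs used above.  */
#if 0
#include <assert.h>

int
main (void)
{
  long a = 5, b = 7;
  assert (a + b * 8 == a + (b << 3));   /* shiftadd with m == 3  */
  assert (a - b * 8 == a - (b << 3));   /* shiftsub0 form  */
  return 0;
}
#endif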
3527 /* Estimates cost of forcing expression EXPR into a variable. */
3529 static comp_cost
3530 force_expr_to_var_cost (tree expr, bool speed)
3532 static bool costs_initialized = false;
3533 static unsigned integer_cost [2];
3534 static unsigned symbol_cost [2];
3535 static unsigned address_cost [2];
3536 tree op0, op1;
3537 comp_cost cost0, cost1, cost;
3538 enum machine_mode mode;
3540 if (!costs_initialized)
3542 tree type = build_pointer_type (integer_type_node);
3543 tree var, addr;
3544 rtx x;
3545 int i;
3547 var = create_tmp_var_raw (integer_type_node, "test_var");
3548 TREE_STATIC (var) = 1;
3549 x = produce_memory_decl_rtl (var, NULL);
3550 SET_DECL_RTL (var, x);
3552 addr = build1 (ADDR_EXPR, type, var);
3555 for (i = 0; i < 2; i++)
3557 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3558 2000), i);
3560 symbol_cost[i] = computation_cost (addr, i) + 1;
3562 address_cost[i]
3563 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3564 if (dump_file && (dump_flags & TDF_DETAILS))
3566 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3567 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3568 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3569 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3570 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3571 fprintf (dump_file, "\n");
3575 costs_initialized = true;
3578 STRIP_NOPS (expr);
3580 if (SSA_VAR_P (expr))
3581 return no_cost;
3583 if (is_gimple_min_invariant (expr))
3585 if (TREE_CODE (expr) == INTEGER_CST)
3586 return new_cost (integer_cost [speed], 0);
3588 if (TREE_CODE (expr) == ADDR_EXPR)
3590 tree obj = TREE_OPERAND (expr, 0);
3592 if (TREE_CODE (obj) == VAR_DECL
3593 || TREE_CODE (obj) == PARM_DECL
3594 || TREE_CODE (obj) == RESULT_DECL)
3595 return new_cost (symbol_cost [speed], 0);
3598 return new_cost (address_cost [speed], 0);
3601 switch (TREE_CODE (expr))
3603 case POINTER_PLUS_EXPR:
3604 case PLUS_EXPR:
3605 case MINUS_EXPR:
3606 case MULT_EXPR:
3607 op0 = TREE_OPERAND (expr, 0);
3608 op1 = TREE_OPERAND (expr, 1);
3609 STRIP_NOPS (op0);
3610 STRIP_NOPS (op1);
3612 if (is_gimple_val (op0))
3613 cost0 = no_cost;
3614 else
3615 cost0 = force_expr_to_var_cost (op0, speed);
3617 if (is_gimple_val (op1))
3618 cost1 = no_cost;
3619 else
3620 cost1 = force_expr_to_var_cost (op1, speed);
3622 break;
3624 case NEGATE_EXPR:
3625 op0 = TREE_OPERAND (expr, 0);
3626 STRIP_NOPS (op0);
3627 op1 = NULL_TREE;
3629 if (is_gimple_val (op0))
3630 cost0 = no_cost;
3631 else
3632 cost0 = force_expr_to_var_cost (op0, speed);
3634 cost1 = no_cost;
3635 break;
3637 default:
3638 /* Just an arbitrary value, FIXME. */
3639 return new_cost (target_spill_cost[speed], 0);
3642 mode = TYPE_MODE (TREE_TYPE (expr));
3643 switch (TREE_CODE (expr))
3645 case POINTER_PLUS_EXPR:
3646 case PLUS_EXPR:
3647 case MINUS_EXPR:
3648 case NEGATE_EXPR:
3649 cost = new_cost (add_cost (speed, mode), 0);
3650 if (TREE_CODE (expr) != NEGATE_EXPR)
3652 tree mult = NULL_TREE;
3653 comp_cost sa_cost;
3654 if (TREE_CODE (op1) == MULT_EXPR)
3655 mult = op1;
3656 else if (TREE_CODE (op0) == MULT_EXPR)
3657 mult = op0;
3659 if (mult != NULL_TREE
3660 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3661 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3662 speed, &sa_cost))
3663 return sa_cost;
3665 break;
3667 case MULT_EXPR:
3668 if (cst_and_fits_in_hwi (op0))
3669 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3670 mode, speed), 0);
3671 else if (cst_and_fits_in_hwi (op1))
3672 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3673 mode, speed), 0);
3674 else
3675 return new_cost (target_spill_cost [speed], 0);
3676 break;
3678 default:
3679 gcc_unreachable ();
3682 cost = add_costs (cost, cost0);
3683 cost = add_costs (cost, cost1);
3685 /* Bound the cost by target_spill_cost. The parts of complicated
3686 computations are often either loop invariant or at least can
3687 be shared between several iv uses, so letting this grow without
3688 limits would not give reasonable results. */
3689 if (cost.cost > (int) target_spill_cost [speed])
3690 cost.cost = target_spill_cost [speed];
3692 return cost;
3695 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3696 invariants the computation depends on. */
3698 static comp_cost
3699 force_var_cost (struct ivopts_data *data,
3700 tree expr, bitmap *depends_on)
3702 if (depends_on)
3704 fd_ivopts_data = data;
3705 walk_tree (&expr, find_depends, depends_on, NULL);
3708 return force_expr_to_var_cost (expr, data->speed);
3711 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3712 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3713 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3714 invariants the computation depends on. */
3716 static comp_cost
3717 split_address_cost (struct ivopts_data *data,
3718 tree addr, bool *symbol_present, bool *var_present,
3719 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3721 tree core;
3722 HOST_WIDE_INT bitsize;
3723 HOST_WIDE_INT bitpos;
3724 tree toffset;
3725 enum machine_mode mode;
3726 int unsignedp, volatilep;
3728 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3729 &unsignedp, &volatilep, false);
3731 if (toffset != 0
3732 || bitpos % BITS_PER_UNIT != 0
3733 || TREE_CODE (core) != VAR_DECL)
3735 *symbol_present = false;
3736 *var_present = true;
3737 fd_ivopts_data = data;
3738 walk_tree (&addr, find_depends, depends_on, NULL);
3739 return new_cost (target_spill_cost[data->speed], 0);
3742 *offset += bitpos / BITS_PER_UNIT;
3743 if (TREE_STATIC (core)
3744 || DECL_EXTERNAL (core))
3746 *symbol_present = true;
3747 *var_present = false;
3748 return no_cost;
3751 *symbol_present = false;
3752 *var_present = true;
3753 return no_cost;
3756 /* Estimates cost of expressing difference of addresses E1 - E2 as
3757 var + symbol + offset. The value of offset is added to OFFSET,
3758 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3759 part is missing. DEPENDS_ON is a set of the invariants the computation
3760 depends on. */
3762 static comp_cost
3763 ptr_difference_cost (struct ivopts_data *data,
3764 tree e1, tree e2, bool *symbol_present, bool *var_present,
3765 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3767 HOST_WIDE_INT diff = 0;
3768 aff_tree aff_e1, aff_e2;
3769 tree type;
3771 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3773 if (ptr_difference_const (e1, e2, &diff))
3775 *offset += diff;
3776 *symbol_present = false;
3777 *var_present = false;
3778 return no_cost;
3781 if (integer_zerop (e2))
3782 return split_address_cost (data, TREE_OPERAND (e1, 0),
3783 symbol_present, var_present, offset, depends_on);
3785 *symbol_present = false;
3786 *var_present = true;
3788 type = signed_type_for (TREE_TYPE (e1));
3789 tree_to_aff_combination (e1, type, &aff_e1);
3790 tree_to_aff_combination (e2, type, &aff_e2);
3791 aff_combination_scale (&aff_e2, double_int_minus_one);
3792 aff_combination_add (&aff_e1, &aff_e2);
3794 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3797 /* Estimates cost of expressing difference E1 - E2 as
3798 var + symbol + offset. The value of offset is added to OFFSET,
3799 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3800 part is missing. DEPENDS_ON is a set of the invariants the computation
3801 depends on. */
3803 static comp_cost
3804 difference_cost (struct ivopts_data *data,
3805 tree e1, tree e2, bool *symbol_present, bool *var_present,
3806 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3808 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3809 unsigned HOST_WIDE_INT off1, off2;
3810 aff_tree aff_e1, aff_e2;
3811 tree type;
3813 e1 = strip_offset (e1, &off1);
3814 e2 = strip_offset (e2, &off2);
3815 *offset += off1 - off2;
3817 STRIP_NOPS (e1);
3818 STRIP_NOPS (e2);
3820 if (TREE_CODE (e1) == ADDR_EXPR)
3821 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3822 offset, depends_on);
3823 *symbol_present = false;
3825 if (operand_equal_p (e1, e2, 0))
3827 *var_present = false;
3828 return no_cost;
3831 *var_present = true;
3833 if (integer_zerop (e2))
3834 return force_var_cost (data, e1, depends_on);
3836 if (integer_zerop (e1))
3838 comp_cost cost = force_var_cost (data, e2, depends_on);
3839 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3840 return cost;
3843 type = signed_type_for (TREE_TYPE (e1));
3844 tree_to_aff_combination (e1, type, &aff_e1);
3845 tree_to_aff_combination (e2, type, &aff_e2);
3846 aff_combination_scale (&aff_e2, double_int_minus_one);
3847 aff_combination_add (&aff_e1, &aff_e2);
3849 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3852 /* Returns true if AFF1 and AFF2 are identical. */
3854 static bool
3855 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3857 unsigned i;
3859 if (aff1->n != aff2->n)
3860 return false;
3862 for (i = 0; i < aff1->n; i++)
3864 if (aff1->elts[i].coef != aff2->elts[i].coef)
3865 return false;
3867 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3868 return false;
3870 return true;
3873 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3875 static int
3876 get_expr_id (struct ivopts_data *data, tree expr)
3878 struct iv_inv_expr_ent ent;
3879 struct iv_inv_expr_ent **slot;
3881 ent.expr = expr;
3882 ent.hash = iterative_hash_expr (expr, 0);
3883 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3884 if (*slot)
3885 return (*slot)->id;
3887 *slot = XNEW (struct iv_inv_expr_ent);
3888 (*slot)->expr = expr;
3889 (*slot)->hash = ent.hash;
3890 (*slot)->id = data->inv_expr_id++;
3891 return (*slot)->id;
3894 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3895 requires a new compiler-generated temporary. Returns -1 otherwise.
3896 ADDRESS_P is a flag indicating if the expression is for address
3897 computation. */
3899 static int
3900 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3901 tree cbase, HOST_WIDE_INT ratio,
3902 bool address_p)
3904 aff_tree ubase_aff, cbase_aff;
3905 tree expr, ub, cb;
3907 STRIP_NOPS (ubase);
3908 STRIP_NOPS (cbase);
3909 ub = ubase;
3910 cb = cbase;
3912 if ((TREE_CODE (ubase) == INTEGER_CST)
3913 && (TREE_CODE (cbase) == INTEGER_CST))
3914 return -1;
3916 /* Strips the constant part. */
3917 if (TREE_CODE (ubase) == PLUS_EXPR
3918 || TREE_CODE (ubase) == MINUS_EXPR
3919 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3921 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3922 ubase = TREE_OPERAND (ubase, 0);
3925 /* Strips the constant part. */
3926 if (TREE_CODE (cbase) == PLUS_EXPR
3927 || TREE_CODE (cbase) == MINUS_EXPR
3928 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3930 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3931 cbase = TREE_OPERAND (cbase, 0);
3934 if (address_p)
3936 if (((TREE_CODE (ubase) == SSA_NAME)
3937 || (TREE_CODE (ubase) == ADDR_EXPR
3938 && is_gimple_min_invariant (ubase)))
3939 && (TREE_CODE (cbase) == INTEGER_CST))
3940 return -1;
3942 if (((TREE_CODE (cbase) == SSA_NAME)
3943 || (TREE_CODE (cbase) == ADDR_EXPR
3944 && is_gimple_min_invariant (cbase)))
3945 && (TREE_CODE (ubase) == INTEGER_CST))
3946 return -1;
3949 if (ratio == 1)
3951 if (operand_equal_p (ubase, cbase, 0))
3952 return -1;
3954 if (TREE_CODE (ubase) == ADDR_EXPR
3955 && TREE_CODE (cbase) == ADDR_EXPR)
3957 tree usym, csym;
3959 usym = TREE_OPERAND (ubase, 0);
3960 csym = TREE_OPERAND (cbase, 0);
3961 if (TREE_CODE (usym) == ARRAY_REF)
3963 tree ind = TREE_OPERAND (usym, 1);
3964 if (TREE_CODE (ind) == INTEGER_CST
3965 && host_integerp (ind, 0)
3966 && TREE_INT_CST_LOW (ind) == 0)
3967 usym = TREE_OPERAND (usym, 0);
3969 if (TREE_CODE (csym) == ARRAY_REF)
3971 tree ind = TREE_OPERAND (csym, 1);
3972 if (TREE_CODE (ind) == INTEGER_CST
3973 && host_integerp (ind, 0)
3974 && TREE_INT_CST_LOW (ind) == 0)
3975 csym = TREE_OPERAND (csym, 0);
3977 if (operand_equal_p (usym, csym, 0))
3978 return -1;
3980 /* Now do a more complex comparison. */
3981 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3982 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3983 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3984 return -1;
3987 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3988 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3990 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3991 aff_combination_add (&ubase_aff, &cbase_aff);
3992 expr = aff_combination_to_tree (&ubase_aff);
3993 return get_expr_id (data, expr);
3998 /* Determines the cost of the computation by which USE is expressed
3999 from induction variable CAND. If ADDRESS_P is true, we just need
4000 to create an address from it, otherwise we want to get it into a
4001 register. A set of invariants we depend on is stored in
4002 DEPENDS_ON. AT is the statement at which the value is computed.
4003 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4004 addressing is likely. */
4006 static comp_cost
4007 get_computation_cost_at (struct ivopts_data *data,
4008 struct iv_use *use, struct iv_cand *cand,
4009 bool address_p, bitmap *depends_on, gimple at,
4010 bool *can_autoinc,
4011 int *inv_expr_id)
4013 tree ubase = use->iv->base, ustep = use->iv->step;
4014 tree cbase, cstep;
4015 tree utype = TREE_TYPE (ubase), ctype;
4016 unsigned HOST_WIDE_INT cstepi, offset = 0;
4017 HOST_WIDE_INT ratio, aratio;
4018 bool var_present, symbol_present, stmt_is_after_inc;
4019 comp_cost cost;
4020 double_int rat;
4021 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4022 enum machine_mode mem_mode = (address_p
4023 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4024 : VOIDmode);
4026 *depends_on = NULL;
4028 /* Only consider real candidates. */
4029 if (!cand->iv)
4030 return infinite_cost;
4032 cbase = cand->iv->base;
4033 cstep = cand->iv->step;
4034 ctype = TREE_TYPE (cbase);
4036 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4038 /* We do not have enough precision to express the values of USE. */
4039 return infinite_cost;
4042 if (address_p
4043 || (use->iv->base_object
4044 && cand->iv->base_object
4045 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4046 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4048 /* Do not try to express the address of an object with a computation based
4049 on the address of a different object. This may cause problems in
4050 rtl-level alias analysis (which does not expect this to happen,
4051 as it is illegal in C), and would be unlikely to be useful
4052 anyway. */
4053 if (use->iv->base_object
4054 && cand->iv->base_object
4055 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4056 return infinite_cost;
4059 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4061 /* TODO -- add direct handling of this case. */
4062 goto fallback;
4065 /* CSTEPI is removed from the offset in case the statement is after the
4066 increment. If the step is not constant, we use zero instead.
4067 This is a bit imprecise (there is the extra addition), but
4068 redundancy elimination is likely to transform the code so that
4069 it uses the value of the variable before the increment anyway,
4070 so this is not that unrealistic. */
4071 if (cst_and_fits_in_hwi (cstep))
4072 cstepi = int_cst_value (cstep);
4073 else
4074 cstepi = 0;
4076 if (!constant_multiple_of (ustep, cstep, &rat))
4077 return infinite_cost;
4079 if (rat.fits_shwi ())
4080 ratio = rat.to_shwi ();
4081 else
4082 return infinite_cost;
4084 STRIP_NOPS (cbase);
4085 ctype = TREE_TYPE (cbase);
4087 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4089 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4090 or ratio == 1, it is better to handle this like
4092 ubase - ratio * cbase + ratio * var
4094 (this also holds in the case ratio == -1, TODO). */
4096 if (cst_and_fits_in_hwi (cbase))
4098 offset = - ratio * int_cst_value (cbase);
4099 cost = difference_cost (data,
4100 ubase, build_int_cst (utype, 0),
4101 &symbol_present, &var_present, &offset,
4102 depends_on);
4103 cost.cost /= avg_loop_niter (data->current_loop);
4105 else if (ratio == 1)
4107 tree real_cbase = cbase;
4109 /* Check to see if any adjustment is needed. */
4110 if (cstepi == 0 && stmt_is_after_inc)
4112 aff_tree real_cbase_aff;
4113 aff_tree cstep_aff;
4115 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4116 &real_cbase_aff);
4117 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4119 aff_combination_add (&real_cbase_aff, &cstep_aff);
4120 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4123 cost = difference_cost (data,
4124 ubase, real_cbase,
4125 &symbol_present, &var_present, &offset,
4126 depends_on);
4127 cost.cost /= avg_loop_niter (data->current_loop);
4129 else if (address_p
4130 && !POINTER_TYPE_P (ctype)
4131 && multiplier_allowed_in_address_p
4132 (ratio, mem_mode,
4133 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4135 cbase
4136 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4137 cost = difference_cost (data,
4138 ubase, cbase,
4139 &symbol_present, &var_present, &offset,
4140 depends_on);
4141 cost.cost /= avg_loop_niter (data->current_loop);
4143 else
4145 cost = force_var_cost (data, cbase, depends_on);
4146 cost = add_costs (cost,
4147 difference_cost (data,
4148 ubase, build_int_cst (utype, 0),
4149 &symbol_present, &var_present,
4150 &offset, depends_on));
4151 cost.cost /= avg_loop_niter (data->current_loop);
4152 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4155 if (inv_expr_id)
4157 *inv_expr_id =
4158 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4159 /* Clear the depends_on set. */
4160 if (*inv_expr_id != -1 && depends_on && *depends_on)
4161 bitmap_clear (*depends_on);
4164 /* If we are after the increment, the value of the candidate is higher by
4165 one iteration. */
4166 if (stmt_is_after_inc)
4167 offset -= ratio * cstepi;
4169 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4170 (symbol/var1/const parts may be omitted). If we are looking for an
4171 address, find the cost of addressing this. */
4172 if (address_p)
4173 return add_costs (cost,
4174 get_address_cost (symbol_present, var_present,
4175 offset, ratio, cstepi,
4176 mem_mode,
4177 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4178 speed, stmt_is_after_inc,
4179 can_autoinc));
4181 /* Otherwise estimate the costs for computing the expression. */
4182 if (!symbol_present && !var_present && !offset)
4184 if (ratio != 1)
4185 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4186 return cost;
4189 /* Symbol + offset should be compile-time computable, so consider that they
4190 are added once to the variable, if present. */
4191 if (var_present && (symbol_present || offset))
4192 cost.cost += adjust_setup_cost (data,
4193 add_cost (speed, TYPE_MODE (ctype)));
4195 /* Having an offset does not affect the runtime cost if it is added to the
4196 symbol, but it increases complexity. */
4197 if (offset)
4198 cost.complexity++;
4200 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4202 aratio = ratio > 0 ? ratio : -ratio;
4203 if (aratio != 1)
4204 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4205 return cost;
4207 fallback:
4208 if (can_autoinc)
4209 *can_autoinc = false;
4212 /* Just get the expression, expand it and measure the cost. */
4213 tree comp = get_computation_at (data->current_loop, use, cand, at);
4215 if (!comp)
4216 return infinite_cost;
4218 if (address_p)
4219 comp = build_simple_mem_ref (comp);
4221 return new_cost (computation_cost (comp, speed), 0);
4225 /* Determines the cost of the computation by which USE is expressed
4226 from induction variable CAND. If ADDRESS_P is true, we just need
4227 to create an address from it, otherwise we want to get it into a
4228 register. A set of invariants we depend on is stored in
4229 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4230 autoinc addressing is likely. */
4232 static comp_cost
4233 get_computation_cost (struct ivopts_data *data,
4234 struct iv_use *use, struct iv_cand *cand,
4235 bool address_p, bitmap *depends_on,
4236 bool *can_autoinc, int *inv_expr_id)
4238 return get_computation_cost_at (data,
4239 use, cand, address_p, depends_on, use->stmt,
4240 can_autoinc, inv_expr_id);
4243 /* Determines cost of basing replacement of USE on CAND in a generic
4244 expression. */
4246 static bool
4247 determine_use_iv_cost_generic (struct ivopts_data *data,
4248 struct iv_use *use, struct iv_cand *cand)
4250 bitmap depends_on;
4251 comp_cost cost;
4252 int inv_expr_id = -1;
4254 /* The simple case first -- if we need to express value of the preserved
4255 original biv, the cost is 0. This also prevents us from counting the
4256 cost of increment twice -- once at this use and once in the cost of
4257 the candidate. */
4258 if (cand->pos == IP_ORIGINAL
4259 && cand->incremented_at == use->stmt)
4261 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4262 ERROR_MARK, -1);
4263 return true;
4266 cost = get_computation_cost (data, use, cand, false, &depends_on,
4267 NULL, &inv_expr_id);
4269 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4270 inv_expr_id);
4272 return !infinite_cost_p (cost);
4275 /* Determines cost of basing replacement of USE on CAND in an address. */
4277 static bool
4278 determine_use_iv_cost_address (struct ivopts_data *data,
4279 struct iv_use *use, struct iv_cand *cand)
4281 bitmap depends_on;
4282 bool can_autoinc;
4283 int inv_expr_id = -1;
4284 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4285 &can_autoinc, &inv_expr_id);
4287 if (cand->ainc_use == use)
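4288 /* If autoincrement really applies for this use, the candidate's step update is free. */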
4289 if (can_autoinc)
4290 cost.cost -= cand->cost_step;
4291 /* If we generated the candidate solely for exploiting autoincrement
4292 opportunities, and it turns out it can't be used, set the cost to
4293 infinity to make sure we ignore it. */
4294 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4295 cost = infinite_cost;
4297 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4298 inv_expr_id);
4300 return !infinite_cost_p (cost);
4303 /* Computes value of candidate CAND at position AT in iteration NITER, and
4304 stores it to VAL. */
4306 static void
4307 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4308 aff_tree *val)
4310 aff_tree step, delta, nit;
4311 struct iv *iv = cand->iv;
4312 tree type = TREE_TYPE (iv->base);
4313 tree steptype = type;
4314 if (POINTER_TYPE_P (type))
4315 steptype = sizetype;
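4316 /* The value is BASE + NITER * STEP, plus one extra STEP if AT is after the increment. */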
4317 tree_to_aff_combination (iv->step, steptype, &step);
4318 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4319 aff_combination_convert (&nit, steptype);
4320 aff_combination_mult (&nit, &step, &delta);
4321 if (stmt_after_increment (loop, cand, at))
4322 aff_combination_add (&delta, &step);
4324 tree_to_aff_combination (iv->base, type, val);
4325 aff_combination_add (val, &delta);
4328 /* Returns the period of the induction variable IV. */
4330 static tree
4331 iv_period (struct iv *iv)
4333 tree step = iv->step, period, type;
4334 tree pow2div;
4336 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4338 type = unsigned_type_for (TREE_TYPE (step));
4339 /* The period of the iv is lcm (step, type_range)/step - 1,
4340 i.e., N*type_range/step - 1. Since the type range is a power
4341 of two, N == step >> num_of_ending_zeros_binary (step),
4342 so the final result is
4344 (type_range >> num_of_ending_zeros_binary (step)) - 1
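4345 (e.g., for a 32-bit unsigned type and step 12, which has two trailing
4346 zero bits, the period is (2^32 >> 2) - 1 == 0x3fffffff). */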
4347 pow2div = num_ending_zeros (step);
4349 period = build_low_bits_mask (type,
4350 (TYPE_PRECISION (type)
4351 - tree_low_cst (pow2div, 1)));
4353 return period;
4356 /* Returns the comparison operator used when eliminating the iv USE. */
4358 static enum tree_code
4359 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4361 struct loop *loop = data->current_loop;
4362 basic_block ex_bb;
4363 edge exit;
4365 ex_bb = gimple_bb (use->stmt);
4366 exit = EDGE_SUCC (ex_bb, 0);
4367 if (flow_bb_inside_loop_p (loop, exit->dest))
4368 exit = EDGE_SUCC (ex_bb, 1);
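4369 /* If the exit is taken on the true edge, the new test is IV == BOUND; otherwise we loop while IV != BOUND. */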
4370 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4373 static tree
4374 strip_wrap_conserving_type_conversions (tree exp)
4376 while (tree_ssa_useless_type_conversion (exp)
4377 && (nowrap_type_p (TREE_TYPE (exp))
4378 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4379 exp = TREE_OPERAND (exp, 0);
4380 return exp;
4383 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4384 only check for an exact match. */
4386 static bool
4387 expr_equal_p (tree e, tree what)
4389 gimple stmt;
4390 enum tree_code code;
4392 e = strip_wrap_conserving_type_conversions (e);
4393 what = strip_wrap_conserving_type_conversions (what);
4395 code = TREE_CODE (what);
4396 if (TREE_TYPE (e) != TREE_TYPE (what))
4397 return false;
4399 if (operand_equal_p (e, what, 0))
4400 return true;
4402 if (TREE_CODE (e) != SSA_NAME)
4403 return false;
4405 stmt = SSA_NAME_DEF_STMT (e);
4406 if (gimple_code (stmt) != GIMPLE_ASSIGN
4407 || gimple_assign_rhs_code (stmt) != code)
4408 return false;
4410 switch (get_gimple_rhs_class (code))
4412 case GIMPLE_BINARY_RHS:
4413 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4414 return false;
4415 /* Fallthru. */
4417 case GIMPLE_UNARY_RHS:
4418 case GIMPLE_SINGLE_RHS:
4419 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4420 default:
4421 return false;
4425 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4426 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4427 calculation is performed in a non-wrapping type.
4429 TODO: More generally, we could test for the situation that
4430 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4431 This would require knowing the sign of OFFSET.
4433 Also, we only look for the first addition in the computation of BASE.
4434 More complex analysis would be better, but introducing it just for
4435 this optimization seems like overkill. */
4437 static bool
4438 difference_cannot_overflow_p (tree base, tree offset)
4440 enum tree_code code;
4441 tree e1, e2;
4443 if (!nowrap_type_p (TREE_TYPE (base)))
4444 return false;
4446 base = expand_simple_operations (base);
4448 if (TREE_CODE (base) == SSA_NAME)
4450 gimple stmt = SSA_NAME_DEF_STMT (base);
4452 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4453 return false;
4455 code = gimple_assign_rhs_code (stmt);
4456 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4457 return false;
4459 e1 = gimple_assign_rhs1 (stmt);
4460 e2 = gimple_assign_rhs2 (stmt);
4462 else
4464 code = TREE_CODE (base);
4465 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4466 return false;
4467 e1 = TREE_OPERAND (base, 0);
4468 e2 = TREE_OPERAND (base, 1);
4471 /* TODO: deeper inspection may be necessary to prove the equality. */
4472 switch (code)
4474 case PLUS_EXPR:
4475 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4476 case POINTER_PLUS_EXPR:
4477 return expr_equal_p (e2, offset);
4479 default:
4480 return false;
4484 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4485 comparison with CAND. NITER describes the number of iterations of
4486 the loops. If successful, the comparison in COMP_P is altered accordingly.
4488 We aim to handle the following situation:
4490 sometype *base, *p;
4491 int a, b, i;
4493 i = a;
4494 p = p_0 = base + a;
4498 bla (*p);
4499 p++;
4500 i++;
4502 while (i < b);
4504 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4505 We aim to optimize this to
4507 p = p_0 = base + a;
4510 bla (*p);
4511 p++;
4513 while (p < p_0 - a + b);
4515 This preserves correctness, since the pointer arithmetic does not
4516 overflow. More precisely:
4518 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4519 overflow in computing it or the values of p.
4520 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4521 overflow. To prove this, we use the fact that p_0 = base + a. */
4523 static bool
4524 iv_elimination_compare_lt (struct ivopts_data *data,
4525 struct iv_cand *cand, enum tree_code *comp_p,
4526 struct tree_niter_desc *niter)
4528 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4529 struct affine_tree_combination nit, tmpa, tmpb;
4530 enum tree_code comp;
4531 HOST_WIDE_INT step;
4533 /* We need to know that the candidate induction variable does not overflow.
4534 While more complex analysis may be used to prove this, for now just
4535 check that the variable appears in the original program and that it
4536 is computed in a type that guarantees no overflows. */
4537 cand_type = TREE_TYPE (cand->iv->base);
4538 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4539 return false;
4541 /* Make sure that the loop iterates until the loop bound is hit, as otherwise
4542 the calculation of the BOUND could overflow, making the comparison
4543 invalid. */
4544 if (!data->loop_single_exit_p)
4545 return false;
4547 /* We need to be able to decide whether the candidate is increasing or decreasing
4548 in order to choose the right comparison operator. */
4549 if (!cst_and_fits_in_hwi (cand->iv->step))
4550 return false;
4551 step = int_cst_value (cand->iv->step);
4553 /* Check that the number of iterations matches the expected pattern:
4554 a + 1 > b ? 0 : b - a - 1. */
4555 mbz = niter->may_be_zero;
4556 if (TREE_CODE (mbz) == GT_EXPR)
4558 /* Handle a + 1 > b. */
4559 tree op0 = TREE_OPERAND (mbz, 0);
4560 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4562 a = TREE_OPERAND (op0, 0);
4563 b = TREE_OPERAND (mbz, 1);
4565 else
4566 return false;
4568 else if (TREE_CODE (mbz) == LT_EXPR)
4570 tree op1 = TREE_OPERAND (mbz, 1);
4572 /* Handle b < a + 1. */
4573 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4575 a = TREE_OPERAND (op1, 0);
4576 b = TREE_OPERAND (mbz, 0);
4578 else
4579 return false;
4581 else
4582 return false;
4584 /* Expected number of iterations is B - A - 1. Check that it matches
4585 the actual number, i.e., that B - A - NITER = 1. */
4586 tree_to_aff_combination (niter->niter, nit_type, &nit);
4587 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4588 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4589 aff_combination_scale (&nit, double_int_minus_one);
4590 aff_combination_scale (&tmpa, double_int_minus_one);
4591 aff_combination_add (&tmpb, &tmpa);
4592 aff_combination_add (&tmpb, &nit);
4593 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4594 return false;
4596 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4597 overflow. */
4598 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4599 cand->iv->step,
4600 fold_convert (TREE_TYPE (cand->iv->step), a));
4601 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4602 return false;
4604 /* Determine the new comparison operator. */
4605 comp = step < 0 ? GT_EXPR : LT_EXPR;
4606 if (*comp_p == NE_EXPR)
4607 *comp_p = comp;
4608 else if (*comp_p == EQ_EXPR)
4609 *comp_p = invert_tree_comparison (comp, false);
4610 else
4611 gcc_unreachable ();
4613 return true;
4616 /* Check whether it is possible to express the condition in USE by comparison
4617 of candidate CAND. If so, store the value it is compared with in BOUND, and
4618 the comparison operator in COMP. */
4620 static bool
4621 may_eliminate_iv (struct ivopts_data *data,
4622 struct iv_use *use, struct iv_cand *cand, tree *bound,
4623 enum tree_code *comp)
4625 basic_block ex_bb;
4626 edge exit;
4627 tree period;
4628 struct loop *loop = data->current_loop;
4629 aff_tree bnd;
4630 struct tree_niter_desc *desc = NULL;
4632 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4633 return false;
4635 /* For now this works only for exits that dominate the loop latch.
4636 TODO: extend to other conditions inside loop body. */
4637 ex_bb = gimple_bb (use->stmt);
4638 if (use->stmt != last_stmt (ex_bb)
4639 || gimple_code (use->stmt) != GIMPLE_COND
4640 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4641 return false;
4643 exit = EDGE_SUCC (ex_bb, 0);
4644 if (flow_bb_inside_loop_p (loop, exit->dest))
4645 exit = EDGE_SUCC (ex_bb, 1);
4646 if (flow_bb_inside_loop_p (loop, exit->dest))
4647 return false;
4649 desc = niter_for_exit (data, exit);
4650 if (!desc)
4651 return false;
4653 /* Determine whether we can use the variable to test the exit condition.
4654 This is the case iff the period of the induction variable is greater
4655 than the number of iterations for which the exit condition is true. */
4656 period = iv_period (cand->iv);
4658 /* If the number of iterations is constant, compare against it directly. */
4659 if (TREE_CODE (desc->niter) == INTEGER_CST)
4661 /* See cand_value_at. */
4662 if (stmt_after_increment (loop, cand, use->stmt))
4664 if (!tree_int_cst_lt (desc->niter, period))
4665 return false;
4667 else
4669 if (tree_int_cst_lt (period, desc->niter))
4670 return false;
4674 /* If not, and if this is the only possible exit of the loop, see whether
4675 we can get a conservative estimate on the number of iterations of the
4676 entire loop and compare against that instead. */
4677 else
4679 double_int period_value, max_niter;
4681 max_niter = desc->max;
4682 if (stmt_after_increment (loop, cand, use->stmt))
4683 max_niter += double_int_one;
4684 period_value = tree_to_double_int (period);
4685 if (max_niter.ugt (period_value))
4687 /* See if we can take advantage of inferred loop bound information. */
4688 if (data->loop_single_exit_p)
4690 if (!max_loop_iterations (loop, &max_niter))
4691 return false;
4692 /* The loop bound is already adjusted by adding 1. */
4693 if (max_niter.ugt (period_value))
4694 return false;
4696 else
4697 return false;
4701 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4703 *bound = aff_combination_to_tree (&bnd);
4704 *comp = iv_elimination_compare (data, use);
4706 /* It is unlikely that computing the number of iterations using division
4707 would be more profitable than keeping the original induction variable. */
4708 if (expression_expensive_p (*bound))
4709 return false;
4711 /* Sometimes it is possible to handle the situation that the number of
4712 iterations may be zero (unless additional assumptions hold) by using <
4713 instead of != in the exit condition.
4715 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4716 base the exit condition on it. However, that is often too
4717 expensive. */
4718 if (!integer_zerop (desc->may_be_zero))
4719 return iv_elimination_compare_lt (data, cand, comp, desc);
4721 return true;
4724 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4725 be copied, if it is used in the loop body and DATA->body_includes_call. */
4727 static int
4728 parm_decl_cost (struct ivopts_data *data, tree bound)
4730 tree sbound = bound;
4731 STRIP_NOPS (sbound);
4733 if (TREE_CODE (sbound) == SSA_NAME
4734 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4735 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4736 && data->body_includes_call)
4737 return COSTS_N_INSNS (1);
4739 return 0;
4742 /* Determines cost of basing replacement of USE on CAND in a condition. */
4744 static bool
4745 determine_use_iv_cost_condition (struct ivopts_data *data,
4746 struct iv_use *use, struct iv_cand *cand)
4748 tree bound = NULL_TREE;
4749 struct iv *cmp_iv;
4750 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4751 comp_cost elim_cost, express_cost, cost, bound_cost;
4752 bool ok;
4753 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4754 tree *control_var, *bound_cst;
4755 enum tree_code comp = ERROR_MARK;
4757 /* Only consider real candidates. */
4758 if (!cand->iv)
4760 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4761 ERROR_MARK, -1);
4762 return false;
4765 /* Try iv elimination. */
4766 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4768 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4769 if (elim_cost.cost == 0)
4770 elim_cost.cost = parm_decl_cost (data, bound);
4771 else if (TREE_CODE (bound) == INTEGER_CST)
4772 elim_cost.cost = 0;
4773 /* If we replace a loop condition 'i < n' with 'p < base + n',
4774 depends_on_elim will have 'base' and 'n' set, which implies
4775 that both 'base' and 'n' will be live during the loop. More likely,
4776 'base + n' will be loop invariant, resulting in only one live value
4777 during the loop. So in that case we clear depends_on_elim and set
4778 elim_inv_expr_id instead. */
4779 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4781 elim_inv_expr_id = get_expr_id (data, bound);
4782 bitmap_clear (depends_on_elim);
4784 /* The bound is a loop invariant, so it will be only computed
4785 once. */
4786 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4788 else
4789 elim_cost = infinite_cost;
4791 /* Try expressing the original giv. If it is compared with an invariant,
4792 note that we cannot get rid of it. */
4793 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4794 NULL, &cmp_iv);
4795 gcc_assert (ok);
4797 /* When the condition is a comparison of the candidate IV against
4798 zero, prefer this IV.
4800 TODO: The constant that we're subtracting from the cost should
4801 be target-dependent. This information should be added to the
4802 target costs for each backend. */
4803 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4804 && integer_zerop (*bound_cst)
4805 && (operand_equal_p (*control_var, cand->var_after, 0)
4806 || operand_equal_p (*control_var, cand->var_before, 0)))
4807 elim_cost.cost -= 1;
4809 express_cost = get_computation_cost (data, use, cand, false,
4810 &depends_on_express, NULL,
4811 &express_inv_expr_id);
4812 fd_ivopts_data = data;
4813 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4815 /* Count the cost of the original bound as well. */
4816 bound_cost = force_var_cost (data, *bound_cst, NULL);
4817 if (bound_cost.cost == 0)
4818 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4819 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4820 bound_cost.cost = 0;
4821 express_cost.cost += bound_cost.cost;
4823 /* Choose the better approach, preferring the eliminated IV. */
4824 if (compare_costs (elim_cost, express_cost) <= 0)
4826 cost = elim_cost;
4827 depends_on = depends_on_elim;
4828 depends_on_elim = NULL;
4829 inv_expr_id = elim_inv_expr_id;
4831 else
4833 cost = express_cost;
4834 depends_on = depends_on_express;
4835 depends_on_express = NULL;
4836 bound = NULL_TREE;
4837 comp = ERROR_MARK;
4838 inv_expr_id = express_inv_expr_id;
4841 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4843 if (depends_on_elim)
4844 BITMAP_FREE (depends_on_elim);
4845 if (depends_on_express)
4846 BITMAP_FREE (depends_on_express);
4848 return !infinite_cost_p (cost);
4851 /* Determines cost of basing replacement of USE on CAND. Returns false
4852 if USE cannot be based on CAND. */
4854 static bool
4855 determine_use_iv_cost (struct ivopts_data *data,
4856 struct iv_use *use, struct iv_cand *cand)
4858 switch (use->type)
4860 case USE_NONLINEAR_EXPR:
4861 return determine_use_iv_cost_generic (data, use, cand);
4863 case USE_ADDRESS:
4864 return determine_use_iv_cost_address (data, use, cand);
4866 case USE_COMPARE:
4867 return determine_use_iv_cost_condition (data, use, cand);
4869 default:
4870 gcc_unreachable ();
4874 /* Return true if get_computation_cost indicates that autoincrement is
4875 a possibility for the pair of USE and CAND, false otherwise. */
4877 static bool
4878 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4879 struct iv_cand *cand)
4881 bitmap depends_on;
4882 bool can_autoinc;
4883 comp_cost cost;
4885 if (use->type != USE_ADDRESS)
4886 return false;
4888 cost = get_computation_cost (data, use, cand, true, &depends_on,
4889 &can_autoinc, NULL);
4891 BITMAP_FREE (depends_on);
4893 return !infinite_cost_p (cost) && can_autoinc;
4896 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4897 use that allows autoincrement, and set their AINC_USE if possible. */
4899 static void
4900 set_autoinc_for_original_candidates (struct ivopts_data *data)
4902 unsigned i, j;
4904 for (i = 0; i < n_iv_cands (data); i++)
4906 struct iv_cand *cand = iv_cand (data, i);
4907 struct iv_use *closest_before = NULL;
4908 struct iv_use *closest_after = NULL;
4909 if (cand->pos != IP_ORIGINAL)
4910 continue;
4912 for (j = 0; j < n_iv_uses (data); j++)
4914 struct iv_use *use = iv_use (data, j);
4915 unsigned uid = gimple_uid (use->stmt);
4917 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4918 continue;
4920 if (uid < gimple_uid (cand->incremented_at)
4921 && (closest_before == NULL
4922 || uid > gimple_uid (closest_before->stmt)))
4923 closest_before = use;
4925 if (uid > gimple_uid (cand->incremented_at)
4926 && (closest_after == NULL
4927 || uid < gimple_uid (closest_after->stmt)))
4928 closest_after = use;
4931 if (closest_before != NULL
4932 && autoinc_possible_for_pair (data, closest_before, cand))
4933 cand->ainc_use = closest_before;
4934 else if (closest_after != NULL
4935 && autoinc_possible_for_pair (data, closest_after, cand))
4936 cand->ainc_use = closest_after;
4940 /* Finds the candidates for the induction variables. */
4942 static void
4943 find_iv_candidates (struct ivopts_data *data)
4945 /* Add commonly used ivs. */
4946 add_standard_iv_candidates (data);
4948 /* Add old induction variables. */
4949 add_old_ivs_candidates (data);
4951 /* Add induction variables derived from uses. */
4952 add_derived_ivs_candidates (data);
4954 set_autoinc_for_original_candidates (data);
4956 /* Record the important candidates. */
4957 record_important_candidates (data);
4960 /* Determines costs of basing the use of the iv on an iv candidate. */
4962 static void
4963 determine_use_iv_costs (struct ivopts_data *data)
4965 unsigned i, j;
4966 struct iv_use *use;
4967 struct iv_cand *cand;
4968 bitmap to_clear = BITMAP_ALLOC (NULL);
4970 alloc_use_cost_map (data);
4972 for (i = 0; i < n_iv_uses (data); i++)
4974 use = iv_use (data, i);
4976 if (data->consider_all_candidates)
4978 for (j = 0; j < n_iv_cands (data); j++)
4980 cand = iv_cand (data, j);
4981 determine_use_iv_cost (data, use, cand);
4984 else
4986 bitmap_iterator bi;
4988 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4990 cand = iv_cand (data, j);
4991 if (!determine_use_iv_cost (data, use, cand))
4992 bitmap_set_bit (to_clear, j);
4995 /* Remove the candidates for that the cost is infinite from
4996 the list of related candidates. */
4997 bitmap_and_compl_into (use->related_cands, to_clear);
4998 bitmap_clear (to_clear);
5002 BITMAP_FREE (to_clear);
5004 if (dump_file && (dump_flags & TDF_DETAILS))
5006 fprintf (dump_file, "Use-candidate costs:\n");
5008 for (i = 0; i < n_iv_uses (data); i++)
5010 use = iv_use (data, i);
5012 fprintf (dump_file, "Use %d:\n", i);
5013 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5014 for (j = 0; j < use->n_map_members; j++)
5016 if (!use->cost_map[j].cand
5017 || infinite_cost_p (use->cost_map[j].cost))
5018 continue;
5020 fprintf (dump_file, " %d\t%d\t%d\t",
5021 use->cost_map[j].cand->id,
5022 use->cost_map[j].cost.cost,
5023 use->cost_map[j].cost.complexity);
5024 if (use->cost_map[j].depends_on)
5025 bitmap_print (dump_file,
5026 use->cost_map[j].depends_on, "","");
5027 if (use->cost_map[j].inv_expr_id != -1)
5028 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5029 fprintf (dump_file, "\n");
5032 fprintf (dump_file, "\n");
5034 fprintf (dump_file, "\n");
5038 /* Determines cost of the candidate CAND. */
5040 static void
5041 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5043 comp_cost cost_base;
5044 unsigned cost, cost_step;
5045 tree base;
5047 if (!cand->iv)
5049 cand->cost = 0;
5050 return;
5053 /* There are two costs associated with the candidate -- its increment
5054 and its initialization. The second is almost negligible for any loop
5055 that rolls enough, so we give it only a very small weight. */
5057 base = cand->iv->base;
5058 cost_base = force_var_cost (data, base, NULL);
5059 /* It will be exceptional that the iv register happens to be initialized with
5060 the proper value at no cost. In general, there will at least be a regcopy
5061 or a const set. */
5062 if (cost_base.cost == 0)
5063 cost_base.cost = COSTS_N_INSNS (1);
5064 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5066 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5068 /* Prefer the original ivs unless we may gain something by replacing it.
5069 The reason is to make debugging simpler; so this is not relevant for
5070 artificial ivs created by other optimization passes. */
5071 if (cand->pos != IP_ORIGINAL
5072 || !SSA_NAME_VAR (cand->var_before)
5073 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5074 cost++;
5076 /* Prefer not to insert statements into latch unless there are some
5077 already (so that we do not create unnecessary jumps). */
5078 if (cand->pos == IP_END
5079 && empty_block_p (ip_end_pos (data->current_loop)))
5080 cost++;
5082 cand->cost = cost;
5083 cand->cost_step = cost_step;
5086 /* Determines costs of computation of the candidates. */
5088 static void
5089 determine_iv_costs (struct ivopts_data *data)
5091 unsigned i;
5093 if (dump_file && (dump_flags & TDF_DETAILS))
5095 fprintf (dump_file, "Candidate costs:\n");
5096 fprintf (dump_file, " cand\tcost\n");
5099 for (i = 0; i < n_iv_cands (data); i++)
5101 struct iv_cand *cand = iv_cand (data, i);
5103 determine_iv_cost (data, cand);
5105 if (dump_file && (dump_flags & TDF_DETAILS))
5106 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5109 if (dump_file && (dump_flags & TDF_DETAILS))
5110 fprintf (dump_file, "\n");
5113 /* Calculates cost for having SIZE induction variables. */
5115 static unsigned
5116 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5118 /* We add size to the cost, so that we prefer eliminating ivs
5119 if possible. */
5120 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5121 data->body_includes_call);
5124 /* For each size of the induction variable set determine the penalty. */
5126 static void
5127 determine_set_costs (struct ivopts_data *data)
5129 unsigned j, n;
5130 gimple phi;
5131 gimple_stmt_iterator psi;
5132 tree op;
5133 struct loop *loop = data->current_loop;
5134 bitmap_iterator bi;
5136 if (dump_file && (dump_flags & TDF_DETAILS))
5138 fprintf (dump_file, "Global costs:\n");
5139 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5140 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5141 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5142 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
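5143 /* Count the registers already used by the loop: PHI results that are
5144 not ivs, plus invariants with nonlinear uses. */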
5145 n = 0;
5146 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5148 phi = gsi_stmt (psi);
5149 op = PHI_RESULT (phi);
5151 if (virtual_operand_p (op))
5152 continue;
5154 if (get_iv (data, op))
5155 continue;
5157 n++;
5160 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5162 struct version_info *info = ver_info (data, j);
5164 if (info->inv_id && info->has_nonlin_use)
5165 n++;
5168 data->regs_used = n;
5169 if (dump_file && (dump_flags & TDF_DETAILS))
5170 fprintf (dump_file, " regs_used %d\n", n);
5172 if (dump_file && (dump_flags & TDF_DETAILS))
5174 fprintf (dump_file, " cost for size:\n");
5175 fprintf (dump_file, " ivs\tcost\n");
5176 for (j = 0; j <= 2 * target_avail_regs; j++)
5177 fprintf (dump_file, " %d\t%d\n", j,
5178 ivopts_global_cost_for_size (data, j));
5179 fprintf (dump_file, "\n");
5183 /* Returns true if A is a cheaper cost pair than B. */
5185 static bool
5186 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5188 int cmp;
5190 if (!a)
5191 return false;
5193 if (!b)
5194 return true;
5196 cmp = compare_costs (a->cost, b->cost);
5197 if (cmp < 0)
5198 return true;
5200 if (cmp > 0)
5201 return false;
5203 /* In case the costs are the same, prefer the cheaper candidate. */
5204 if (a->cand->cost < b->cand->cost)
5205 return true;
5207 return false;
5211 /* Returns the candidate by which USE is expressed in IVS. */
5213 static struct cost_pair *
5214 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5216 return ivs->cand_for_use[use->id];
5219 /* Computes the cost field of IVS structure. */
5221 static void
5222 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5224 comp_cost cost = ivs->cand_use_cost;
5226 cost.cost += ivs->cand_cost;
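5227 /* The register pressure estimate also counts the distinct invariant expressions, which occupy registers as well. */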
5228 cost.cost += ivopts_global_cost_for_size (data,
5229 ivs->n_regs + ivs->num_used_inv_expr);
5231 ivs->cost = cost;
5234 /* Remove the invariants in set INVS from set IVS. */
5236 static void
5237 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5239 bitmap_iterator bi;
5240 unsigned iid;
5242 if (!invs)
5243 return;
5245 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5247 ivs->n_invariant_uses[iid]--;
5248 if (ivs->n_invariant_uses[iid] == 0)
5249 ivs->n_regs--;
5253 /* Set USE not to be expressed by any candidate in IVS. */
5255 static void
5256 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5257 struct iv_use *use)
5259 unsigned uid = use->id, cid;
5260 struct cost_pair *cp;
5262 cp = ivs->cand_for_use[uid];
5263 if (!cp)
5264 return;
5265 cid = cp->cand->id;
5267 ivs->bad_uses++;
5268 ivs->cand_for_use[uid] = NULL;
5269 ivs->n_cand_uses[cid]--;
5271 if (ivs->n_cand_uses[cid] == 0)
5273 bitmap_clear_bit (ivs->cands, cid);
5274 /* Do not count the pseudocandidates. */
5275 if (cp->cand->iv)
5276 ivs->n_regs--;
5277 ivs->n_cands--;
5278 ivs->cand_cost -= cp->cand->cost;
5280 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5283 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5285 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5287 if (cp->inv_expr_id != -1)
5289 ivs->used_inv_expr[cp->inv_expr_id]--;
5290 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5291 ivs->num_used_inv_expr--;
5293 iv_ca_recount_cost (data, ivs);
5296 /* Add invariants in set INVS to set IVS. */
5298 static void
5299 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5301 bitmap_iterator bi;
5302 unsigned iid;
5304 if (!invs)
5305 return;
5307 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5309 ivs->n_invariant_uses[iid]++;
5310 if (ivs->n_invariant_uses[iid] == 1)
5311 ivs->n_regs++;
5315 /* Set cost pair for USE in set IVS to CP. */
5317 static void
5318 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5319 struct iv_use *use, struct cost_pair *cp)
5321 unsigned uid = use->id, cid;
5323 if (ivs->cand_for_use[uid] == cp)
5324 return;
5326 if (ivs->cand_for_use[uid])
5327 iv_ca_set_no_cp (data, ivs, use);
5329 if (cp)
5331 cid = cp->cand->id;
5333 ivs->bad_uses--;
5334 ivs->cand_for_use[uid] = cp;
5335 ivs->n_cand_uses[cid]++;
5336 if (ivs->n_cand_uses[cid] == 1)
5338 bitmap_set_bit (ivs->cands, cid);
5339 /* Do not count the pseudocandidates. */
5340 if (cp->cand->iv)
5341 ivs->n_regs++;
5342 ivs->n_cands++;
5343 ivs->cand_cost += cp->cand->cost;
5345 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5348 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5349 iv_ca_set_add_invariants (ivs, cp->depends_on);
5351 if (cp->inv_expr_id != -1)
5353 ivs->used_inv_expr[cp->inv_expr_id]++;
5354 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5355 ivs->num_used_inv_expr++;
5357 iv_ca_recount_cost (data, ivs);
5361 /* Extend set IVS by expressing USE by some of the candidates in it
5362 if possible. All important candidates will be considered
5363 if IMPORTANT_CANDIDATES is true. */
5365 static void
5366 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5367 struct iv_use *use, bool important_candidates)
5369 struct cost_pair *best_cp = NULL, *cp;
5370 bitmap_iterator bi;
5371 bitmap cands;
5372 unsigned i;
5374 gcc_assert (ivs->upto >= use->id);
5376 if (ivs->upto == use->id)
5378 ivs->upto++;
5379 ivs->bad_uses++;
5382 cands = (important_candidates ? data->important_candidates : ivs->cands);
5383 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5385 struct iv_cand *cand = iv_cand (data, i);
5387 cp = get_use_iv_cost (data, use, cand);
5389 if (cheaper_cost_pair (cp, best_cp))
5390 best_cp = cp;
5393 iv_ca_set_cp (data, ivs, use, best_cp);
5396 /* Get cost for assignment IVS. */
5398 static comp_cost
5399 iv_ca_cost (struct iv_ca *ivs)
5401 /* This was a conditional expression but it triggered a bug in
5402 Sun C 5.5. */
5403 if (ivs->bad_uses)
5404 return infinite_cost;
5405 else
5406 return ivs->cost;
5409 /* Returns true if all dependences of CP are among invariants in IVS. */
5411 static bool
5412 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5414 unsigned i;
5415 bitmap_iterator bi;
5417 if (!cp->depends_on)
5418 return true;
5420 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5422 if (ivs->n_invariant_uses[i] == 0)
5423 return false;
5426 return true;
5429 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5430 it before NEXT_CHANGE. */
5432 static struct iv_ca_delta *
5433 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5434 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5436 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5438 change->use = use;
5439 change->old_cp = old_cp;
5440 change->new_cp = new_cp;
5441 change->next_change = next_change;
5443 return change;
5446 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5447 are rewritten. */
5449 static struct iv_ca_delta *
5450 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5452 struct iv_ca_delta *last;
5454 if (!l2)
5455 return l1;
5457 if (!l1)
5458 return l2;
5460 for (last = l1; last->next_change; last = last->next_change)
5461 continue;
5462 last->next_change = l2;
5464 return l1;
5467 /* Reverse the list of changes DELTA, forming the inverse to it. */
5469 static struct iv_ca_delta *
5470 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5472 struct iv_ca_delta *act, *next, *prev = NULL;
5473 struct cost_pair *tmp;
5475 for (act = delta; act; act = next)
5477 next = act->next_change;
5478 act->next_change = prev;
5479 prev = act;
5481 tmp = act->old_cp;
5482 act->old_cp = act->new_cp;
5483 act->new_cp = tmp;
5486 return prev;
5489 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5490 reverted instead. */
5492 static void
5493 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5494 struct iv_ca_delta *delta, bool forward)
5496 struct cost_pair *from, *to;
5497 struct iv_ca_delta *act;
5499 if (!forward)
5500 delta = iv_ca_delta_reverse (delta);
5502 for (act = delta; act; act = act->next_change)
5504 from = act->old_cp;
5505 to = act->new_cp;
5506 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5507 iv_ca_set_cp (data, ivs, act->use, to);
5510 if (!forward)
5511 iv_ca_delta_reverse (delta);
5514 /* Returns true if CAND is used in IVS. */
5516 static bool
5517 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5519 return ivs->n_cand_uses[cand->id] > 0;
5522 /* Returns number of induction variable candidates in the set IVS. */
5524 static unsigned
5525 iv_ca_n_cands (struct iv_ca *ivs)
5527 return ivs->n_cands;
5530 /* Free the list of changes DELTA. */
5532 static void
5533 iv_ca_delta_free (struct iv_ca_delta **delta)
5535 struct iv_ca_delta *act, *next;
5537 for (act = *delta; act; act = next)
5539 next = act->next_change;
5540 free (act);
5543 *delta = NULL;
5546 /* Allocates a new iv candidate assignment. */
5548 static struct iv_ca *
5549 iv_ca_new (struct ivopts_data *data)
5551 struct iv_ca *nw = XNEW (struct iv_ca);
5553 nw->upto = 0;
5554 nw->bad_uses = 0;
5555 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5556 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5557 nw->cands = BITMAP_ALLOC (NULL);
5558 nw->n_cands = 0;
5559 nw->n_regs = 0;
5560 nw->cand_use_cost = no_cost;
5561 nw->cand_cost = 0;
5562 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5563 nw->cost = no_cost;
5564 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5565 nw->num_used_inv_expr = 0;
5567 return nw;
5570 /* Free memory occupied by the set IVS. */
5572 static void
5573 iv_ca_free (struct iv_ca **ivs)
5575 free ((*ivs)->cand_for_use);
5576 free ((*ivs)->n_cand_uses);
5577 BITMAP_FREE ((*ivs)->cands);
5578 free ((*ivs)->n_invariant_uses);
5579 free ((*ivs)->used_inv_expr);
5580 free (*ivs);
5581 *ivs = NULL;
5584 /* Dumps IVS to FILE. */
5586 static void
5587 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5589 const char *pref = " invariants ";
5590 unsigned i;
5591 comp_cost cost = iv_ca_cost (ivs);
5593 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5594 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5595 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5596 bitmap_print (file, ivs->cands, " candidates: ","\n");
5598 for (i = 0; i < ivs->upto; i++)
5600 struct iv_use *use = iv_use (data, i);
5601 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5602 if (cp)
5603 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5604 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5605 else
5606 fprintf (file, " use:%d --> ??\n", use->id);
5609 for (i = 1; i <= data->max_inv_id; i++)
5610 if (ivs->n_invariant_uses[i])
5612 fprintf (file, "%s%d", pref, i);
5613 pref = ", ";
5615 fprintf (file, "\n\n");
5618 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5619 new set, and store differences in DELTA. Number of induction variables
5620 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true,
5621 the function will try to find a solution with a minimal number of iv
5622 candidates. */
5623 static comp_cost
5624 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5625 struct iv_cand *cand, struct iv_ca_delta **delta,
5626 unsigned *n_ivs, bool min_ncand)
5628 unsigned i;
5629 comp_cost cost;
5630 struct iv_use *use;
5631 struct cost_pair *old_cp, *new_cp;
5633 *delta = NULL;
5634 for (i = 0; i < ivs->upto; i++)
5636 use = iv_use (data, i);
5637 old_cp = iv_ca_cand_for_use (ivs, use);
5639 if (old_cp
5640 && old_cp->cand == cand)
5641 continue;
5643 new_cp = get_use_iv_cost (data, use, cand);
5644 if (!new_cp)
5645 continue;
5647 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5648 continue;
5650 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5651 continue;
5653 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5656 iv_ca_delta_commit (data, ivs, *delta, true);
5657 cost = iv_ca_cost (ivs);
5658 if (n_ivs)
5659 *n_ivs = iv_ca_n_cands (ivs);
5660 iv_ca_delta_commit (data, ivs, *delta, false);
5662 return cost;
5665 /* Try narrowing set IVS by removing CAND. Return the cost of
5666 the new set and store the differences in DELTA. */
5668 static comp_cost
5669 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5670 struct iv_cand *cand, struct iv_ca_delta **delta)
5672 unsigned i, ci;
5673 struct iv_use *use;
5674 struct cost_pair *old_cp, *new_cp, *cp;
5675 bitmap_iterator bi;
5676 struct iv_cand *cnd;
5677 comp_cost cost;
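5678 /* For each use expressed by CAND, find the cheapest alternative in IVS. */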
5679 *delta = NULL;
5680 for (i = 0; i < n_iv_uses (data); i++)
5682 use = iv_use (data, i);
5684 old_cp = iv_ca_cand_for_use (ivs, use);
5685 if (old_cp->cand != cand)
5686 continue;
5688 new_cp = NULL;
5690 if (data->consider_all_candidates)
5692 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5694 if (ci == cand->id)
5695 continue;
5697 cnd = iv_cand (data, ci);
5699 cp = get_use_iv_cost (data, use, cnd);
5700 if (!cp)
5701 continue;
5703 if (!iv_ca_has_deps (ivs, cp))
5704 continue;
5706 if (!cheaper_cost_pair (cp, new_cp))
5707 continue;
5709 new_cp = cp;
5712 else
5714 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5716 if (ci == cand->id)
5717 continue;
5719 cnd = iv_cand (data, ci);
5721 cp = get_use_iv_cost (data, use, cnd);
5722 if (!cp)
5723 continue;
5724 if (!iv_ca_has_deps (ivs, cp))
5725 continue;
5727 if (!cheaper_cost_pair (cp, new_cp))
5728 continue;
5730 new_cp = cp;
5734 if (!new_cp)
5736 iv_ca_delta_free (delta);
5737 return infinite_cost;
5740 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5743 iv_ca_delta_commit (data, ivs, *delta, true);
5744 cost = iv_ca_cost (ivs);
5745 iv_ca_delta_commit (data, ivs, *delta, false);
5747 return cost;
5750 /* Try optimizing the set of candidates IVS by removing candidates other
5751 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5752 differences in DELTA. */
5754 static comp_cost
5755 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5756 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5758 bitmap_iterator bi;
5759 struct iv_ca_delta *act_delta, *best_delta;
5760 unsigned i;
5761 comp_cost best_cost, acost;
5762 struct iv_cand *cand;
5764 best_delta = NULL;
5765 best_cost = iv_ca_cost (ivs);
5767 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5769 cand = iv_cand (data, i);
5771 if (cand == except_cand)
5772 continue;
5774 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5776 if (compare_costs (acost, best_cost) < 0)
5778 best_cost = acost;
5779 iv_ca_delta_free (&best_delta);
5780 best_delta = act_delta;
5782 else
5783 iv_ca_delta_free (&act_delta);
5786 if (!best_delta)
5788 *delta = NULL;
5789 return best_cost;
5792 /* Recurse to possibly remove other unnecessary ivs. */
5793 iv_ca_delta_commit (data, ivs, best_delta, true);
5794 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5795 iv_ca_delta_commit (data, ivs, best_delta, false);
5796 *delta = iv_ca_delta_join (best_delta, *delta);
5797 return best_cost;
5800 /* Tries to extend the set IVS in the best possible way in order
5801 to express the USE. If ORIGINALP is true, prefer candidates from
5802 the original set of IVs, otherwise favor important candidates not
5803 based on any memory object. */
5805 static bool
5806 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5807 struct iv_use *use, bool originalp)
5809 comp_cost best_cost, act_cost;
5810 unsigned i;
5811 bitmap_iterator bi;
5812 struct iv_cand *cand;
5813 struct iv_ca_delta *best_delta = NULL, *act_delta;
5814 struct cost_pair *cp;
5816 iv_ca_add_use (data, ivs, use, false);
5817 best_cost = iv_ca_cost (ivs);
5819 cp = iv_ca_cand_for_use (ivs, use);
5820 if (!cp)
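5821 /* No candidate in IVS could express USE; retry, this time considering all important candidates. */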
5822 ivs->upto--;
5823 ivs->bad_uses--;
5824 iv_ca_add_use (data, ivs, use, true);
5825 best_cost = iv_ca_cost (ivs);
5826 cp = iv_ca_cand_for_use (ivs, use);
5828 if (cp)
5830 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5831 iv_ca_set_no_cp (data, ivs, use);
5834 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5835 first try important candidates not based on any memory object. Only if
5836 this fails, try the specific ones. Rationale -- in loops with many
5837 variables the best choice often is to use just one generic biv. If we
5838 added here many ivs specific to the uses, the optimization algorithm later
5839 would be likely to get stuck in a local minimum, thus causing us to create
5840 too many ivs. The approach from few ivs to more seems more likely to be
5841 successful -- starting from few ivs, replacing an expensive use by a
5842 specific iv should always be a win. */
5843 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5845 cand = iv_cand (data, i);
5847 if (originalp && cand->pos != IP_ORIGINAL)
5848 continue;
5850 if (!originalp && cand->iv->base_object != NULL_TREE)
5851 continue;
5853 if (iv_ca_cand_used_p (ivs, cand))
5854 continue;
5856 cp = get_use_iv_cost (data, use, cand);
5857 if (!cp)
5858 continue;
5860 iv_ca_set_cp (data, ivs, use, cp);
5861 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5862 true);
5863 iv_ca_set_no_cp (data, ivs, use);
5864 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5866 if (compare_costs (act_cost, best_cost) < 0)
5868 best_cost = act_cost;
5870 iv_ca_delta_free (&best_delta);
5871 best_delta = act_delta;
5873 else
5874 iv_ca_delta_free (&act_delta);
5877 if (infinite_cost_p (best_cost))
5879 for (i = 0; i < use->n_map_members; i++)
5881 cp = use->cost_map + i;
5882 cand = cp->cand;
5883 if (!cand)
5884 continue;
5886 /* Already tried this. */
5887 if (cand->important)
5889 if (originalp && cand->pos == IP_ORIGINAL)
5890 continue;
5891 if (!originalp && cand->iv->base_object == NULL_TREE)
5892 continue;
5895 if (iv_ca_cand_used_p (ivs, cand))
5896 continue;
5898 act_delta = NULL;
5899 iv_ca_set_cp (data, ivs, use, cp);
5900 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5901 iv_ca_set_no_cp (data, ivs, use);
5902 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5903 cp, act_delta);
5905 if (compare_costs (act_cost, best_cost) < 0)
5907 best_cost = act_cost;
5909 if (best_delta)
5910 iv_ca_delta_free (&best_delta);
5911 best_delta = act_delta;
5913 else
5914 iv_ca_delta_free (&act_delta);
5918 iv_ca_delta_commit (data, ivs, best_delta, true);
5919 iv_ca_delta_free (&best_delta);
5921 return !infinite_cost_p (best_cost);
5924 /* Finds an initial assignment of candidates to uses. */
5926 static struct iv_ca *
5927 get_initial_solution (struct ivopts_data *data, bool originalp)
5929 struct iv_ca *ivs = iv_ca_new (data);
5930 unsigned i;
5932 for (i = 0; i < n_iv_uses (data); i++)
5933 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5935 iv_ca_free (&ivs);
5936 return NULL;
5939 return ivs;
5942 /* Tries to improve the set of induction variables IVS. */
5944 static bool
5945 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5947 unsigned i, n_ivs;
5948 comp_cost acost, best_cost = iv_ca_cost (ivs);
5949 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5950 struct iv_cand *cand;
5952 /* Try extending the set of induction variables by one. */
5953 for (i = 0; i < n_iv_cands (data); i++)
5955 cand = iv_cand (data, i);
5957 if (iv_ca_cand_used_p (ivs, cand))
5958 continue;
5960 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5961 if (!act_delta)
5962 continue;
5964 /* If we successfully added the candidate and the set is small enough,
5965 try optimizing it by removing other candidates. */
5966 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5968 iv_ca_delta_commit (data, ivs, act_delta, true);
5969 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5970 iv_ca_delta_commit (data, ivs, act_delta, false);
5971 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5974 if (compare_costs (acost, best_cost) < 0)
5976 best_cost = acost;
5977 iv_ca_delta_free (&best_delta);
5978 best_delta = act_delta;
5980 else
5981 iv_ca_delta_free (&act_delta);
5984 if (!best_delta)
5986 /* Try removing the candidates from the set instead. */
5987 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5989 /* Nothing more we can do. */
5990 if (!best_delta)
5991 return false;
5994 iv_ca_delta_commit (data, ivs, best_delta, true);
5995 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5996 iv_ca_delta_free (&best_delta);
5997 return true;
6000 /* Attempts to find the optimal set of induction variables. We use a simple
6001 greedy heuristic -- we try to replace at most one candidate in the selected
6002 solution and remove the unused ivs while this improves the cost. */
6004 static struct iv_ca *
6005 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6007 struct iv_ca *set;
6009 /* Get the initial solution. */
6010 set = get_initial_solution (data, originalp);
6011 if (!set)
6013 if (dump_file && (dump_flags & TDF_DETAILS))
6014 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6015 return NULL;
6018 if (dump_file && (dump_flags & TDF_DETAILS))
6020 fprintf (dump_file, "Initial set of candidates:\n");
6021 iv_ca_dump (data, dump_file, set);
6024 while (try_improve_iv_set (data, set))
6026 if (dump_file && (dump_flags & TDF_DETAILS))
6028 fprintf (dump_file, "Improved to:\n");
6029 iv_ca_dump (data, dump_file, set);
6033 return set;
6036 static struct iv_ca *
6037 find_optimal_iv_set (struct ivopts_data *data)
6039 unsigned i;
6040 struct iv_ca *set, *origset;
6041 struct iv_use *use;
6042 comp_cost cost, origcost;
6044 /* Determine the cost based on a strategy that starts with original IVs,
6045 and try again using a strategy that prefers candidates not based
6046 on any IVs. */
6047 origset = find_optimal_iv_set_1 (data, true);
6048 set = find_optimal_iv_set_1 (data, false);
6050 if (!origset && !set)
6051 return NULL;
6053 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6054 cost = set ? iv_ca_cost (set) : infinite_cost;
6056 if (dump_file && (dump_flags & TDF_DETAILS))
6058 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6059 origcost.cost, origcost.complexity);
6060 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6061 cost.cost, cost.complexity);
6064 /* Choose the one with the best cost. */
6065 if (compare_costs (origcost, cost) <= 0)
6067 if (set)
6068 iv_ca_free (&set);
6069 set = origset;
6071 else if (origset)
6072 iv_ca_free (&origset);
6074 for (i = 0; i < n_iv_uses (data); i++)
6076 use = iv_use (data, i);
6077 use->selected = iv_ca_cand_for_use (set, use)->cand;
6080 return set;
6083 /* Creates a new induction variable corresponding to CAND. */
6085 static void
6086 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6088 gimple_stmt_iterator incr_pos;
6089 tree base;
6090 bool after = false;
6092 if (!cand->iv)
6093 return;
6095 switch (cand->pos)
6097 case IP_NORMAL:
6098 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6099 break;
6101 case IP_END:
6102 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6103 after = true;
6104 break;
6106 case IP_AFTER_USE:
6107 after = true;
6108 /* fall through */
6109 case IP_BEFORE_USE:
6110 incr_pos = gsi_for_stmt (cand->incremented_at);
6111 break;
6113 case IP_ORIGINAL:
6114 /* Mark that the iv is preserved. */
6115 name_info (data, cand->var_before)->preserve_biv = true;
6116 name_info (data, cand->var_after)->preserve_biv = true;
6118 /* Rewrite the increment so that it uses var_before directly. */
6119 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6120 return;
6123 gimple_add_tmp_var (cand->var_before);
6125 base = unshare_expr (cand->iv->base);
6127 create_iv (base, unshare_expr (cand->iv->step),
6128 cand->var_before, data->current_loop,
6129 &incr_pos, after, &cand->var_before, &cand->var_after);
6132 /* Creates new induction variables described in SET. */
6134 static void
6135 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6137 unsigned i;
6138 struct iv_cand *cand;
6139 bitmap_iterator bi;
6141 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6143 cand = iv_cand (data, i);
6144 create_new_iv (data, cand);
6147 if (dump_file && (dump_flags & TDF_DETAILS))
6149 fprintf (dump_file, "\nSelected IV set: \n");
6150 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6152 cand = iv_cand (data, i);
6153 dump_cand (dump_file, cand);
6155 fprintf (dump_file, "\n");
6159 /* Rewrites USE (definition of iv used in a nonlinear expression)
6160 using candidate CAND. */
6162 static void
6163 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6164 struct iv_use *use, struct iv_cand *cand)
6166 tree comp;
6167 tree op, tgt;
6168 gimple ass;
6169 gimple_stmt_iterator bsi;
6171 /* An important special case -- if we are asked to express value of
6172 the original iv by itself, just exit; there is no need to
6173 introduce a new computation (that might also need casting the
6174 variable to unsigned and back). */
6175 if (cand->pos == IP_ORIGINAL
6176 && cand->incremented_at == use->stmt)
6178 enum tree_code stmt_code;
6180 gcc_assert (is_gimple_assign (use->stmt));
6181 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6183 /* Check whether we may leave the computation unchanged.
6184 This is the case only if it does not rely on other
6185 computations in the loop -- otherwise, the computation
6186 we rely upon may be removed in remove_unused_ivs,
6187 thus leading to ICE. */
6188 stmt_code = gimple_assign_rhs_code (use->stmt);
6189 if (stmt_code == PLUS_EXPR
6190 || stmt_code == MINUS_EXPR
6191 || stmt_code == POINTER_PLUS_EXPR)
6193 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6194 op = gimple_assign_rhs2 (use->stmt);
6195 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6196 op = gimple_assign_rhs1 (use->stmt);
6197 else
6198 op = NULL_TREE;
6200 else
6201 op = NULL_TREE;
6203 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6204 return;
6207 comp = get_computation (data->current_loop, use, cand);
6208 gcc_assert (comp != NULL_TREE);
6210 switch (gimple_code (use->stmt))
6212 case GIMPLE_PHI:
6213 tgt = PHI_RESULT (use->stmt);
6215 /* If we should keep the biv, do not replace it. */
6216 if (name_info (data, tgt)->preserve_biv)
6217 return;
6219 bsi = gsi_after_labels (gimple_bb (use->stmt));
6220 break;
6222 case GIMPLE_ASSIGN:
6223 tgt = gimple_assign_lhs (use->stmt);
6224 bsi = gsi_for_stmt (use->stmt);
6225 break;
6227 default:
6228 gcc_unreachable ();
6231 if (!valid_gimple_rhs_p (comp)
6232 || (gimple_code (use->stmt) != GIMPLE_PHI
6233 /* We can't allow re-allocating the stmt as it might be pointed
6234 to still. */
6235 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6236 >= gimple_num_ops (gsi_stmt (bsi)))))
6238 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6239 true, GSI_SAME_STMT);
6240 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6242 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6243 /* As this isn't a plain copy we have to reset alignment
6244 information. */
6245 if (SSA_NAME_PTR_INFO (comp))
6246 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6250 if (gimple_code (use->stmt) == GIMPLE_PHI)
6252 ass = gimple_build_assign (tgt, comp);
6253 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6255 bsi = gsi_for_stmt (use->stmt);
6256 remove_phi_node (&bsi, false);
6258 else
6260 gimple_assign_set_rhs_from_tree (&bsi, comp);
6261 use->stmt = gsi_stmt (bsi);
6265 /* Performs a peephole optimization to reorder the iv update statement with
6266 a mem ref to enable instruction combining in later phases. The mem ref uses
6267 the iv value before the update, so the reordering transformation requires
6268 adjustment of the offset. CAND is the selected IV_CAND.
6270 Example:
6272 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6273 iv2 = iv1 + 1;
6275 if (t < val) (1)
6276 goto L;
6277 goto Head;
6280 Directly propagating t over to (1) will introduce an overlapping live
6281 range, thus increasing register pressure. This peephole transforms it into:
6284 iv2 = iv1 + 1;
6285 t = MEM_REF (base, iv2, 8, 8);
6286 if (t < val)
6287 goto L;
6288 goto Head;
static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
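
/* For illustration (hypothetical dump, not produced verbatim by this file):
   an array use a[i] rewritten against a pointer candidate may end up as

     MEM[base: ivtmp.7_3, offset: 0B]

   where the hint told create_mem_ref that ivtmp.7_3, despite its unsigned
   type, is based on the memory object "a" and therefore belongs in the
   base slot of the reference rather than in the offset.  */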
/* Rewrites USE (a condition with an iv as one of its arguments) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
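
/* For illustration (hypothetical GIMPLE): when a cheaper bound is available,
   an exit test such as

     if (i_4 < n_6)

   may be rewritten against the candidate as

     _9 = (unsigned int) n_6;   // emitted on the preheader edge
     ...
     if (ivtmp.5_8 != _9)

   after which the original counter i often becomes unused and is deleted
   by remove_unused_ivs.  */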
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }
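
              /* For illustration (hypothetical values): a selected
                 candidate whose step equals info->iv's scores 4, the same
                 mode of the base adds 2, and a constant base adds 1, so a
                 candidate matching all three gets the maximal preference
                 of 7 and stands in for the removed iv in debug stmts.  */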
              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
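
/* For illustration (hypothetical debug dump): when the removed iv i had
   several debug uses, the code above emits a debug temp and redirects the
   binds to it, roughly

     # DEBUG D#1 => (int) ivtmp.5_8 / 4
     # DEBUG i => D#1

   whereas with a single direct use the computed expression is substituted
   into the bind itself.  */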
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}
/* Returns true if the loop body BODY (spanning NUM_NODES blocks) includes
   any call other than an inexpensive builtin.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
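
/* For illustration only (a hypothetical source-level view of the pipeline
   above, not output of this pass): a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   may effectively become

     p = a;  q = a + n;
     for (; p != q; p++)
       sum += *p;

   the address use a[i] is strength-reduced onto a pointer candidate, the
   compare i < n is rewritten against it, and the now-unused counter i is
   removed.  */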
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);