/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
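
/* As an illustration (a hypothetical example, not taken from any
   testcase): on a target with cheap pointer increments, a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   where the address inside the loop is computed as a + 4 * i, would
   typically be rewritten to use a single pointer induction variable,
   roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which performs strength reduction (the multiplication by the element
   size disappears from the loop body) and, if I has no other uses,
   induction variable elimination of I itself.  */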
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "tm_p.h"
70 #include "basic-block.h"
71 #include "gimple-pretty-print.h"
72 #include "gimple.h"
73 #include "gimplify.h"
74 #include "gimple-iterator.h"
75 #include "gimplify-me.h"
76 #include "gimple-ssa.h"
77 #include "cgraph.h"
78 #include "tree-cfg.h"
79 #include "tree-phinodes.h"
80 #include "ssa-iterators.h"
81 #include "tree-ssanames.h"
82 #include "tree-ssa-loop-ivopts.h"
83 #include "tree-ssa-loop-manip.h"
84 #include "tree-ssa-loop-niter.h"
85 #include "tree-ssa-loop.h"
86 #include "tree-dfa.h"
87 #include "tree-ssa.h"
88 #include "cfgloop.h"
89 #include "tree-pass.h"
90 #include "ggc.h"
91 #include "insn-config.h"
92 #include "pointer-set.h"
93 #include "hash-table.h"
94 #include "tree-chrec.h"
95 #include "tree-scalar-evolution.h"
96 #include "cfgloop.h"
97 #include "params.h"
98 #include "langhooks.h"
99 #include "tree-affine.h"
100 #include "target.h"
101 #include "tree-inline.h"
102 #include "tree-ssa-propagate.h"
103 #include "expmed.h"
104 #include "tree-ssa-address.h"
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
109 #include "expr.h"
110 #include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The default number of loop iterations assumed when no profile
   data is available.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* The memory object that the induction variable
                           points to.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
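
/* For illustration (hypothetical numbers, not taken from any target):
   an address use that a candidate can express directly might get a
   cost of {0, 0}, one that needs an extra addition something like
   {4, 1}, and a use the candidate cannot express at all gets
   infinite_cost, which the candidate-selection phase never accepts.  */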
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns the memory object that EXPR points to.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
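
/* For example (hypothetical inputs): for EXPR == &a[i] where A is a
   declaration, the function returns &a converted to ptr_type_node;
   for EXPR == p + 4 * i it recurses through the POINTER_PLUS_EXPR and
   reduces to P itself; and for an integer constant it returns
   NULL_TREE, meaning no base object is known.  */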
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
                                             &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it
     was not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
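
/* For example (hypothetical operands): with TOP == n * 12 and BOT == n
   the function stores 12 to *MUL and returns true; with TOP == 12 and
   BOT == 4 it stores 3; with TOP == n + 1 and BOT == n it returns
   false, since the quotient is not a constant.  */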
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      {
        tree field;

        if (!inside_addr)
          return orig_expr;

        tmp = component_ref_field_offset (expr);
        field = TREE_OPERAND (expr, 1);
        if (top_compref
            && cst_and_fits_in_hwi (tmp)
            && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
          {
            HOST_WIDE_INT boffset, abs_off;

            /* Strip the component reference completely.  */
            op0 = TREE_OPERAND (expr, 0);
            op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
            boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
            abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
            if (boffset < 0)
              abs_off = -abs_off;

            *offset = off0 + int_cst_value (tmp) + abs_off;
            return op0;
          }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
2215 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2217 static tree
2218 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2220 HOST_WIDE_INT off;
2221 tree core = strip_offset_1 (expr, false, false, &off);
2222 *offset = off;
2223 return core;
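/* For instance, for EXPR = p + 16 strip_offset returns P and sets *OFFSET
   to 16; for EXPR = &a[i + 4] with 4-byte array elements it returns &a[i]
   and sets *OFFSET to 16.  */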
2226 /* Returns a variant of TYPE that can be used as a base for different uses.
2227 We return an unsigned type with the same precision, which avoids problems
2228 with overflows. */
2230 static tree
2231 generic_type_for (tree type)
2233 if (POINTER_TYPE_P (type))
2234 return unsigned_type_for (type);
2236 if (TYPE_UNSIGNED (type))
2237 return type;
2239 return unsigned_type_for (type);
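/* E.g. generic_type_for (int) is unsigned int; for a pointer type we
   return an unsigned integer type of the same precision, so candidate
   arithmetic wraps instead of invoking undefined overflow.  */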
2242 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2243 the bitmap in which we should store them. */
2245 static struct ivopts_data *fd_ivopts_data;
2246 static tree
2247 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2249 bitmap *depends_on = (bitmap *) data;
2250 struct version_info *info;
2252 if (TREE_CODE (*expr_p) != SSA_NAME)
2253 return NULL_TREE;
2254 info = name_info (fd_ivopts_data, *expr_p);
2256 if (!info->inv_id || info->has_nonlin_use)
2257 return NULL_TREE;
2259 if (!*depends_on)
2260 *depends_on = BITMAP_ALLOC (NULL);
2261 bitmap_set_bit (*depends_on, info->inv_id);
2263 return NULL_TREE;
2266 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2267 position to POS. If USE is not NULL, the candidate is set as related to
2268 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2269 replacement of the final value of the iv by a direct computation. */
2271 static struct iv_cand *
2272 add_candidate_1 (struct ivopts_data *data,
2273 tree base, tree step, bool important, enum iv_position pos,
2274 struct iv_use *use, gimple incremented_at)
2276 unsigned i;
2277 struct iv_cand *cand = NULL;
2278 tree type, orig_type;
2280 /* For non-original variables, make sure their values are computed in a type
2281 that does not invoke undefined behavior on overflows (since in general,
2282 we cannot prove that these induction variables are non-wrapping). */
2283 if (pos != IP_ORIGINAL)
2285 orig_type = TREE_TYPE (base);
2286 type = generic_type_for (orig_type);
2287 if (type != orig_type)
2289 base = fold_convert (type, base);
2290 step = fold_convert (type, step);
2294 for (i = 0; i < n_iv_cands (data); i++)
2296 cand = iv_cand (data, i);
2298 if (cand->pos != pos)
2299 continue;
2301 if (cand->incremented_at != incremented_at
2302 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2303 && cand->ainc_use != use))
2304 continue;
2306 if (!cand->iv)
2308 if (!base && !step)
2309 break;
2311 continue;
2314 if (!base && !step)
2315 continue;
2317 if (operand_equal_p (base, cand->iv->base, 0)
2318 && operand_equal_p (step, cand->iv->step, 0)
2319 && (TYPE_PRECISION (TREE_TYPE (base))
2320 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2321 break;
2324 if (i == n_iv_cands (data))
2326 cand = XCNEW (struct iv_cand);
2327 cand->id = i;
2329 if (!base && !step)
2330 cand->iv = NULL;
2331 else
2332 cand->iv = alloc_iv (base, step);
2334 cand->pos = pos;
2335 if (pos != IP_ORIGINAL && cand->iv)
2337 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2338 cand->var_after = cand->var_before;
2340 cand->important = important;
2341 cand->incremented_at = incremented_at;
2342 data->iv_candidates.safe_push (cand);
2344 if (step
2345 && TREE_CODE (step) != INTEGER_CST)
2347 fd_ivopts_data = data;
2348 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2351 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2352 cand->ainc_use = use;
2353 else
2354 cand->ainc_use = NULL;
2356 if (dump_file && (dump_flags & TDF_DETAILS))
2357 dump_cand (dump_file, cand);
2360 if (important && !cand->important)
2362 cand->important = true;
2363 if (dump_file && (dump_flags & TDF_DETAILS))
2364 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2367 if (use)
2369 bitmap_set_bit (use->related_cands, i);
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2371 fprintf (dump_file, "Candidate %d is related to use %d\n",
2372 cand->id, use->id);
2375 return cand;
2378 /* Returns true if incrementing the induction variable at the end of the LOOP
2379 is allowed.
2381 The purpose is to avoid splitting the latch edge with a biv increment, thus
2382 creating a jump, possibly confusing other optimization passes and leaving
2383 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2384 is not available (so we do not have a better alternative), or if the latch
2385 edge is already nonempty. */
2387 static bool
2388 allow_ip_end_pos_p (struct loop *loop)
2390 if (!ip_normal_pos (loop))
2391 return true;
2393 if (!empty_block_p (ip_end_pos (loop)))
2394 return true;
2396 return false;
2399 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2400 Important field is set to IMPORTANT. */
2402 static void
2403 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2404 bool important, struct iv_use *use)
2406 basic_block use_bb = gimple_bb (use->stmt);
2407 enum machine_mode mem_mode;
2408 unsigned HOST_WIDE_INT cstepi;
2410 /* If we insert the increment in any position other than the standard
2411 ones, we must ensure that it is incremented once per iteration.
2412 It must not be in an inner nested loop, or one side of an if
2413 statement. */
2414 if (use_bb->loop_father != data->current_loop
2415 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2416 || stmt_could_throw_p (use->stmt)
2417 || !cst_and_fits_in_hwi (step))
2418 return;
2420 cstepi = int_cst_value (step);
2422 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2423 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2424 || USE_STORE_PRE_INCREMENT (mem_mode))
2425 && GET_MODE_SIZE (mem_mode) == cstepi)
2426 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2427 || USE_STORE_PRE_DECREMENT (mem_mode))
2428 && GET_MODE_SIZE (mem_mode) == -cstepi))
2430 enum tree_code code = MINUS_EXPR;
2431 tree new_base;
2432 tree new_step = step;
2434 if (POINTER_TYPE_P (TREE_TYPE (base)))
2436 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2437 code = POINTER_PLUS_EXPR;
2439 else
2440 new_step = fold_convert (TREE_TYPE (base), new_step);
2441 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2442 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2443 use->stmt);
2445 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2446 || USE_STORE_POST_INCREMENT (mem_mode))
2447 && GET_MODE_SIZE (mem_mode) == cstepi)
2448 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2449 || USE_STORE_POST_DECREMENT (mem_mode))
2450 && GET_MODE_SIZE (mem_mode) == -cstepi))
2452 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2453 use->stmt);
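/* E.g. on a target with post-increment addressing (such as ARM), an
   SImode use whose iv steps by 4 matches the IP_AFTER_USE case above,
   and the resulting candidate can later be matched to a load like
   "ldr r0, [r1], #4".  */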
2457 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2458 position to POS. If USE is not NULL, the candidate is set as related to
2459 it. The candidate computation is scheduled on all available positions. */
2461 static void
2462 add_candidate (struct ivopts_data *data,
2463 tree base, tree step, bool important, struct iv_use *use)
2465 if (ip_normal_pos (data->current_loop))
2466 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2467 if (ip_end_pos (data->current_loop)
2468 && allow_ip_end_pos_p (data->current_loop))
2469 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2471 if (use != NULL && use->type == USE_ADDRESS)
2472 add_autoinc_candidates (data, base, step, important, use);
2475 /* Adds standard iv candidates. */
2477 static void
2478 add_standard_iv_candidates (struct ivopts_data *data)
2480 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2482 /* The same for the wider type long, if it is still fast enough. */
2483 if (TYPE_PRECISION
2484 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2485 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2486 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2487 build_int_cst (long_integer_type_node, 1), true, NULL);
2489 /* The same for long long, if it is still fast enough. */
2490 if (TYPE_PRECISION
2491 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2492 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2493 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2494 build_int_cst (long_long_integer_type_node, 1), true, NULL);
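/* On a typical LP64 target this adds the zero-based, step-one candidates
   in int and long; the long long variant is skipped there because its
   precision equals that of long.  */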
2498 /* Adds candidates based on the old induction variable IV. */
2500 static void
2501 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2503 gimple phi;
2504 tree def;
2505 struct iv_cand *cand;
2507 add_candidate (data, iv->base, iv->step, true, NULL);
2509 /* The same, but with initial value zero. */
2510 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2511 add_candidate (data, size_int (0), iv->step, true, NULL);
2512 else
2513 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2514 iv->step, true, NULL);
2516 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2517 if (gimple_code (phi) == GIMPLE_PHI)
2519 /* Additionally record the possibility of leaving the original iv
2520 untouched. */
2521 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2522 cand = add_candidate_1 (data,
2523 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2524 SSA_NAME_DEF_STMT (def));
2525 cand->var_before = iv->ssa_name;
2526 cand->var_after = def;
2530 /* Adds candidates based on the old induction variables. */
2532 static void
2533 add_old_ivs_candidates (struct ivopts_data *data)
2535 unsigned i;
2536 struct iv *iv;
2537 bitmap_iterator bi;
2539 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2541 iv = ver_info (data, i)->iv;
2542 if (iv && iv->biv_p && !integer_zerop (iv->step))
2543 add_old_iv_candidates (data, iv);
2547 /* Adds candidates based on the value of the induction variable IV and USE. */
2549 static void
2550 add_iv_value_candidates (struct ivopts_data *data,
2551 struct iv *iv, struct iv_use *use)
2553 unsigned HOST_WIDE_INT offset;
2554 tree base;
2555 tree basetype;
2557 add_candidate (data, iv->base, iv->step, false, use);
2559 /* The same, but with initial value zero. Make such a variable important,
2560 since it is generic enough that possibly many uses may be based
2561 on it. */
2562 basetype = TREE_TYPE (iv->base);
2563 if (POINTER_TYPE_P (basetype))
2564 basetype = sizetype;
2565 add_candidate (data, build_int_cst (basetype, 0),
2566 iv->step, true, use);
2568 /* Third, try removing the constant offset. Make sure to even
2569 add a candidate for &a[0] vs. (T *)&a. */
2570 base = strip_offset (iv->base, &offset);
2571 if (offset
2572 || base != iv->base)
2573 add_candidate (data, base, iv->step, false, use);
2576 /* Adds candidates based on the uses. */
2578 static void
2579 add_derived_ivs_candidates (struct ivopts_data *data)
2581 unsigned i;
2583 for (i = 0; i < n_iv_uses (data); i++)
2585 struct iv_use *use = iv_use (data, i);
2587 if (!use)
2588 continue;
2590 switch (use->type)
2592 case USE_NONLINEAR_EXPR:
2593 case USE_COMPARE:
2594 case USE_ADDRESS:
2595 /* Just add the ivs based on the value of the iv used here. */
2596 add_iv_value_candidates (data, use->iv, use);
2597 break;
2599 default:
2600 gcc_unreachable ();
2605 /* Record important candidates and add them to related_cands bitmaps
2606 if needed. */
2608 static void
2609 record_important_candidates (struct ivopts_data *data)
2611 unsigned i;
2612 struct iv_use *use;
2614 for (i = 0; i < n_iv_cands (data); i++)
2616 struct iv_cand *cand = iv_cand (data, i);
2618 if (cand->important)
2619 bitmap_set_bit (data->important_candidates, i);
2622 data->consider_all_candidates = (n_iv_cands (data)
2623 <= CONSIDER_ALL_CANDIDATES_BOUND);
2625 if (data->consider_all_candidates)
2627 /* We will not need "related_cands" bitmaps in this case,
2628 so release them to decrease peak memory consumption. */
2629 for (i = 0; i < n_iv_uses (data); i++)
2631 use = iv_use (data, i);
2632 BITMAP_FREE (use->related_cands);
2635 else
2637 /* Add important candidates to the related_cands bitmaps. */
2638 for (i = 0; i < n_iv_uses (data); i++)
2639 bitmap_ior_into (iv_use (data, i)->related_cands,
2640 data->important_candidates);
2644 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2645 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2646 we allocate a small hash table (probed linearly) for each use. */
2648 static void
2649 alloc_use_cost_map (struct ivopts_data *data)
2651 unsigned i, size, s;
2653 for (i = 0; i < n_iv_uses (data); i++)
2655 struct iv_use *use = iv_use (data, i);
2657 if (data->consider_all_candidates)
2658 size = n_iv_cands (data);
2659 else
2661 s = bitmap_count_bits (use->related_cands);
2663 /* Round up to a power of two, so that computing the modulo by it is fast. */
2664 size = s ? (1 << ceil_log2 (s)) : 1;
2667 use->n_map_members = size;
2668 use->cost_map = XCNEWVEC (struct cost_pair, size);
2672 /* Returns a description of the computation cost of an expression whose runtime
2673 cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2675 static comp_cost
2676 new_cost (unsigned runtime, unsigned complexity)
2678 comp_cost cost;
2680 cost.cost = runtime;
2681 cost.complexity = complexity;
2683 return cost;
2686 /* Adds costs COST1 and COST2. */
2688 static comp_cost
2689 add_costs (comp_cost cost1, comp_cost cost2)
2691 cost1.cost += cost2.cost;
2692 cost1.complexity += cost2.complexity;
2694 return cost1;
2696 /* Subtracts cost COST2 from COST1. */
2698 static comp_cost
2699 sub_costs (comp_cost cost1, comp_cost cost2)
2701 cost1.cost -= cost2.cost;
2702 cost1.complexity -= cost2.complexity;
2704 return cost1;
2707 /* Returns a negative number if COST1 < COST2, a positive number if
2708 COST1 > COST2, and 0 if COST1 = COST2. */
2710 static int
2711 compare_costs (comp_cost cost1, comp_cost cost2)
2713 if (cost1.cost == cost2.cost)
2714 return cost1.complexity - cost2.complexity;
2716 return cost1.cost - cost2.cost;
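/* Costs are therefore ordered primarily by the runtime cost, with the
   complexity used only as a tie-breaker: e.g. {cost 4, complexity 1}
   compares below {5, 0}, and {4, 0} below {4, 1}.  */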
2719 /* Returns true if COST is infinite. */
2721 static bool
2722 infinite_cost_p (comp_cost cost)
2724 return cost.cost == INFTY;
2727 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2728 depends on invariants DEPENDS_ON, that the value used in expressing it
2729 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
2731 static void
2732 set_use_iv_cost (struct ivopts_data *data,
2733 struct iv_use *use, struct iv_cand *cand,
2734 comp_cost cost, bitmap depends_on, tree value,
2735 enum tree_code comp, int inv_expr_id)
2737 unsigned i, s;
2739 if (infinite_cost_p (cost))
2741 BITMAP_FREE (depends_on);
2742 return;
2745 if (data->consider_all_candidates)
2747 use->cost_map[cand->id].cand = cand;
2748 use->cost_map[cand->id].cost = cost;
2749 use->cost_map[cand->id].depends_on = depends_on;
2750 use->cost_map[cand->id].value = value;
2751 use->cost_map[cand->id].comp = comp;
2752 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2753 return;
2756 /* n_map_members is a power of two, so this computes modulo. */
2757 s = cand->id & (use->n_map_members - 1);
2758 for (i = s; i < use->n_map_members; i++)
2759 if (!use->cost_map[i].cand)
2760 goto found;
2761 for (i = 0; i < s; i++)
2762 if (!use->cost_map[i].cand)
2763 goto found;
2765 gcc_unreachable ();
2767 found:
2768 use->cost_map[i].cand = cand;
2769 use->cost_map[i].cost = cost;
2770 use->cost_map[i].depends_on = depends_on;
2771 use->cost_map[i].value = value;
2772 use->cost_map[i].comp = comp;
2773 use->cost_map[i].inv_expr_id = inv_expr_id;
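/* The map is a small open-addressed hash table with linear probing.
   Because n_map_members is a power of two, "cand->id & (n_map_members - 1)"
   equals "cand->id % n_map_members", e.g. 13 & 7 == 13 % 8 == 5.  */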
2776 /* Gets cost of (USE, CANDIDATE) pair. */
2778 static struct cost_pair *
2779 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2780 struct iv_cand *cand)
2782 unsigned i, s;
2783 struct cost_pair *ret;
2785 if (!cand)
2786 return NULL;
2788 if (data->consider_all_candidates)
2790 ret = use->cost_map + cand->id;
2791 if (!ret->cand)
2792 return NULL;
2794 return ret;
2797 /* n_map_members is a power of two, so this computes modulo. */
2798 s = cand->id & (use->n_map_members - 1);
2799 for (i = s; i < use->n_map_members; i++)
2800 if (use->cost_map[i].cand == cand)
2801 return use->cost_map + i;
2802 else if (use->cost_map[i].cand == NULL)
2803 return NULL;
2804 for (i = 0; i < s; i++)
2805 if (use->cost_map[i].cand == cand)
2806 return use->cost_map + i;
2807 else if (use->cost_map[i].cand == NULL)
2808 return NULL;
2810 return NULL;
2815 /* Returns an estimate of the cost of computing SEQ. */
2815 static unsigned
2816 seq_cost (rtx seq, bool speed)
2818 unsigned cost = 0;
2819 rtx set;
2821 for (; seq; seq = NEXT_INSN (seq))
2823 set = single_set (seq);
2824 if (set)
2825 cost += set_src_cost (SET_SRC (set), speed);
2826 else
2827 cost++;
2830 return cost;
2833 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2834 static rtx
2835 produce_memory_decl_rtl (tree obj, int *regno)
2837 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2838 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2839 rtx x;
2841 gcc_assert (obj);
2842 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2844 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2845 x = gen_rtx_SYMBOL_REF (address_mode, name);
2846 SET_SYMBOL_REF_DECL (x, obj);
2847 x = gen_rtx_MEM (DECL_MODE (obj), x);
2848 set_mem_addr_space (x, as);
2849 targetm.encode_section_info (obj, x, true);
2851 else
2853 x = gen_raw_REG (address_mode, (*regno)++);
2854 x = gen_rtx_MEM (DECL_MODE (obj), x);
2855 set_mem_addr_space (x, as);
2858 return x;
2861 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2862 walk_tree. DATA points to the next free fake register number. */
2864 static tree
2865 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2867 tree obj = NULL_TREE;
2868 rtx x = NULL_RTX;
2869 int *regno = (int *) data;
2871 switch (TREE_CODE (*expr_p))
2873 case ADDR_EXPR:
2874 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2875 handled_component_p (*expr_p);
2876 expr_p = &TREE_OPERAND (*expr_p, 0))
2877 continue;
2878 obj = *expr_p;
2879 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2880 x = produce_memory_decl_rtl (obj, regno);
2881 break;
2883 case SSA_NAME:
2884 *ws = 0;
2885 obj = SSA_NAME_VAR (*expr_p);
2886 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2887 if (!obj)
2888 return NULL_TREE;
2889 if (!DECL_RTL_SET_P (obj))
2890 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2891 break;
2893 case VAR_DECL:
2894 case PARM_DECL:
2895 case RESULT_DECL:
2896 *ws = 0;
2897 obj = *expr_p;
2899 if (DECL_RTL_SET_P (obj))
2900 break;
2902 if (DECL_MODE (obj) == BLKmode)
2903 x = produce_memory_decl_rtl (obj, regno);
2904 else
2905 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2907 break;
2909 default:
2910 break;
2913 if (x)
2915 decl_rtl_to_reset.safe_push (obj);
2916 SET_DECL_RTL (obj, x);
2919 return NULL_TREE;
2922 /* Determines cost of the computation of EXPR. */
2924 static unsigned
2925 computation_cost (tree expr, bool speed)
2927 rtx seq, rslt;
2928 tree type = TREE_TYPE (expr);
2929 unsigned cost;
2930 /* Avoid using hard regs in ways which may be unsupported. */
2931 int regno = LAST_VIRTUAL_REGISTER + 1;
2932 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2933 enum node_frequency real_frequency = node->frequency;
2935 node->frequency = NODE_FREQUENCY_NORMAL;
2936 crtl->maybe_hot_insn_p = speed;
2937 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2938 start_sequence ();
2939 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2940 seq = get_insns ();
2941 end_sequence ();
2942 default_rtl_profile ();
2943 node->frequency = real_frequency;
2945 cost = seq_cost (seq, speed);
2946 if (MEM_P (rslt))
2947 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2948 TYPE_ADDR_SPACE (type), speed);
2949 else if (!REG_P (rslt))
2950 cost += set_src_cost (rslt, speed);
2952 return cost;
2955 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2957 static tree
2958 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2960 if (stmt_after_increment (loop, cand, stmt))
2961 return cand->var_after;
2962 else
2963 return cand->var_before;
2966 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2967 same precision that is at least as wide as the precision of TYPE, stores
2968 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2969 type of A and B. */
2971 static tree
2972 determine_common_wider_type (tree *a, tree *b)
2974 tree wider_type = NULL;
2975 tree suba, subb;
2976 tree atype = TREE_TYPE (*a);
2978 if (CONVERT_EXPR_P (*a))
2980 suba = TREE_OPERAND (*a, 0);
2981 wider_type = TREE_TYPE (suba);
2982 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2983 return atype;
2985 else
2986 return atype;
2988 if (CONVERT_EXPR_P (*b))
2990 subb = TREE_OPERAND (*b, 0);
2991 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2992 return atype;
2994 else
2995 return atype;
2997 *a = suba;
2998 *b = subb;
2999 return wider_type;
3002 /* Determines the expression by which USE is expressed from induction variable
3003 CAND at statement AT in LOOP. The expression is stored in a decomposed
3004 form into AFF. Returns false if USE cannot be expressed using CAND. */
3006 static bool
3007 get_computation_aff (struct loop *loop,
3008 struct iv_use *use, struct iv_cand *cand, gimple at,
3009 struct affine_tree_combination *aff)
3011 tree ubase = use->iv->base;
3012 tree ustep = use->iv->step;
3013 tree cbase = cand->iv->base;
3014 tree cstep = cand->iv->step, cstep_common;
3015 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3016 tree common_type, var;
3017 tree uutype;
3018 aff_tree cbase_aff, var_aff;
3019 double_int rat;
3021 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3023 /* We do not have enough precision to express the values of the use. */
3024 return false;
3027 var = var_at_stmt (loop, cand, at);
3028 uutype = unsigned_type_for (utype);
3030 /* If the conversion is not a no-op, perform it. */
3031 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3033 cstep = fold_convert (uutype, cstep);
3034 cbase = fold_convert (uutype, cbase);
3035 var = fold_convert (uutype, var);
3038 if (!constant_multiple_of (ustep, cstep, &rat))
3039 return false;
3041 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3042 type, we achieve better folding by computing their difference in this
3043 wider type, and casting the result to UUTYPE. We do not need to worry
3044 about overflows, as all the arithmetic will in the end be performed in
3045 UUTYPE anyway. */
3046 common_type = determine_common_wider_type (&ubase, &cbase);
3048 /* use = ubase - ratio * cbase + ratio * var. */
3049 tree_to_aff_combination (ubase, common_type, aff);
3050 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3051 tree_to_aff_combination (var, uutype, &var_aff);
3053 /* We need to shift the value if we are after the increment. */
3054 if (stmt_after_increment (loop, cand, at))
3056 aff_tree cstep_aff;
3058 if (common_type != uutype)
3059 cstep_common = fold_convert (common_type, cstep);
3060 else
3061 cstep_common = cstep;
3063 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3064 aff_combination_add (&cbase_aff, &cstep_aff);
3067 aff_combination_scale (&cbase_aff, -rat);
3068 aff_combination_add (aff, &cbase_aff);
3069 if (common_type != uutype)
3070 aff_combination_convert (aff, uutype);
3072 aff_combination_scale (&var_aff, rat);
3073 aff_combination_add (aff, &var_aff);
3075 return true;
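/* For instance, if the use is {0, +, 4} and the candidate is {0, +, 1},
   then RAT is 4 and the use is expressed as 0 - 4 * 0 + 4 * var,
   i.e. simply 4 * var.  */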
3078 /* Return the type of USE. */
3080 static tree
3081 get_use_type (struct iv_use *use)
3083 tree base_type = TREE_TYPE (use->iv->base);
3084 tree type;
3086 if (use->type == USE_ADDRESS)
3088 /* The base_type may be a void pointer. Create a pointer type based on
3089 the mem_ref instead. */
3090 type = build_pointer_type (TREE_TYPE (*use->op_p));
3091 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3092 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3094 else
3095 type = base_type;
3097 return type;
3100 /* Determines the expression by which USE is expressed from induction variable
3101 CAND at statement AT in LOOP. The computation is unshared. */
3103 static tree
3104 get_computation_at (struct loop *loop,
3105 struct iv_use *use, struct iv_cand *cand, gimple at)
3107 aff_tree aff;
3108 tree type = get_use_type (use);
3110 if (!get_computation_aff (loop, use, cand, at, &aff))
3111 return NULL_TREE;
3112 unshare_aff_combination (&aff);
3113 return fold_convert (type, aff_combination_to_tree (&aff));
3116 /* Determines the expression by which USE is expressed from induction variable
3117 CAND in LOOP. The computation is unshared. */
3119 static tree
3120 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3122 return get_computation_at (loop, use, cand, use->stmt);
3125 /* Adjust the cost COST for being in loop setup rather than loop body.
3126 If we're optimizing for space, the loop setup overhead is constant;
3127 if we're optimizing for speed, amortize it over the per-iteration cost. */
3128 static unsigned
3129 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3131 if (cost == INFTY)
3132 return cost;
3133 else if (optimize_loop_for_speed_p (data->current_loop))
3134 return cost / avg_loop_niter (data->current_loop);
3135 else
3136 return cost;
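/* E.g. a setup cost of 20 in a loop expected to run 10 iterations
   contributes only 20 / 10 = 2 per iteration when optimizing for
   speed, but the full 20 when optimizing for size.  */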
3139 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3140 validity for a memory reference accessing memory of mode MODE in
3141 address space AS. */
3144 bool
3145 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3146 addr_space_t as)
3148 #define MAX_RATIO 128
3149 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3150 static vec<sbitmap> valid_mult_list;
3151 sbitmap valid_mult;
3153 if (data_index >= valid_mult_list.length ())
3154 valid_mult_list.safe_grow_cleared (data_index + 1);
3156 valid_mult = valid_mult_list[data_index];
3157 if (!valid_mult)
3159 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3160 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3161 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3162 rtx addr, scaled;
3163 HOST_WIDE_INT i;
3165 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3166 bitmap_clear (valid_mult);
3167 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3168 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3169 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3171 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3172 if (memory_address_addr_space_p (mode, addr, as)
3173 || memory_address_addr_space_p (mode, scaled, as))
3174 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3179 fprintf (dump_file, " allowed multipliers:");
3180 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3181 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3182 fprintf (dump_file, " %d", (int) i);
3183 fprintf (dump_file, "\n");
3184 fprintf (dump_file, "\n");
3187 valid_mult_list[data_index] = valid_mult;
3190 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3191 return false;
3193 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
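/* E.g. on x86, where an address may scale the index by 1, 2, 4 or 8,
   only those ratios end up set in VALID_MULT, and e.g. ratio 3 makes
   this function return false.  */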
3196 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3197 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3198 variable is omitted. Compute the cost for a memory reference that accesses
3199 a memory location of mode MEM_MODE in address space AS.
3201 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3202 size of MEM_MODE / RATIO) is available. To make this determination, we
3203 look at the size of the increment to be made, which is given in CSTEP.
3204 CSTEP may be zero if the step is unknown.
3205 STMT_AFTER_INC is true iff the statement we're looking at is after the
3206 increment of the original biv.
3208 TODO -- there must be some better way. This all is quite crude. */
3210 typedef struct address_cost_data_s
3212 HOST_WIDE_INT min_offset, max_offset;
3213 unsigned costs[2][2][2][2];
3214 } *address_cost_data;
3217 static comp_cost
3218 get_address_cost (bool symbol_present, bool var_present,
3219 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3220 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3221 addr_space_t as, bool speed,
3222 bool stmt_after_inc, bool *may_autoinc)
3224 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3225 static vec<address_cost_data> address_cost_data_list;
3226 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3227 address_cost_data data;
3228 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3229 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3230 unsigned cost, acost, complexity;
3231 bool offset_p, ratio_p, autoinc;
3232 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3233 unsigned HOST_WIDE_INT mask;
3234 unsigned bits;
3236 if (data_index >= address_cost_data_list.length ())
3237 address_cost_data_list.safe_grow_cleared (data_index + 1);
3239 data = address_cost_data_list[data_index];
3240 if (!data)
3242 HOST_WIDE_INT i;
3243 HOST_WIDE_INT rat, off = 0;
3244 int old_cse_not_expected, width;
3245 unsigned sym_p, var_p, off_p, rat_p, add_c;
3246 rtx seq, addr, base;
3247 rtx reg0, reg1;
3249 data = (address_cost_data) xcalloc (1, sizeof (*data));
3251 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3253 width = GET_MODE_BITSIZE (address_mode) - 1;
3254 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3255 width = HOST_BITS_PER_WIDE_INT - 1;
3256 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3258 for (i = width; i >= 0; i--)
3260 off = -((unsigned HOST_WIDE_INT) 1 << i);
3261 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3262 if (memory_address_addr_space_p (mem_mode, addr, as))
3263 break;
3265 data->min_offset = (i == -1? 0 : off);
3267 for (i = width; i >= 0; i--)
3269 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3270 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3271 if (memory_address_addr_space_p (mem_mode, addr, as))
3272 break;
3274 if (i == -1)
3275 off = 0;
3276 data->max_offset = off;
3278 if (dump_file && (dump_flags & TDF_DETAILS))
3280 fprintf (dump_file, "get_address_cost:\n");
3281 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3282 GET_MODE_NAME (mem_mode),
3283 data->min_offset);
3284 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3285 GET_MODE_NAME (mem_mode),
3286 data->max_offset);
3289 rat = 1;
3290 for (i = 2; i <= MAX_RATIO; i++)
3291 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3293 rat = i;
3294 break;
3297 /* Compute the cost of various addressing modes. */
3298 acost = 0;
3299 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3300 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3302 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3303 || USE_STORE_PRE_DECREMENT (mem_mode))
3305 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3306 has_predec[mem_mode]
3307 = memory_address_addr_space_p (mem_mode, addr, as);
3309 if (USE_LOAD_POST_DECREMENT (mem_mode)
3310 || USE_STORE_POST_DECREMENT (mem_mode))
3312 addr = gen_rtx_POST_DEC (address_mode, reg0);
3313 has_postdec[mem_mode]
3314 = memory_address_addr_space_p (mem_mode, addr, as);
3316 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3317 || USE_STORE_PRE_INCREMENT (mem_mode))
3319 addr = gen_rtx_PRE_INC (address_mode, reg0);
3320 has_preinc[mem_mode]
3321 = memory_address_addr_space_p (mem_mode, addr, as);
3323 if (USE_LOAD_POST_INCREMENT (mem_mode)
3324 || USE_STORE_POST_INCREMENT (mem_mode))
3326 addr = gen_rtx_POST_INC (address_mode, reg0);
3327 has_postinc[mem_mode]
3328 = memory_address_addr_space_p (mem_mode, addr, as);
3330 for (i = 0; i < 16; i++)
3332 sym_p = i & 1;
3333 var_p = (i >> 1) & 1;
3334 off_p = (i >> 2) & 1;
3335 rat_p = (i >> 3) & 1;
3337 addr = reg0;
3338 if (rat_p)
3339 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3340 gen_int_mode (rat, address_mode));
3342 if (var_p)
3343 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3345 if (sym_p)
3347 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3348 /* ??? We can run into trouble with some backends by presenting
3349 it with symbols which haven't been properly passed through
3350 targetm.encode_section_info. By setting the local bit, we
3351 enhance the probability of things working. */
3352 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3354 if (off_p)
3355 base = gen_rtx_fmt_e (CONST, address_mode,
3356 gen_rtx_fmt_ee
3357 (PLUS, address_mode, base,
3358 gen_int_mode (off, address_mode)));
3360 else if (off_p)
3361 base = gen_int_mode (off, address_mode);
3362 else
3363 base = NULL_RTX;
3365 if (base)
3366 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3368 start_sequence ();
3369 /* To avoid splitting addressing modes, pretend that no cse will
3370 follow. */
3371 old_cse_not_expected = cse_not_expected;
3372 cse_not_expected = true;
3373 addr = memory_address_addr_space (mem_mode, addr, as);
3374 cse_not_expected = old_cse_not_expected;
3375 seq = get_insns ();
3376 end_sequence ();
3378 acost = seq_cost (seq, speed);
3379 acost += address_cost (addr, mem_mode, as, speed);
3381 if (!acost)
3382 acost = 1;
3383 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3386 /* On some targets, it is quite expensive to load a symbol into a register,
3387 which makes addresses that contain symbols look much more expensive.
3388 However, the symbol will have to be loaded in any case before the
3389 loop (and quite likely we have it in register already), so it does not
3390 make much sense to penalize them too heavily. So make some final
3391 tweaks for the SYMBOL_PRESENT modes:
3393 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3394 var is cheaper, use this mode with small penalty.
3395 If VAR_PRESENT is true, try whether the mode with
3396 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3397 if this is the case, use it. */
3398 add_c = add_cost (speed, address_mode);
3399 for (i = 0; i < 8; i++)
3401 var_p = i & 1;
3402 off_p = (i >> 1) & 1;
3403 rat_p = (i >> 2) & 1;
3405 acost = data->costs[0][1][off_p][rat_p] + 1;
3406 if (var_p)
3407 acost += add_c;
3409 if (acost < data->costs[1][var_p][off_p][rat_p])
3410 data->costs[1][var_p][off_p][rat_p] = acost;
3413 if (dump_file && (dump_flags & TDF_DETAILS))
3415 fprintf (dump_file, "Address costs:\n");
3417 for (i = 0; i < 16; i++)
3419 sym_p = i & 1;
3420 var_p = (i >> 1) & 1;
3421 off_p = (i >> 2) & 1;
3422 rat_p = (i >> 3) & 1;
3424 fprintf (dump_file, " ");
3425 if (sym_p)
3426 fprintf (dump_file, "sym + ");
3427 if (var_p)
3428 fprintf (dump_file, "var + ");
3429 if (off_p)
3430 fprintf (dump_file, "cst + ");
3431 if (rat_p)
3432 fprintf (dump_file, "rat * ");
3434 acost = data->costs[sym_p][var_p][off_p][rat_p];
3435 fprintf (dump_file, "index costs %d\n", acost);
3437 if (has_predec[mem_mode] || has_postdec[mem_mode]
3438 || has_preinc[mem_mode] || has_postinc[mem_mode])
3439 fprintf (dump_file, " May include autoinc/dec\n");
3440 fprintf (dump_file, "\n");
3443 address_cost_data_list[data_index] = data;
3446 bits = GET_MODE_BITSIZE (address_mode);
3447 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3448 offset &= mask;
3449 if ((offset >> (bits - 1) & 1))
3450 offset |= ~mask;
3451 s_offset = offset;
3453 autoinc = false;
3454 msize = GET_MODE_SIZE (mem_mode);
3455 autoinc_offset = offset;
3456 if (stmt_after_inc)
3457 autoinc_offset += ratio * cstep;
3458 if (symbol_present || var_present || ratio != 1)
3459 autoinc = false;
3460 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3461 && msize == cstep)
3462 || (has_postdec[mem_mode] && autoinc_offset == 0
3463 && msize == -cstep)
3464 || (has_preinc[mem_mode] && autoinc_offset == msize
3465 && msize == cstep)
3466 || (has_predec[mem_mode] && autoinc_offset == -msize
3467 && msize == -cstep))
3468 autoinc = true;
3470 cost = 0;
3471 offset_p = (s_offset != 0
3472 && data->min_offset <= s_offset
3473 && s_offset <= data->max_offset);
3474 ratio_p = (ratio != 1
3475 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3477 if (ratio != 1 && !ratio_p)
3478 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3480 if (s_offset && !offset_p && !symbol_present)
3481 cost += add_cost (speed, address_mode);
3483 if (may_autoinc)
3484 *may_autoinc = autoinc;
3485 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3486 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3487 return new_cost (cost + acost, complexity);
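/* The cached table thus holds one cost per addressing shape, indexed by
   which of the symbol, var, offset and ratio parts are present -- e.g.
   an x86 address like "sym + 8 + reg1 + reg2 * 4" would exercise the
   costs[1][1][1][1] entry.  */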
3490 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3491 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3492 calculating the operands of EXPR. Returns true if successful, and returns
3493 the cost in COST. */
3495 static bool
3496 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3497 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3499 comp_cost res;
3500 tree op1 = TREE_OPERAND (expr, 1);
3501 tree cst = TREE_OPERAND (mult, 1);
3502 tree multop = TREE_OPERAND (mult, 0);
3503 int m = exact_log2 (int_cst_value (cst));
3504 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3505 int sa_cost;
3506 bool equal_p = false;
3508 if (!(m >= 0 && m < maxm))
3509 return false;
3511 if (operand_equal_p (op1, mult, 0))
3512 equal_p = true;
3514 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3515 ? shiftadd_cost (speed, mode, m)
3516 : (equal_p
3517 ? shiftsub1_cost (speed, mode, m)
3518 : shiftsub0_cost (speed, mode, m)));
3519 res = new_cost (sa_cost, 0);
3520 res = add_costs (res, equal_p ? cost0 : cost1);
3522 STRIP_NOPS (multop);
3523 if (!is_gimple_val (multop))
3524 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3526 *cost = res;
3527 return true;
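/* E.g. for EXPR = a + b * 4 we get m == 2, and on targets where this
   maps to a single shift-and-add (or an x86 "lea" with a scaled index),
   shiftadd_cost is usually cheaper than a separate multiply followed
   by an add.  */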
3530 /* Estimates cost of forcing expression EXPR into a variable. */
3532 static comp_cost
3533 force_expr_to_var_cost (tree expr, bool speed)
3535 static bool costs_initialized = false;
3536 static unsigned integer_cost [2];
3537 static unsigned symbol_cost [2];
3538 static unsigned address_cost [2];
3539 tree op0, op1;
3540 comp_cost cost0, cost1, cost;
3541 enum machine_mode mode;
3543 if (!costs_initialized)
3545 tree type = build_pointer_type (integer_type_node);
3546 tree var, addr;
3547 rtx x;
3548 int i;
3550 var = create_tmp_var_raw (integer_type_node, "test_var");
3551 TREE_STATIC (var) = 1;
3552 x = produce_memory_decl_rtl (var, NULL);
3553 SET_DECL_RTL (var, x);
3555 addr = build1 (ADDR_EXPR, type, var);
3558 for (i = 0; i < 2; i++)
3560 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3561 2000), i);
3563 symbol_cost[i] = computation_cost (addr, i) + 1;
3565 address_cost[i]
3566 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3567 if (dump_file && (dump_flags & TDF_DETAILS))
3569 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3570 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3571 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3572 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3573 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3574 fprintf (dump_file, "\n");
3578 costs_initialized = true;
3581 STRIP_NOPS (expr);
3583 if (SSA_VAR_P (expr))
3584 return no_cost;
3586 if (is_gimple_min_invariant (expr))
3588 if (TREE_CODE (expr) == INTEGER_CST)
3589 return new_cost (integer_cost [speed], 0);
3591 if (TREE_CODE (expr) == ADDR_EXPR)
3593 tree obj = TREE_OPERAND (expr, 0);
3595 if (TREE_CODE (obj) == VAR_DECL
3596 || TREE_CODE (obj) == PARM_DECL
3597 || TREE_CODE (obj) == RESULT_DECL)
3598 return new_cost (symbol_cost [speed], 0);
3601 return new_cost (address_cost [speed], 0);
3604 switch (TREE_CODE (expr))
3606 case POINTER_PLUS_EXPR:
3607 case PLUS_EXPR:
3608 case MINUS_EXPR:
3609 case MULT_EXPR:
3610 op0 = TREE_OPERAND (expr, 0);
3611 op1 = TREE_OPERAND (expr, 1);
3612 STRIP_NOPS (op0);
3613 STRIP_NOPS (op1);
3614 break;
3616 CASE_CONVERT:
3617 case NEGATE_EXPR:
3618 op0 = TREE_OPERAND (expr, 0);
3619 STRIP_NOPS (op0);
3620 op1 = NULL_TREE;
3621 break;
3623 default:
3624 /* Just an arbitrary value, FIXME. */
3625 return new_cost (target_spill_cost[speed], 0);
3628 if (op0 == NULL_TREE
3629 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3630 cost0 = no_cost;
3631 else
3632 cost0 = force_expr_to_var_cost (op0, speed);
3634 if (op1 == NULL_TREE
3635 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3636 cost1 = no_cost;
3637 else
3638 cost1 = force_expr_to_var_cost (op1, speed);
3640 mode = TYPE_MODE (TREE_TYPE (expr));
3641 switch (TREE_CODE (expr))
3643 case POINTER_PLUS_EXPR:
3644 case PLUS_EXPR:
3645 case MINUS_EXPR:
3646 case NEGATE_EXPR:
3647 cost = new_cost (add_cost (speed, mode), 0);
3648 if (TREE_CODE (expr) != NEGATE_EXPR)
3650 tree mult = NULL_TREE;
3651 comp_cost sa_cost;
3652 if (TREE_CODE (op1) == MULT_EXPR)
3653 mult = op1;
3654 else if (TREE_CODE (op0) == MULT_EXPR)
3655 mult = op0;
3657 if (mult != NULL_TREE
3658 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3659 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3660 speed, &sa_cost))
3661 return sa_cost;
3663 break;
3665 CASE_CONVERT:
3667 tree inner_mode, outer_mode;
3668 outer_mode = TREE_TYPE (expr);
3669 inner_mode = TREE_TYPE (op0);
3670 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3671 TYPE_MODE (inner_mode), speed), 0);
3673 break;
3675 case MULT_EXPR:
3676 if (cst_and_fits_in_hwi (op0))
3677 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3678 mode, speed), 0);
3679 else if (cst_and_fits_in_hwi (op1))
3680 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3681 mode, speed), 0);
3682 else
3683 return new_cost (target_spill_cost [speed], 0);
3684 break;
3686 default:
3687 gcc_unreachable ();
3690 cost = add_costs (cost, cost0);
3691 cost = add_costs (cost, cost1);
3693 /* Bound the cost by target_spill_cost. The parts of complicated
3694 computations often are either loop invariant or at least can
3695 be shared between several iv uses, so letting this grow without
3696 limits would not give reasonable results. */
3697 if (cost.cost > (int) target_spill_cost [speed])
3698 cost.cost = target_spill_cost [speed];
3700 return cost;
3703 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3704 invariants the computation depends on. */
3706 static comp_cost
3707 force_var_cost (struct ivopts_data *data,
3708 tree expr, bitmap *depends_on)
3710 if (depends_on)
3712 fd_ivopts_data = data;
3713 walk_tree (&expr, find_depends, depends_on, NULL);
3716 return force_expr_to_var_cost (expr, data->speed);
3719 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3720 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3721 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3722 invariants the computation depends on. */
3724 static comp_cost
3725 split_address_cost (struct ivopts_data *data,
3726 tree addr, bool *symbol_present, bool *var_present,
3727 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3729 tree core;
3730 HOST_WIDE_INT bitsize;
3731 HOST_WIDE_INT bitpos;
3732 tree toffset;
3733 enum machine_mode mode;
3734 int unsignedp, volatilep;
3736 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3737 &unsignedp, &volatilep, false);
3739 if (toffset != 0
3740 || bitpos % BITS_PER_UNIT != 0
3741 || TREE_CODE (core) != VAR_DECL)
3743 *symbol_present = false;
3744 *var_present = true;
3745 fd_ivopts_data = data;
3746 walk_tree (&addr, find_depends, depends_on, NULL);
3747 return new_cost (target_spill_cost[data->speed], 0);
3750 *offset += bitpos / BITS_PER_UNIT;
3751 if (TREE_STATIC (core)
3752 || DECL_EXTERNAL (core))
3754 *symbol_present = true;
3755 *var_present = false;
3756 return no_cost;
3759 *symbol_present = false;
3760 *var_present = true;
3761 return no_cost;
3764 /* Estimates cost of expressing difference of addresses E1 - E2 as
3765 var + symbol + offset. The value of offset is added to OFFSET,
3766 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3767 part is missing. DEPENDS_ON is a set of the invariants the computation
3768 depends on. */
3770 static comp_cost
3771 ptr_difference_cost (struct ivopts_data *data,
3772 tree e1, tree e2, bool *symbol_present, bool *var_present,
3773 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3775 HOST_WIDE_INT diff = 0;
3776 aff_tree aff_e1, aff_e2;
3777 tree type;
3779 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3781 if (ptr_difference_const (e1, e2, &diff))
3783 *offset += diff;
3784 *symbol_present = false;
3785 *var_present = false;
3786 return no_cost;
3789 if (integer_zerop (e2))
3790 return split_address_cost (data, TREE_OPERAND (e1, 0),
3791 symbol_present, var_present, offset, depends_on);
3793 *symbol_present = false;
3794 *var_present = true;
3796 type = signed_type_for (TREE_TYPE (e1));
3797 tree_to_aff_combination (e1, type, &aff_e1);
3798 tree_to_aff_combination (e2, type, &aff_e2);
3799 aff_combination_scale (&aff_e2, double_int_minus_one);
3800 aff_combination_add (&aff_e1, &aff_e2);
3802 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3805 /* Estimates cost of expressing difference E1 - E2 as
3806 var + symbol + offset. The value of offset is added to OFFSET,
3807 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3808 part is missing. DEPENDS_ON is a set of the invariants the computation
3809 depends on. */
3811 static comp_cost
3812 difference_cost (struct ivopts_data *data,
3813 tree e1, tree e2, bool *symbol_present, bool *var_present,
3814 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3816 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3817 unsigned HOST_WIDE_INT off1, off2;
3818 aff_tree aff_e1, aff_e2;
3819 tree type;
3821 e1 = strip_offset (e1, &off1);
3822 e2 = strip_offset (e2, &off2);
3823 *offset += off1 - off2;
3825 STRIP_NOPS (e1);
3826 STRIP_NOPS (e2);
3828 if (TREE_CODE (e1) == ADDR_EXPR)
3829 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3830 offset, depends_on);
3831 *symbol_present = false;
3833 if (operand_equal_p (e1, e2, 0))
3835 *var_present = false;
3836 return no_cost;
3839 *var_present = true;
3841 if (integer_zerop (e2))
3842 return force_var_cost (data, e1, depends_on);
3844 if (integer_zerop (e1))
3846 comp_cost cost = force_var_cost (data, e2, depends_on);
3847 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3848 return cost;
3851 type = signed_type_for (TREE_TYPE (e1));
3852 tree_to_aff_combination (e1, type, &aff_e1);
3853 tree_to_aff_combination (e2, type, &aff_e2);
3854 aff_combination_scale (&aff_e2, double_int_minus_one);
3855 aff_combination_add (&aff_e1, &aff_e2);
3857 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3860 /* Returns true if AFF1 and AFF2 are identical. */
3862 static bool
3863 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3865 unsigned i;
3867 if (aff1->n != aff2->n)
3868 return false;
3870 for (i = 0; i < aff1->n; i++)
3872 if (aff1->elts[i].coef != aff2->elts[i].coef)
3873 return false;
3875 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3876 return false;
3878 return true;
3881 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3883 static int
3884 get_expr_id (struct ivopts_data *data, tree expr)
3886 struct iv_inv_expr_ent ent;
3887 struct iv_inv_expr_ent **slot;
3889 ent.expr = expr;
3890 ent.hash = iterative_hash_expr (expr, 0);
3891 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3892 if (*slot)
3893 return (*slot)->id;
3895 *slot = XNEW (struct iv_inv_expr_ent);
3896 (*slot)->expr = expr;
3897 (*slot)->hash = ent.hash;
3898 (*slot)->id = data->inv_expr_id++;
3899 return (*slot)->id;
3902 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3903 requires a new compiler-generated temporary. Returns -1 otherwise.
3904 ADDRESS_P is a flag indicating if the expression is for address
3905 computation. */
3907 static int
3908 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3909 tree cbase, HOST_WIDE_INT ratio,
3910 bool address_p)
3912 aff_tree ubase_aff, cbase_aff;
3913 tree expr, ub, cb;
3915 STRIP_NOPS (ubase);
3916 STRIP_NOPS (cbase);
3917 ub = ubase;
3918 cb = cbase;
3920 if ((TREE_CODE (ubase) == INTEGER_CST)
3921 && (TREE_CODE (cbase) == INTEGER_CST))
3922 return -1;
3924 /* Strips the constant part. */
3925 if (TREE_CODE (ubase) == PLUS_EXPR
3926 || TREE_CODE (ubase) == MINUS_EXPR
3927 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3929 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3930 ubase = TREE_OPERAND (ubase, 0);
3933 /* Strips the constant part. */
3934 if (TREE_CODE (cbase) == PLUS_EXPR
3935 || TREE_CODE (cbase) == MINUS_EXPR
3936 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3938 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3939 cbase = TREE_OPERAND (cbase, 0);
3942 if (address_p)
3944 if (((TREE_CODE (ubase) == SSA_NAME)
3945 || (TREE_CODE (ubase) == ADDR_EXPR
3946 && is_gimple_min_invariant (ubase)))
3947 && (TREE_CODE (cbase) == INTEGER_CST))
3948 return -1;
3950 if (((TREE_CODE (cbase) == SSA_NAME)
3951 || (TREE_CODE (cbase) == ADDR_EXPR
3952 && is_gimple_min_invariant (cbase)))
3953 && (TREE_CODE (ubase) == INTEGER_CST))
3954 return -1;
3957 if (ratio == 1)
3959 if (operand_equal_p (ubase, cbase, 0))
3960 return -1;
3962 if (TREE_CODE (ubase) == ADDR_EXPR
3963 && TREE_CODE (cbase) == ADDR_EXPR)
3965 tree usym, csym;
3967 usym = TREE_OPERAND (ubase, 0);
3968 csym = TREE_OPERAND (cbase, 0);
3969 if (TREE_CODE (usym) == ARRAY_REF)
3971 tree ind = TREE_OPERAND (usym, 1);
3972 if (TREE_CODE (ind) == INTEGER_CST
3973 && host_integerp (ind, 0)
3974 && TREE_INT_CST_LOW (ind) == 0)
3975 usym = TREE_OPERAND (usym, 0);
3977 if (TREE_CODE (csym) == ARRAY_REF)
3979 tree ind = TREE_OPERAND (csym, 1);
3980 if (TREE_CODE (ind) == INTEGER_CST
3981 && host_integerp (ind, 0)
3982 && TREE_INT_CST_LOW (ind) == 0)
3983 csym = TREE_OPERAND (csym, 0);
3985 if (operand_equal_p (usym, csym, 0))
3986 return -1;
3988 /* Now do a more complex comparison. */
3989 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3990 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3991 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3992 return -1;
3995 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3996 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3998 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3999 aff_combination_add (&ubase_aff, &cbase_aff);
4000 expr = aff_combination_to_tree (&ubase_aff);
4001 return get_expr_id (data, expr);
4006 /* Determines the cost of the computation by which USE is expressed
4007 from induction variable CAND. If ADDRESS_P is true, we just need
4008 to create an address from it, otherwise we want to get it into a
4009 register. A set of invariants we depend on is stored in
4010 DEPENDS_ON. AT is the statement at which the value is computed.
4011 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4012 addressing is likely. */
4014 static comp_cost
4015 get_computation_cost_at (struct ivopts_data *data,
4016 struct iv_use *use, struct iv_cand *cand,
4017 bool address_p, bitmap *depends_on, gimple at,
4018 bool *can_autoinc,
4019 int *inv_expr_id)
4021 tree ubase = use->iv->base, ustep = use->iv->step;
4022 tree cbase, cstep;
4023 tree utype = TREE_TYPE (ubase), ctype;
4024 unsigned HOST_WIDE_INT cstepi, offset = 0;
4025 HOST_WIDE_INT ratio, aratio;
4026 bool var_present, symbol_present, stmt_is_after_inc;
4027 comp_cost cost;
4028 double_int rat;
4029 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4030 enum machine_mode mem_mode = (address_p
4031 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4032 : VOIDmode);
4034 *depends_on = NULL;
4036 /* Only consider real candidates. */
4037 if (!cand->iv)
4038 return infinite_cost;
4040 cbase = cand->iv->base;
4041 cstep = cand->iv->step;
4042 ctype = TREE_TYPE (cbase);
4044 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4046 /* We do not have enough precision to express the values of the use. */
4047 return infinite_cost;
4050 if (address_p
4051 || (use->iv->base_object
4052 && cand->iv->base_object
4053 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4054 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4056 /* Do not try to express address of an object with computation based
4057 on address of a different object. This may cause problems in rtl
4058 level alias analysis (that does not expect this to be happening,
4059 as this is illegal in C), and would be unlikely to be useful
4060 anyway. */
4061 if (use->iv->base_object
4062 && cand->iv->base_object
4063 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4064 return infinite_cost;
4067 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4069 /* TODO -- add direct handling of this case. */
4070 goto fallback;
4073 /* CSTEPI is removed from the offset in case the statement is after the
4074 increment. If the step is not constant, we use zero instead.
4075 This is a bit imprecise (there is the extra addition), but
4076 redundancy elimination is likely to transform the code so that
4077 it uses the value of the variable before the increment anyway,
4078 so this is not that unrealistic. */
4079 if (cst_and_fits_in_hwi (cstep))
4080 cstepi = int_cst_value (cstep);
4081 else
4082 cstepi = 0;
4084 if (!constant_multiple_of (ustep, cstep, &rat))
4085 return infinite_cost;
4087 if (rat.fits_shwi ())
4088 ratio = rat.to_shwi ();
4089 else
4090 return infinite_cost;
4092 STRIP_NOPS (cbase);
4093 ctype = TREE_TYPE (cbase);
4095 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4097 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4098 or ratio == 1, it is better to handle this like
4100 ubase - ratio * cbase + ratio * var
4102 (this also holds in the case ratio == -1; TODO). */
4104 if (cst_and_fits_in_hwi (cbase))
4106 offset = - ratio * int_cst_value (cbase);
4107 cost = difference_cost (data,
4108 ubase, build_int_cst (utype, 0),
4109 &symbol_present, &var_present, &offset,
4110 depends_on);
4111 cost.cost /= avg_loop_niter (data->current_loop);
4113 else if (ratio == 1)
4115 tree real_cbase = cbase;
4117 /* Check to see if any adjustment is needed. */
4118 if (cstepi == 0 && stmt_is_after_inc)
4120 aff_tree real_cbase_aff;
4121 aff_tree cstep_aff;
4123 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4124 &real_cbase_aff);
4125 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4127 aff_combination_add (&real_cbase_aff, &cstep_aff);
4128 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4131 cost = difference_cost (data,
4132 ubase, real_cbase,
4133 &symbol_present, &var_present, &offset,
4134 depends_on);
4135 cost.cost /= avg_loop_niter (data->current_loop);
4137 else if (address_p
4138 && !POINTER_TYPE_P (ctype)
4139 && multiplier_allowed_in_address_p
4140 (ratio, mem_mode,
4141 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4143 cbase
4144 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4145 cost = difference_cost (data,
4146 ubase, cbase,
4147 &symbol_present, &var_present, &offset,
4148 depends_on);
4149 cost.cost /= avg_loop_niter (data->current_loop);
4151 else
4153 cost = force_var_cost (data, cbase, depends_on);
4154 cost = add_costs (cost,
4155 difference_cost (data,
4156 ubase, build_int_cst (utype, 0),
4157 &symbol_present, &var_present,
4158 &offset, depends_on));
4159 cost.cost /= avg_loop_niter (data->current_loop);
4160 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4163 if (inv_expr_id)
4165 *inv_expr_id =
4166 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4167 /* Clear depends on. */
4168 if (*inv_expr_id != -1 && depends_on && *depends_on)
4169 bitmap_clear (*depends_on);
4172 /* If we are after the increment, the value of the candidate is higher by
4173 one iteration. */
4174 if (stmt_is_after_inc)
4175 offset -= ratio * cstepi;
4177 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
4178 (the symbol/var1/const parts may be omitted). If we are looking for an
4179 address, find the cost of addressing this. */
4180 if (address_p)
4181 return add_costs (cost,
4182 get_address_cost (symbol_present, var_present,
4183 offset, ratio, cstepi,
4184 mem_mode,
4185 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4186 speed, stmt_is_after_inc,
4187 can_autoinc));
4189 /* Otherwise estimate the costs for computing the expression. */
4190 if (!symbol_present && !var_present && !offset)
4192 if (ratio != 1)
4193 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4194 return cost;
4197 /* Symbol + offset should be compile-time computable so consider that they
4198 are added once to the variable, if present. */
4199 if (var_present && (symbol_present || offset))
4200 cost.cost += adjust_setup_cost (data,
4201 add_cost (speed, TYPE_MODE (ctype)));
4203 /* Having an offset does not affect the runtime cost in case it is added to
4204 the symbol, but it increases complexity. */
4205 if (offset)
4206 cost.complexity++;
4208 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4210 aratio = ratio > 0 ? ratio : -ratio;
4211 if (aratio != 1)
4212 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4213 return cost;
4215 fallback:
4216 if (can_autoinc)
4217 *can_autoinc = false;
4220 /* Just get the expression, expand it and measure the cost. */
4221 tree comp = get_computation_at (data->current_loop, use, cand, at);
4223 if (!comp)
4224 return infinite_cost;
4226 if (address_p)
4227 comp = build_simple_mem_ref (comp);
4229 return new_cost (computation_cost (comp, speed), 0);
4233 /* Determines the cost of the computation by which USE is expressed
4234 from induction variable CAND. If ADDRESS_P is true, we just need
4235 to create an address from it, otherwise we want to get it into a
4236 register. A set of invariants we depend on is stored in
4237 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4238 autoinc addressing is likely. */
4240 static comp_cost
4241 get_computation_cost (struct ivopts_data *data,
4242 struct iv_use *use, struct iv_cand *cand,
4243 bool address_p, bitmap *depends_on,
4244 bool *can_autoinc, int *inv_expr_id)
4246 return get_computation_cost_at (data,
4247 use, cand, address_p, depends_on, use->stmt,
4248 can_autoinc, inv_expr_id);
4251 /* Determines cost of basing replacement of USE on CAND in a generic
4252 expression. */
4254 static bool
4255 determine_use_iv_cost_generic (struct ivopts_data *data,
4256 struct iv_use *use, struct iv_cand *cand)
4258 bitmap depends_on;
4259 comp_cost cost;
4260 int inv_expr_id = -1;
4262 /* The simple case first -- if we need to express value of the preserved
4263 original biv, the cost is 0. This also prevents us from counting the
4264 cost of increment twice -- once at this use and once in the cost of
4265 the candidate. */
4266 if (cand->pos == IP_ORIGINAL
4267 && cand->incremented_at == use->stmt)
4269 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4270 ERROR_MARK, -1);
4271 return true;
4274 cost = get_computation_cost (data, use, cand, false, &depends_on,
4275 NULL, &inv_expr_id);
4277 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4278 inv_expr_id);
4280 return !infinite_cost_p (cost);
4283 /* Determines cost of basing replacement of USE on CAND in an address. */
4285 static bool
4286 determine_use_iv_cost_address (struct ivopts_data *data,
4287 struct iv_use *use, struct iv_cand *cand)
4289 bitmap depends_on;
4290 bool can_autoinc;
4291 int inv_expr_id = -1;
4292 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4293 &can_autoinc, &inv_expr_id);
4295 if (cand->ainc_use == use)
4297 if (can_autoinc)
4298 cost.cost -= cand->cost_step;
4299 /* If we generated the candidate solely for exploiting autoincrement
4300 opportunities, and it turns out it can't be used, set the cost to
4301 infinity to make sure we ignore it. */
4302 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4303 cost = infinite_cost;
4305 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4306 inv_expr_id);
4308 return !infinite_cost_p (cost);
4311 /* Computes value of candidate CAND at position AT in iteration NITER, and
4312 stores it to VAL. */
4314 static void
4315 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4316 aff_tree *val)
4318 aff_tree step, delta, nit;
4319 struct iv *iv = cand->iv;
4320 tree type = TREE_TYPE (iv->base);
4321 tree steptype = type;
4322 if (POINTER_TYPE_P (type))
4323 steptype = sizetype;
4325 tree_to_aff_combination (iv->step, steptype, &step);
4326 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4327 aff_combination_convert (&nit, steptype);
4328 aff_combination_mult (&nit, &step, &delta);
4329 if (stmt_after_increment (loop, cand, at))
4330 aff_combination_add (&delta, &step);
4332 tree_to_aff_combination (iv->base, type, val);
4333 aff_combination_add (val, &delta);
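/* A minimal scalar sketch of the computation above, assuming a candidate
   whose base, step and iteration count all fit in a HOST_WIDE_INT; the
   real code works on affine combinations to remain safe with respect to
   types and overflow.  */

static HOST_WIDE_INT
cand_value_at_sketch (HOST_WIDE_INT base, HOST_WIDE_INT step,
                      HOST_WIDE_INT niter, bool after_inc)
{
  /* NITER steps from the base, plus one more step when the statement
     is after the increment of the candidate.  */
  HOST_WIDE_INT delta = niter * step;
  if (after_inc)
    delta += step;
  return base + delta;
}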
4336 /* Returns period of induction variable iv. */
4338 static tree
4339 iv_period (struct iv *iv)
4341 tree step = iv->step, period, type;
4342 tree pow2div;
4344 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4346 type = unsigned_type_for (TREE_TYPE (step));
4347 /* The period of the iv is lcm (step, type_range) / step - 1,
4348 i.e., N * type_range / step - 1. Since the type range is a power
4349 of two and N == step >> num_of_ending_zeros_binary (step),
4350 the final result is
4352 (type_range >> num_of_ending_zeros_binary (step)) - 1
4355 pow2div = num_ending_zeros (step);
4357 period = build_low_bits_mask (type,
4358 (TYPE_PRECISION (type)
4359 - tree_low_cst (pow2div, 1)));
4361 return period;
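/* A self-contained sketch of the formula above for a 32-bit unsigned iv
   with a nonzero constant step; e.g. for step == 12 (two trailing zero
   bits) the period is (1 << 30) - 1.  */

static unsigned HOST_WIDE_INT
iv_period_sketch_u32 (unsigned HOST_WIDE_INT step)
{
  unsigned tz = 0;

  /* num_of_ending_zeros_binary (step).  */
  while ((step & 1) == 0)
    {
      tz++;
      step >>= 1;
    }

  /* (type_range >> tz) - 1, with type_range == 2^32.  */
  return ((unsigned HOST_WIDE_INT) 1 << (32 - tz)) - 1;
}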
4364 /* Returns the comparison operator used when eliminating the iv USE. */
4366 static enum tree_code
4367 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4369 struct loop *loop = data->current_loop;
4370 basic_block ex_bb;
4371 edge exit;
4373 ex_bb = gimple_bb (use->stmt);
4374 exit = EDGE_SUCC (ex_bb, 0);
4375 if (flow_bb_inside_loop_p (loop, exit->dest))
4376 exit = EDGE_SUCC (ex_bb, 1);
4378 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4381 static tree
4382 strip_wrap_conserving_type_conversions (tree exp)
4384 while (tree_ssa_useless_type_conversion (exp)
4385 && (nowrap_type_p (TREE_TYPE (exp))
4386 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4387 exp = TREE_OPERAND (exp, 0);
4388 return exp;
4391 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic --
4392 we only check for an exact match. */
4394 static bool
4395 expr_equal_p (tree e, tree what)
4397 gimple stmt;
4398 enum tree_code code;
4400 e = strip_wrap_conserving_type_conversions (e);
4401 what = strip_wrap_conserving_type_conversions (what);
4403 code = TREE_CODE (what);
4404 if (TREE_TYPE (e) != TREE_TYPE (what))
4405 return false;
4407 if (operand_equal_p (e, what, 0))
4408 return true;
4410 if (TREE_CODE (e) != SSA_NAME)
4411 return false;
4413 stmt = SSA_NAME_DEF_STMT (e);
4414 if (gimple_code (stmt) != GIMPLE_ASSIGN
4415 || gimple_assign_rhs_code (stmt) != code)
4416 return false;
4418 switch (get_gimple_rhs_class (code))
4420 case GIMPLE_BINARY_RHS:
4421 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4422 return false;
4423 /* Fallthru. */
4425 case GIMPLE_UNARY_RHS:
4426 case GIMPLE_SINGLE_RHS:
4427 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4428 default:
4429 return false;
4433 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4434 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4435 calculation is performed in a non-wrapping type.
4437 TODO: More generally, we could test for the situation that
4438 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4439 This would require knowing the sign of OFFSET.
4441 Also, we only look for the first addition in the computation of BASE.
4442 More complex analysis would be better, but introducing it just for
4443 this optimization seems like overkill. */
4445 static bool
4446 difference_cannot_overflow_p (tree base, tree offset)
4448 enum tree_code code;
4449 tree e1, e2;
4451 if (!nowrap_type_p (TREE_TYPE (base)))
4452 return false;
4454 base = expand_simple_operations (base);
4456 if (TREE_CODE (base) == SSA_NAME)
4458 gimple stmt = SSA_NAME_DEF_STMT (base);
4460 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4461 return false;
4463 code = gimple_assign_rhs_code (stmt);
4464 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4465 return false;
4467 e1 = gimple_assign_rhs1 (stmt);
4468 e2 = gimple_assign_rhs2 (stmt);
4470 else
4472 code = TREE_CODE (base);
4473 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4474 return false;
4475 e1 = TREE_OPERAND (base, 0);
4476 e2 = TREE_OPERAND (base, 1);
4479 /* TODO: deeper inspection may be necessary to prove the equality. */
4480 switch (code)
4482 case PLUS_EXPR:
4483 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4484 case POINTER_PLUS_EXPR:
4485 return expr_equal_p (e2, offset);
4487 default:
4488 return false;
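/* For example, if Q was computed as

     q = p + n;

   in a pointer or other non-wrapping type, then
   difference_cannot_overflow_p (q, n) returns true: q - n == p has
   demonstrably been computed without wrapping.  */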
4492 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4493 comparison with CAND. NITER describes the number of iterations of
4494 the loop. If successful, the comparison in COMP_P is altered accordingly.
4496 We aim to handle the following situation:
4498 sometype *base, *p;
4499 int a, b, i;
4501 i = a;
4502 p = p_0 = base + a;
4504 do
4505 {
4506 bla (*p);
4507 p++;
4508 i++;
4509 }
4510 while (i < b);
4512 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4513 We aim to optimize this to
4515 p = p_0 = base + a;
4516 do
4517 {
4518 bla (*p);
4519 p++;
4520 }
4521 while (p < p_0 - a + b);
4523 This preserves correctness, since the pointer arithmetic does not
4524 overflow. More precisely:
4526 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4527 overflow in computing it or the values of p.
4528 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4529 overflow. To prove this, we use the fact that p_0 = base + a. */
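/* For instance, with a == 2 and b == 7 we get NITER == b - a - 1 == 4:
   the original loop body runs for i == 2, ..., 6, and the rewritten
   exit test p < p_0 - a + b == p_0 + 5 makes the body run for
   p == p_0, ..., p_0 + 4 -- the same five executions.  */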
4531 static bool
4532 iv_elimination_compare_lt (struct ivopts_data *data,
4533 struct iv_cand *cand, enum tree_code *comp_p,
4534 struct tree_niter_desc *niter)
4536 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4537 struct affine_tree_combination nit, tmpa, tmpb;
4538 enum tree_code comp;
4539 HOST_WIDE_INT step;
4541 /* We need to know that the candidate induction variable does not overflow.
4542 While more complex analysis may be used to prove this, for now just
4543 check that the variable appears in the original program and that it
4544 is computed in a type that guarantees no overflows. */
4545 cand_type = TREE_TYPE (cand->iv->base);
4546 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4547 return false;
4549 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4550 the calculation of the BOUND could overflow, making the comparison
4551 invalid. */
4552 if (!data->loop_single_exit_p)
4553 return false;
4555 /* We need to be able to decide whether the candidate is increasing or
4556 decreasing in order to choose the right comparison operator. */
4557 if (!cst_and_fits_in_hwi (cand->iv->step))
4558 return false;
4559 step = int_cst_value (cand->iv->step);
4561 /* Check that the number of iterations matches the expected pattern:
4562 a + 1 > b ? 0 : b - a - 1. */
4563 mbz = niter->may_be_zero;
4564 if (TREE_CODE (mbz) == GT_EXPR)
4566 /* Handle a + 1 > b. */
4567 tree op0 = TREE_OPERAND (mbz, 0);
4568 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4570 a = TREE_OPERAND (op0, 0);
4571 b = TREE_OPERAND (mbz, 1);
4573 else
4574 return false;
4576 else if (TREE_CODE (mbz) == LT_EXPR)
4578 tree op1 = TREE_OPERAND (mbz, 1);
4580 /* Handle b < a + 1. */
4581 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4583 a = TREE_OPERAND (op1, 0);
4584 b = TREE_OPERAND (mbz, 0);
4586 else
4587 return false;
4589 else
4590 return false;
4592 /* Expected number of iterations is B - A - 1. Check that it matches
4593 the actual number, i.e., that B - A - NITER = 1. */
4594 tree_to_aff_combination (niter->niter, nit_type, &nit);
4595 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4596 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4597 aff_combination_scale (&nit, double_int_minus_one);
4598 aff_combination_scale (&tmpa, double_int_minus_one);
4599 aff_combination_add (&tmpb, &tmpa);
4600 aff_combination_add (&tmpb, &nit);
4601 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4602 return false;
4604 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4605 overflow. */
4606 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4607 cand->iv->step,
4608 fold_convert (TREE_TYPE (cand->iv->step), a));
4609 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4610 return false;
4612 /* Determine the new comparison operator. */
4613 comp = step < 0 ? GT_EXPR : LT_EXPR;
4614 if (*comp_p == NE_EXPR)
4615 *comp_p = comp;
4616 else if (*comp_p == EQ_EXPR)
4617 *comp_p = invert_tree_comparison (comp, false);
4618 else
4619 gcc_unreachable ();
4621 return true;
4624 /* Check whether it is possible to express the condition in USE by comparison
4625 of candidate CAND. If so, store the value compared with to BOUND, and the
4626 comparison operator to COMP. */
4628 static bool
4629 may_eliminate_iv (struct ivopts_data *data,
4630 struct iv_use *use, struct iv_cand *cand, tree *bound,
4631 enum tree_code *comp)
4633 basic_block ex_bb;
4634 edge exit;
4635 tree period;
4636 struct loop *loop = data->current_loop;
4637 aff_tree bnd;
4638 struct tree_niter_desc *desc = NULL;
4640 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4641 return false;
4643 /* For now this works only for exits that dominate the loop latch.
4644 TODO: extend to other conditions inside the loop body. */
4645 ex_bb = gimple_bb (use->stmt);
4646 if (use->stmt != last_stmt (ex_bb)
4647 || gimple_code (use->stmt) != GIMPLE_COND
4648 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4649 return false;
4651 exit = EDGE_SUCC (ex_bb, 0);
4652 if (flow_bb_inside_loop_p (loop, exit->dest))
4653 exit = EDGE_SUCC (ex_bb, 1);
4654 if (flow_bb_inside_loop_p (loop, exit->dest))
4655 return false;
4657 desc = niter_for_exit (data, exit);
4658 if (!desc)
4659 return false;
4661 /* Determine whether we can use the variable to test the exit condition.
4662 This is the case iff the period of the induction variable is greater
4663 than the number of iterations for which the exit condition is true. */
4664 period = iv_period (cand->iv);
4666 /* If the number of iterations is constant, compare against it directly. */
4667 if (TREE_CODE (desc->niter) == INTEGER_CST)
4669 /* See cand_value_at. */
4670 if (stmt_after_increment (loop, cand, use->stmt))
4672 if (!tree_int_cst_lt (desc->niter, period))
4673 return false;
4675 else
4677 if (tree_int_cst_lt (period, desc->niter))
4678 return false;
4682 /* If not, and if this is the only possible exit of the loop, see whether
4683 we can get a conservative estimate on the number of iterations of the
4684 entire loop and compare against that instead. */
4685 else
4687 double_int period_value, max_niter;
4689 max_niter = desc->max;
4690 if (stmt_after_increment (loop, cand, use->stmt))
4691 max_niter += double_int_one;
4692 period_value = tree_to_double_int (period);
4693 if (max_niter.ugt (period_value))
4695 /* See if we can take advantage of inferred loop bound information. */
4696 if (data->loop_single_exit_p)
4698 if (!max_loop_iterations (loop, &max_niter))
4699 return false;
4700 /* The loop bound is already adjusted by adding 1. */
4701 if (max_niter.ugt (period_value))
4702 return false;
4704 else
4705 return false;
4709 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4711 *bound = aff_combination_to_tree (&bnd);
4712 *comp = iv_elimination_compare (data, use);
4714 /* It is unlikely that computing the number of iterations using division
4715 would be more profitable than keeping the original induction variable. */
4716 if (expression_expensive_p (*bound))
4717 return false;
4719 /* Sometimes it is possible to handle the situation that the number of
4720 iterations may be zero unless additional assumptions hold, by using <
4721 instead of != in the exit condition.
4723 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4724 base the exit condition on it. However, that is often too
4725 expensive. */
4726 if (!integer_zerop (desc->may_be_zero))
4727 return iv_elimination_compare_lt (data, cand, comp, desc);
4729 return true;
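/* To illustrate the period check above: an unsigned char iv with step 1
   has period 255, so it may be used to test an exit that is taken after
   at most 255 iterations, but not one whose iteration count may reach
   300, since the candidate would wrap around before the bound is hit.  */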
4732 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4733 be copied, if it is used in the loop body and DATA->body_includes_call. */
4735 static int
4736 parm_decl_cost (struct ivopts_data *data, tree bound)
4738 tree sbound = bound;
4739 STRIP_NOPS (sbound);
4741 if (TREE_CODE (sbound) == SSA_NAME
4742 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4743 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4744 && data->body_includes_call)
4745 return COSTS_N_INSNS (1);
4747 return 0;
4750 /* Determines cost of basing replacement of USE on CAND in a condition. */
4752 static bool
4753 determine_use_iv_cost_condition (struct ivopts_data *data,
4754 struct iv_use *use, struct iv_cand *cand)
4756 tree bound = NULL_TREE;
4757 struct iv *cmp_iv;
4758 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4759 comp_cost elim_cost, express_cost, cost, bound_cost;
4760 bool ok;
4761 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4762 tree *control_var, *bound_cst;
4763 enum tree_code comp = ERROR_MARK;
4765 /* Only consider real candidates. */
4766 if (!cand->iv)
4768 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4769 ERROR_MARK, -1);
4770 return false;
4773 /* Try iv elimination. */
4774 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4776 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4777 if (elim_cost.cost == 0)
4778 elim_cost.cost = parm_decl_cost (data, bound);
4779 else if (TREE_CODE (bound) == INTEGER_CST)
4780 elim_cost.cost = 0;
4781 /* If we replace a loop condition 'i < n' with 'p < base + n',
4782 depends_on_elim will have 'base' and 'n' set, which implies
4783 that both 'base' and 'n' will be live during the loop. More likely,
4784 'base + n' will be loop invariant, resulting in only one live value
4785 during the loop. So in that case we clear depends_on_elim and set
4786 elim_inv_expr_id instead. */
4787 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4789 elim_inv_expr_id = get_expr_id (data, bound);
4790 bitmap_clear (depends_on_elim);
4792 /* The bound is a loop invariant, so it will be only computed
4793 once. */
4794 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4796 else
4797 elim_cost = infinite_cost;
4799 /* Try expressing the original giv. If it is compared with an invariant,
4800 note that we cannot get rid of it. */
4801 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4802 NULL, &cmp_iv);
4803 gcc_assert (ok);
4805 /* When the condition is a comparison of the candidate IV against
4806 zero, prefer this IV.
4808 TODO: The constant that we're subtracting from the cost should
4809 be target-dependent. This information should be added to the
4810 target costs for each backend. */
4811 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4812 && integer_zerop (*bound_cst)
4813 && (operand_equal_p (*control_var, cand->var_after, 0)
4814 || operand_equal_p (*control_var, cand->var_before, 0)))
4815 elim_cost.cost -= 1;
4817 express_cost = get_computation_cost (data, use, cand, false,
4818 &depends_on_express, NULL,
4819 &express_inv_expr_id);
4820 fd_ivopts_data = data;
4821 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4823 /* Count the cost of the original bound as well. */
4824 bound_cost = force_var_cost (data, *bound_cst, NULL);
4825 if (bound_cost.cost == 0)
4826 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4827 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4828 bound_cost.cost = 0;
4829 express_cost.cost += bound_cost.cost;
4831 /* Choose the better approach, preferring the eliminated IV. */
4832 if (compare_costs (elim_cost, express_cost) <= 0)
4834 cost = elim_cost;
4835 depends_on = depends_on_elim;
4836 depends_on_elim = NULL;
4837 inv_expr_id = elim_inv_expr_id;
4839 else
4841 cost = express_cost;
4842 depends_on = depends_on_express;
4843 depends_on_express = NULL;
4844 bound = NULL_TREE;
4845 comp = ERROR_MARK;
4846 inv_expr_id = express_inv_expr_id;
4849 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4851 if (depends_on_elim)
4852 BITMAP_FREE (depends_on_elim);
4853 if (depends_on_express)
4854 BITMAP_FREE (depends_on_express);
4856 return !infinite_cost_p (cost);
4859 /* Determines cost of basing replacement of USE on CAND. Returns false
4860 if USE cannot be based on CAND. */
4862 static bool
4863 determine_use_iv_cost (struct ivopts_data *data,
4864 struct iv_use *use, struct iv_cand *cand)
4866 switch (use->type)
4868 case USE_NONLINEAR_EXPR:
4869 return determine_use_iv_cost_generic (data, use, cand);
4871 case USE_ADDRESS:
4872 return determine_use_iv_cost_address (data, use, cand);
4874 case USE_COMPARE:
4875 return determine_use_iv_cost_condition (data, use, cand);
4877 default:
4878 gcc_unreachable ();
4882 /* Return true if get_computation_cost indicates that autoincrement is
4883 a possibility for the pair of USE and CAND, false otherwise. */
4885 static bool
4886 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4887 struct iv_cand *cand)
4889 bitmap depends_on;
4890 bool can_autoinc;
4891 comp_cost cost;
4893 if (use->type != USE_ADDRESS)
4894 return false;
4896 cost = get_computation_cost (data, use, cand, true, &depends_on,
4897 &can_autoinc, NULL);
4899 BITMAP_FREE (depends_on);
4901 return !infinite_cost_p (cost) && can_autoinc;
4904 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4905 use that allows autoincrement, and set their AINC_USE if possible. */
4907 static void
4908 set_autoinc_for_original_candidates (struct ivopts_data *data)
4910 unsigned i, j;
4912 for (i = 0; i < n_iv_cands (data); i++)
4914 struct iv_cand *cand = iv_cand (data, i);
4915 struct iv_use *closest_before = NULL;
4916 struct iv_use *closest_after = NULL;
4917 if (cand->pos != IP_ORIGINAL)
4918 continue;
4920 for (j = 0; j < n_iv_uses (data); j++)
4922 struct iv_use *use = iv_use (data, j);
4923 unsigned uid = gimple_uid (use->stmt);
4925 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4926 continue;
4928 if (uid < gimple_uid (cand->incremented_at)
4929 && (closest_before == NULL
4930 || uid > gimple_uid (closest_before->stmt)))
4931 closest_before = use;
4933 if (uid > gimple_uid (cand->incremented_at)
4934 && (closest_after == NULL
4935 || uid < gimple_uid (closest_after->stmt)))
4936 closest_after = use;
4939 if (closest_before != NULL
4940 && autoinc_possible_for_pair (data, closest_before, cand))
4941 cand->ainc_use = closest_before;
4942 else if (closest_after != NULL
4943 && autoinc_possible_for_pair (data, closest_after, cand))
4944 cand->ainc_use = closest_after;
4948 /* Finds the candidates for the induction variables. */
4950 static void
4951 find_iv_candidates (struct ivopts_data *data)
4953 /* Add commonly used ivs. */
4954 add_standard_iv_candidates (data);
4956 /* Add old induction variables. */
4957 add_old_ivs_candidates (data);
4959 /* Add induction variables derived from uses. */
4960 add_derived_ivs_candidates (data);
4962 set_autoinc_for_original_candidates (data);
4964 /* Record the important candidates. */
4965 record_important_candidates (data);
4968 /* Determines costs of basing the use of the iv on an iv candidate. */
4970 static void
4971 determine_use_iv_costs (struct ivopts_data *data)
4973 unsigned i, j;
4974 struct iv_use *use;
4975 struct iv_cand *cand;
4976 bitmap to_clear = BITMAP_ALLOC (NULL);
4978 alloc_use_cost_map (data);
4980 for (i = 0; i < n_iv_uses (data); i++)
4982 use = iv_use (data, i);
4984 if (data->consider_all_candidates)
4986 for (j = 0; j < n_iv_cands (data); j++)
4988 cand = iv_cand (data, j);
4989 determine_use_iv_cost (data, use, cand);
4992 else
4994 bitmap_iterator bi;
4996 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4998 cand = iv_cand (data, j);
4999 if (!determine_use_iv_cost (data, use, cand))
5000 bitmap_set_bit (to_clear, j);
5003 /* Remove the candidates for which the cost is infinite from
5004 the list of related candidates. */
5005 bitmap_and_compl_into (use->related_cands, to_clear);
5006 bitmap_clear (to_clear);
5010 BITMAP_FREE (to_clear);
5012 if (dump_file && (dump_flags & TDF_DETAILS))
5014 fprintf (dump_file, "Use-candidate costs:\n");
5016 for (i = 0; i < n_iv_uses (data); i++)
5018 use = iv_use (data, i);
5020 fprintf (dump_file, "Use %d:\n", i);
5021 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5022 for (j = 0; j < use->n_map_members; j++)
5024 if (!use->cost_map[j].cand
5025 || infinite_cost_p (use->cost_map[j].cost))
5026 continue;
5028 fprintf (dump_file, " %d\t%d\t%d\t",
5029 use->cost_map[j].cand->id,
5030 use->cost_map[j].cost.cost,
5031 use->cost_map[j].cost.complexity);
5032 if (use->cost_map[j].depends_on)
5033 bitmap_print (dump_file,
5034 use->cost_map[j].depends_on, "","");
5035 if (use->cost_map[j].inv_expr_id != -1)
5036 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5037 fprintf (dump_file, "\n");
5040 fprintf (dump_file, "\n");
5042 fprintf (dump_file, "\n");
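/* For instance, the table printed above takes the shape (with
   illustrative numbers):

     Use 0:
       cand     cost    compl.  depends on
       0        4       0
       2        2       1        inv_expr:1
*/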
5046 /* Determines cost of the candidate CAND. */
5048 static void
5049 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5051 comp_cost cost_base;
5052 unsigned cost, cost_step;
5053 tree base;
5055 if (!cand->iv)
5057 cand->cost = 0;
5058 return;
5061 /* There are two costs associated with the candidate -- its increment
5062 and its initialization. The second is almost negligible for any loop
5063 that rolls enough, so we take it into account only very little. */
5065 base = cand->iv->base;
5066 cost_base = force_var_cost (data, base, NULL);
5067 /* It would be exceptional for the iv register to happen to be initialized
5068 with the proper value at no cost. In general, there will at least be a
5069 regcopy or a const set. */
5070 if (cost_base.cost == 0)
5071 cost_base.cost = COSTS_N_INSNS (1);
5072 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5074 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5076 /* Prefer the original ivs unless we may gain something by replacing them;
5077 the reason is to make debugging simpler. So this is not relevant for
5078 artificial ivs created by other optimization passes. */
5079 if (cand->pos != IP_ORIGINAL
5080 || !SSA_NAME_VAR (cand->var_before)
5081 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5082 cost++;
5084 /* Prefer not to insert statements into the latch unless there are some
5085 already (so that we do not create unnecessary jumps). */
5086 if (cand->pos == IP_END
5087 && empty_block_p (ip_end_pos (data->current_loop)))
5088 cost++;
5090 cand->cost = cost;
5091 cand->cost_step = cost_step;
5094 /* Determines costs of computation of the candidates. */
5096 static void
5097 determine_iv_costs (struct ivopts_data *data)
5099 unsigned i;
5101 if (dump_file && (dump_flags & TDF_DETAILS))
5103 fprintf (dump_file, "Candidate costs:\n");
5104 fprintf (dump_file, " cand\tcost\n");
5107 for (i = 0; i < n_iv_cands (data); i++)
5109 struct iv_cand *cand = iv_cand (data, i);
5111 determine_iv_cost (data, cand);
5113 if (dump_file && (dump_flags & TDF_DETAILS))
5114 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5117 if (dump_file && (dump_flags & TDF_DETAILS))
5118 fprintf (dump_file, "\n");
5121 /* Calculates cost for having SIZE induction variables. */
5123 static unsigned
5124 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5126 /* We add size to the cost, so that we prefer eliminating ivs
5127 if possible. */
5128 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5129 data->body_includes_call);
5132 /* For each size of the induction variable set determine the penalty. */
5134 static void
5135 determine_set_costs (struct ivopts_data *data)
5137 unsigned j, n;
5138 gimple phi;
5139 gimple_stmt_iterator psi;
5140 tree op;
5141 struct loop *loop = data->current_loop;
5142 bitmap_iterator bi;
5144 if (dump_file && (dump_flags & TDF_DETAILS))
5146 fprintf (dump_file, "Global costs:\n");
5147 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5148 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5149 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5150 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5153 n = 0;
5154 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5156 phi = gsi_stmt (psi);
5157 op = PHI_RESULT (phi);
5159 if (virtual_operand_p (op))
5160 continue;
5162 if (get_iv (data, op))
5163 continue;
5165 n++;
5168 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5170 struct version_info *info = ver_info (data, j);
5172 if (info->inv_id && info->has_nonlin_use)
5173 n++;
5176 data->regs_used = n;
5177 if (dump_file && (dump_flags & TDF_DETAILS))
5178 fprintf (dump_file, " regs_used %d\n", n);
5180 if (dump_file && (dump_flags & TDF_DETAILS))
5182 fprintf (dump_file, " cost for size:\n");
5183 fprintf (dump_file, " ivs\tcost\n");
5184 for (j = 0; j <= 2 * target_avail_regs; j++)
5185 fprintf (dump_file, " %d\t%d\n", j,
5186 ivopts_global_cost_for_size (data, j));
5187 fprintf (dump_file, "\n");
5191 /* Returns true if A is a cheaper cost pair than B. */
5193 static bool
5194 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5196 int cmp;
5198 if (!a)
5199 return false;
5201 if (!b)
5202 return true;
5204 cmp = compare_costs (a->cost, b->cost);
5205 if (cmp < 0)
5206 return true;
5208 if (cmp > 0)
5209 return false;
5211 /* In case the costs are the same, prefer the cheaper candidate. */
5212 if (a->cand->cost < b->cand->cost)
5213 return true;
5215 return false;
5219 /* Returns the candidate by which USE is expressed in IVS. */
5221 static struct cost_pair *
5222 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5224 return ivs->cand_for_use[use->id];
5227 /* Computes the cost field of IVS structure. */
5229 static void
5230 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5232 comp_cost cost = ivs->cand_use_cost;
5234 cost.cost += ivs->cand_cost;
5236 cost.cost += ivopts_global_cost_for_size (data,
5237 ivs->n_regs + ivs->num_used_inv_expr);
5239 ivs->cost = cost;
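/* That is, the total cost of an assignment decomposes as

     cost = cand_use_cost       (costs of the uses on their selected ivs)
          + cand_cost           (costs of the iv increments)
          + global cost for n_regs + num_used_inv_expr registers,

   so live ivs and live invariant expressions compete for the same
   register budget.  */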
5242 /* Remove use of the invariants in set INVS from set IVS. */
5244 static void
5245 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5247 bitmap_iterator bi;
5248 unsigned iid;
5250 if (!invs)
5251 return;
5253 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5255 ivs->n_invariant_uses[iid]--;
5256 if (ivs->n_invariant_uses[iid] == 0)
5257 ivs->n_regs--;
5261 /* Set USE not to be expressed by any candidate in IVS. */
5263 static void
5264 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5265 struct iv_use *use)
5267 unsigned uid = use->id, cid;
5268 struct cost_pair *cp;
5270 cp = ivs->cand_for_use[uid];
5271 if (!cp)
5272 return;
5273 cid = cp->cand->id;
5275 ivs->bad_uses++;
5276 ivs->cand_for_use[uid] = NULL;
5277 ivs->n_cand_uses[cid]--;
5279 if (ivs->n_cand_uses[cid] == 0)
5281 bitmap_clear_bit (ivs->cands, cid);
5282 /* Do not count the pseudocandidates. */
5283 if (cp->cand->iv)
5284 ivs->n_regs--;
5285 ivs->n_cands--;
5286 ivs->cand_cost -= cp->cand->cost;
5288 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5291 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5293 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5295 if (cp->inv_expr_id != -1)
5297 ivs->used_inv_expr[cp->inv_expr_id]--;
5298 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5299 ivs->num_used_inv_expr--;
5301 iv_ca_recount_cost (data, ivs);
5304 /* Add use of the invariants in set INVS to set IVS. */
5306 static void
5307 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5309 bitmap_iterator bi;
5310 unsigned iid;
5312 if (!invs)
5313 return;
5315 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5317 ivs->n_invariant_uses[iid]++;
5318 if (ivs->n_invariant_uses[iid] == 1)
5319 ivs->n_regs++;
5323 /* Set cost pair for USE in set IVS to CP. */
5325 static void
5326 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5327 struct iv_use *use, struct cost_pair *cp)
5329 unsigned uid = use->id, cid;
5331 if (ivs->cand_for_use[uid] == cp)
5332 return;
5334 if (ivs->cand_for_use[uid])
5335 iv_ca_set_no_cp (data, ivs, use);
5337 if (cp)
5339 cid = cp->cand->id;
5341 ivs->bad_uses--;
5342 ivs->cand_for_use[uid] = cp;
5343 ivs->n_cand_uses[cid]++;
5344 if (ivs->n_cand_uses[cid] == 1)
5346 bitmap_set_bit (ivs->cands, cid);
5347 /* Do not count the pseudocandidates. */
5348 if (cp->cand->iv)
5349 ivs->n_regs++;
5350 ivs->n_cands++;
5351 ivs->cand_cost += cp->cand->cost;
5353 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5356 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5357 iv_ca_set_add_invariants (ivs, cp->depends_on);
5359 if (cp->inv_expr_id != -1)
5361 ivs->used_inv_expr[cp->inv_expr_id]++;
5362 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5363 ivs->num_used_inv_expr++;
5365 iv_ca_recount_cost (data, ivs);
5369 /* Extend set IVS by expressing USE by some of the candidates in it
5370 if possible. All important candidates will be considered
5371 if IMPORTANT_CANDIDATES is true. */
5373 static void
5374 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5375 struct iv_use *use, bool important_candidates)
5377 struct cost_pair *best_cp = NULL, *cp;
5378 bitmap_iterator bi;
5379 bitmap cands;
5380 unsigned i;
5382 gcc_assert (ivs->upto >= use->id);
5384 if (ivs->upto == use->id)
5386 ivs->upto++;
5387 ivs->bad_uses++;
5390 cands = (important_candidates ? data->important_candidates : ivs->cands);
5391 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5393 struct iv_cand *cand = iv_cand (data, i);
5395 cp = get_use_iv_cost (data, use, cand);
5397 if (cheaper_cost_pair (cp, best_cp))
5398 best_cp = cp;
5401 iv_ca_set_cp (data, ivs, use, best_cp);
5404 /* Get cost for assignment IVS. */
5406 static comp_cost
5407 iv_ca_cost (struct iv_ca *ivs)
5409 /* This was a conditional expression but it triggered a bug in
5410 Sun C 5.5. */
5411 if (ivs->bad_uses)
5412 return infinite_cost;
5413 else
5414 return ivs->cost;
5417 /* Returns true if all dependences of CP are among invariants in IVS. */
5419 static bool
5420 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5422 unsigned i;
5423 bitmap_iterator bi;
5425 if (!cp->depends_on)
5426 return true;
5428 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5430 if (ivs->n_invariant_uses[i] == 0)
5431 return false;
5434 return true;
5437 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5438 it before NEXT_CHANGE. */
5440 static struct iv_ca_delta *
5441 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5442 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5444 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5446 change->use = use;
5447 change->old_cp = old_cp;
5448 change->new_cp = new_cp;
5449 change->next_change = next_change;
5451 return change;
5454 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5455 are rewritten. */
5457 static struct iv_ca_delta *
5458 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5460 struct iv_ca_delta *last;
5462 if (!l2)
5463 return l1;
5465 if (!l1)
5466 return l2;
5468 for (last = l1; last->next_change; last = last->next_change)
5469 continue;
5470 last->next_change = l2;
5472 return l1;
5475 /* Reverse the list of changes DELTA, forming the inverse to it. */
5477 static struct iv_ca_delta *
5478 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5480 struct iv_ca_delta *act, *next, *prev = NULL;
5481 struct cost_pair *tmp;
5483 for (act = delta; act; act = next)
5485 next = act->next_change;
5486 act->next_change = prev;
5487 prev = act;
5489 tmp = act->old_cp;
5490 act->old_cp = act->new_cp;
5491 act->new_cp = tmp;
5494 return prev;
5497 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5498 reverted instead. */
5500 static void
5501 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5502 struct iv_ca_delta *delta, bool forward)
5504 struct cost_pair *from, *to;
5505 struct iv_ca_delta *act;
5507 if (!forward)
5508 delta = iv_ca_delta_reverse (delta);
5510 for (act = delta; act; act = act->next_change)
5512 from = act->old_cp;
5513 to = act->new_cp;
5514 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5515 iv_ca_set_cp (data, ivs, act->use, to);
5518 if (!forward)
5519 iv_ca_delta_reverse (delta);
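/* The usual pattern in the functions below is to commit a delta with
   FORWARD = true, query the resulting cost via iv_ca_cost, and then
   commit the same delta again with FORWARD = false to undo the
   tentative change if it does not pay off.  */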
5522 /* Returns true if CAND is used in IVS. */
5524 static bool
5525 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5527 return ivs->n_cand_uses[cand->id] > 0;
5530 /* Returns number of induction variable candidates in the set IVS. */
5532 static unsigned
5533 iv_ca_n_cands (struct iv_ca *ivs)
5535 return ivs->n_cands;
5538 /* Free the list of changes DELTA. */
5540 static void
5541 iv_ca_delta_free (struct iv_ca_delta **delta)
5543 struct iv_ca_delta *act, *next;
5545 for (act = *delta; act; act = next)
5547 next = act->next_change;
5548 free (act);
5551 *delta = NULL;
5554 /* Allocates new iv candidates assignment. */
5556 static struct iv_ca *
5557 iv_ca_new (struct ivopts_data *data)
5559 struct iv_ca *nw = XNEW (struct iv_ca);
5561 nw->upto = 0;
5562 nw->bad_uses = 0;
5563 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5564 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5565 nw->cands = BITMAP_ALLOC (NULL);
5566 nw->n_cands = 0;
5567 nw->n_regs = 0;
5568 nw->cand_use_cost = no_cost;
5569 nw->cand_cost = 0;
5570 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5571 nw->cost = no_cost;
5572 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5573 nw->num_used_inv_expr = 0;
5575 return nw;
5578 /* Free memory occupied by the set IVS. */
5580 static void
5581 iv_ca_free (struct iv_ca **ivs)
5583 free ((*ivs)->cand_for_use);
5584 free ((*ivs)->n_cand_uses);
5585 BITMAP_FREE ((*ivs)->cands);
5586 free ((*ivs)->n_invariant_uses);
5587 free ((*ivs)->used_inv_expr);
5588 free (*ivs);
5589 *ivs = NULL;
5592 /* Dumps IVS to FILE. */
5594 static void
5595 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5597 const char *pref = " invariants ";
5598 unsigned i;
5599 comp_cost cost = iv_ca_cost (ivs);
5601 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5602 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5603 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5604 bitmap_print (file, ivs->cands, " candidates: ","\n");
5606 for (i = 0; i < ivs->upto; i++)
5608 struct iv_use *use = iv_use (data, i);
5609 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5610 if (cp)
5611 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5612 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5613 else
5614 fprintf (file, " use:%d --> ??\n", use->id);
5617 for (i = 1; i <= data->max_inv_id; i++)
5618 if (ivs->n_invariant_uses[i])
5620 fprintf (file, "%s%d", pref, i);
5621 pref = ", ";
5623 fprintf (file, "\n\n");
5626 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5627 new set, and store differences in DELTA. Number of induction variables
5628 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5629 the function will try to find a solution with a minimal number of iv candidates. */
5631 static comp_cost
5632 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5633 struct iv_cand *cand, struct iv_ca_delta **delta,
5634 unsigned *n_ivs, bool min_ncand)
5636 unsigned i;
5637 comp_cost cost;
5638 struct iv_use *use;
5639 struct cost_pair *old_cp, *new_cp;
5641 *delta = NULL;
5642 for (i = 0; i < ivs->upto; i++)
5644 use = iv_use (data, i);
5645 old_cp = iv_ca_cand_for_use (ivs, use);
5647 if (old_cp
5648 && old_cp->cand == cand)
5649 continue;
5651 new_cp = get_use_iv_cost (data, use, cand);
5652 if (!new_cp)
5653 continue;
5655 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5656 continue;
5658 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5659 continue;
5661 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5664 iv_ca_delta_commit (data, ivs, *delta, true);
5665 cost = iv_ca_cost (ivs);
5666 if (n_ivs)
5667 *n_ivs = iv_ca_n_cands (ivs);
5668 iv_ca_delta_commit (data, ivs, *delta, false);
5670 return cost;
5673 /* Try narrowing set IVS by removing CAND. Return the cost of
5674 the new set and store the differences in DELTA. */
5676 static comp_cost
5677 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5678 struct iv_cand *cand, struct iv_ca_delta **delta)
5680 unsigned i, ci;
5681 struct iv_use *use;
5682 struct cost_pair *old_cp, *new_cp, *cp;
5683 bitmap_iterator bi;
5684 struct iv_cand *cnd;
5685 comp_cost cost;
5687 *delta = NULL;
5688 for (i = 0; i < n_iv_uses (data); i++)
5690 use = iv_use (data, i);
5692 old_cp = iv_ca_cand_for_use (ivs, use);
5693 if (old_cp->cand != cand)
5694 continue;
5696 new_cp = NULL;
5698 if (data->consider_all_candidates)
5700 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5702 if (ci == cand->id)
5703 continue;
5705 cnd = iv_cand (data, ci);
5707 cp = get_use_iv_cost (data, use, cnd);
5708 if (!cp)
5709 continue;
5711 if (!iv_ca_has_deps (ivs, cp))
5712 continue;
5714 if (!cheaper_cost_pair (cp, new_cp))
5715 continue;
5717 new_cp = cp;
5720 else
5722 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5724 if (ci == cand->id)
5725 continue;
5727 cnd = iv_cand (data, ci);
5729 cp = get_use_iv_cost (data, use, cnd);
5730 if (!cp)
5731 continue;
5732 if (!iv_ca_has_deps (ivs, cp))
5733 continue;
5735 if (!cheaper_cost_pair (cp, new_cp))
5736 continue;
5738 new_cp = cp;
5742 if (!new_cp)
5744 iv_ca_delta_free (delta);
5745 return infinite_cost;
5748 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5751 iv_ca_delta_commit (data, ivs, *delta, true);
5752 cost = iv_ca_cost (ivs);
5753 iv_ca_delta_commit (data, ivs, *delta, false);
5755 return cost;
5758 /* Try optimizing the set of candidates IVS by removing candidates other
5759 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5760 differences in DELTA. */
5762 static comp_cost
5763 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5764 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5766 bitmap_iterator bi;
5767 struct iv_ca_delta *act_delta, *best_delta;
5768 unsigned i;
5769 comp_cost best_cost, acost;
5770 struct iv_cand *cand;
5772 best_delta = NULL;
5773 best_cost = iv_ca_cost (ivs);
5775 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5777 cand = iv_cand (data, i);
5779 if (cand == except_cand)
5780 continue;
5782 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5784 if (compare_costs (acost, best_cost) < 0)
5786 best_cost = acost;
5787 iv_ca_delta_free (&best_delta);
5788 best_delta = act_delta;
5790 else
5791 iv_ca_delta_free (&act_delta);
5794 if (!best_delta)
5796 *delta = NULL;
5797 return best_cost;
5800 /* Recurse to possibly remove other unnecessary ivs. */
5801 iv_ca_delta_commit (data, ivs, best_delta, true);
5802 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5803 iv_ca_delta_commit (data, ivs, best_delta, false);
5804 *delta = iv_ca_delta_join (best_delta, *delta);
5805 return best_cost;
5808 /* Tries to extend the set IVS in the best possible way in order
5809 to express the USE. If ORIGINALP is true, prefer candidates from
5810 the original set of IVs, otherwise favor important candidates not
5811 based on any memory object. */
5813 static bool
5814 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5815 struct iv_use *use, bool originalp)
5817 comp_cost best_cost, act_cost;
5818 unsigned i;
5819 bitmap_iterator bi;
5820 struct iv_cand *cand;
5821 struct iv_ca_delta *best_delta = NULL, *act_delta;
5822 struct cost_pair *cp;
5824 iv_ca_add_use (data, ivs, use, false);
5825 best_cost = iv_ca_cost (ivs);
5827 cp = iv_ca_cand_for_use (ivs, use);
5828 if (!cp)
5830 ivs->upto--;
5831 ivs->bad_uses--;
5832 iv_ca_add_use (data, ivs, use, true);
5833 best_cost = iv_ca_cost (ivs);
5834 cp = iv_ca_cand_for_use (ivs, use);
5836 if (cp)
5838 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5839 iv_ca_set_no_cp (data, ivs, use);
5842 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5843 first try important candidates not based on any memory object. Only if
5844 this fails, try the specific ones. Rationale -- in loops with many
5845 variables the best choice often is to use just one generic biv. If we
5846 added here many ivs specific to the uses, the optimization algorithm later
5847 would be likely to get stuck in a local minimum, thus causing us to create
5848 too many ivs. The approach from few ivs to more seems more likely to be
5849 successful -- starting from few ivs, replacing an expensive use by a
5850 specific iv should always be a win. */
5851 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5853 cand = iv_cand (data, i);
5855 if (originalp && cand->pos != IP_ORIGINAL)
5856 continue;
5858 if (!originalp && cand->iv->base_object != NULL_TREE)
5859 continue;
5861 if (iv_ca_cand_used_p (ivs, cand))
5862 continue;
5864 cp = get_use_iv_cost (data, use, cand);
5865 if (!cp)
5866 continue;
5868 iv_ca_set_cp (data, ivs, use, cp);
5869 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5870 true);
5871 iv_ca_set_no_cp (data, ivs, use);
5872 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5874 if (compare_costs (act_cost, best_cost) < 0)
5876 best_cost = act_cost;
5878 iv_ca_delta_free (&best_delta);
5879 best_delta = act_delta;
5881 else
5882 iv_ca_delta_free (&act_delta);
5885 if (infinite_cost_p (best_cost))
5887 for (i = 0; i < use->n_map_members; i++)
5889 cp = use->cost_map + i;
5890 cand = cp->cand;
5891 if (!cand)
5892 continue;
5894 /* Already tried this. */
5895 if (cand->important)
5897 if (originalp && cand->pos == IP_ORIGINAL)
5898 continue;
5899 if (!originalp && cand->iv->base_object == NULL_TREE)
5900 continue;
5903 if (iv_ca_cand_used_p (ivs, cand))
5904 continue;
5906 act_delta = NULL;
5907 iv_ca_set_cp (data, ivs, use, cp);
5908 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5909 iv_ca_set_no_cp (data, ivs, use);
5910 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5911 cp, act_delta);
5913 if (compare_costs (act_cost, best_cost) < 0)
5915 best_cost = act_cost;
5917 if (best_delta)
5918 iv_ca_delta_free (&best_delta);
5919 best_delta = act_delta;
5921 else
5922 iv_ca_delta_free (&act_delta);
5926 iv_ca_delta_commit (data, ivs, best_delta, true);
5927 iv_ca_delta_free (&best_delta);
5929 return !infinite_cost_p (best_cost);
5932 /* Finds an initial assignment of candidates to uses. */
5934 static struct iv_ca *
5935 get_initial_solution (struct ivopts_data *data, bool originalp)
5937 struct iv_ca *ivs = iv_ca_new (data);
5938 unsigned i;
5940 for (i = 0; i < n_iv_uses (data); i++)
5941 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5943 iv_ca_free (&ivs);
5944 return NULL;
5947 return ivs;
5950 /* Tries to improve set of induction variables IVS. */
5952 static bool
5953 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5955 unsigned i, n_ivs;
5956 comp_cost acost, best_cost = iv_ca_cost (ivs);
5957 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5958 struct iv_cand *cand;
5960 /* Try extending the set of induction variables by one. */
5961 for (i = 0; i < n_iv_cands (data); i++)
5963 cand = iv_cand (data, i);
5965 if (iv_ca_cand_used_p (ivs, cand))
5966 continue;
5968 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5969 if (!act_delta)
5970 continue;
5972 /* If we successfully added the candidate and the set is small enough,
5973 try optimizing it by removing other candidates. */
5974 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5976 iv_ca_delta_commit (data, ivs, act_delta, true);
5977 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5978 iv_ca_delta_commit (data, ivs, act_delta, false);
5979 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5982 if (compare_costs (acost, best_cost) < 0)
5984 best_cost = acost;
5985 iv_ca_delta_free (&best_delta);
5986 best_delta = act_delta;
5988 else
5989 iv_ca_delta_free (&act_delta);
5992 if (!best_delta)
5994 /* Try removing the candidates from the set instead. */
5995 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5997 /* Nothing more we can do. */
5998 if (!best_delta)
5999 return false;
6002 iv_ca_delta_commit (data, ivs, best_delta, true);
6003 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6004 iv_ca_delta_free (&best_delta);
6005 return true;
6008 /* Attempts to find the optimal set of induction variables. We use a simple
6009 greedy heuristic -- we try to replace at most one candidate in the selected
6010 solution and remove the unused ivs while this improves the cost. */
6012 static struct iv_ca *
6013 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6015 struct iv_ca *set;
6017 /* Get the initial solution. */
6018 set = get_initial_solution (data, originalp);
6019 if (!set)
6021 if (dump_file && (dump_flags & TDF_DETAILS))
6022 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6023 return NULL;
6026 if (dump_file && (dump_flags & TDF_DETAILS))
6028 fprintf (dump_file, "Initial set of candidates:\n");
6029 iv_ca_dump (data, dump_file, set);
6032 while (try_improve_iv_set (data, set))
6034 if (dump_file && (dump_flags & TDF_DETAILS))
6036 fprintf (dump_file, "Improved to:\n");
6037 iv_ca_dump (data, dump_file, set);
6041 return set;
6044 static struct iv_ca *
6045 find_optimal_iv_set (struct ivopts_data *data)
6047 unsigned i;
6048 struct iv_ca *set, *origset;
6049 struct iv_use *use;
6050 comp_cost cost, origcost;
6052 /* Determine the cost based on a strategy that starts with original IVs,
6053 and try again using a strategy that prefers candidates not based
6054 on any IVs. */
6055 origset = find_optimal_iv_set_1 (data, true);
6056 set = find_optimal_iv_set_1 (data, false);
6058 if (!origset && !set)
6059 return NULL;
6061 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6062 cost = set ? iv_ca_cost (set) : infinite_cost;
6064 if (dump_file && (dump_flags & TDF_DETAILS))
6066 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6067 origcost.cost, origcost.complexity);
6068 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6069 cost.cost, cost.complexity);
6072 /* Choose the one with the best cost. */
6073 if (compare_costs (origcost, cost) <= 0)
6075 if (set)
6076 iv_ca_free (&set);
6077 set = origset;
6079 else if (origset)
6080 iv_ca_free (&origset);
6082 for (i = 0; i < n_iv_uses (data); i++)
6084 use = iv_use (data, i);
6085 use->selected = iv_ca_cand_for_use (set, use)->cand;
6088 return set;
6091 /* Creates a new induction variable corresponding to CAND. */
6093 static void
6094 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6096 gimple_stmt_iterator incr_pos;
6097 tree base;
6098 bool after = false;
6100 if (!cand->iv)
6101 return;
6103 switch (cand->pos)
6105 case IP_NORMAL:
6106 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6107 break;
6109 case IP_END:
6110 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6111 after = true;
6112 break;
6114 case IP_AFTER_USE:
6115 after = true;
6116 /* fall through */
6117 case IP_BEFORE_USE:
6118 incr_pos = gsi_for_stmt (cand->incremented_at);
6119 break;
6121 case IP_ORIGINAL:
6122 /* Mark that the iv is preserved. */
6123 name_info (data, cand->var_before)->preserve_biv = true;
6124 name_info (data, cand->var_after)->preserve_biv = true;
6126 /* Rewrite the increment so that it uses var_before directly. */
6127 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6128 return;
6131 gimple_add_tmp_var (cand->var_before);
6133 base = unshare_expr (cand->iv->base);
6135 create_iv (base, unshare_expr (cand->iv->step),
6136 cand->var_before, data->current_loop,
6137 &incr_pos, after, &cand->var_before, &cand->var_after);
6140 /* Creates new induction variables described in SET. */
6142 static void
6143 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6145 unsigned i;
6146 struct iv_cand *cand;
6147 bitmap_iterator bi;
6149 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6151 cand = iv_cand (data, i);
6152 create_new_iv (data, cand);
6155 if (dump_file && (dump_flags & TDF_DETAILS))
6157 fprintf (dump_file, "\nSelected IV set: \n");
6158 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6160 cand = iv_cand (data, i);
6161 dump_cand (dump_file, cand);
6163 fprintf (dump_file, "\n");
6167 /* Rewrites USE (definition of iv used in a nonlinear expression)
6168 using candidate CAND. */
6170 static void
6171 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6172 struct iv_use *use, struct iv_cand *cand)
6174 tree comp;
6175 tree op, tgt;
6176 gimple ass;
6177 gimple_stmt_iterator bsi;
6179 /* An important special case -- if we are asked to express the value of
6180 the original iv by itself, just exit; there is no need to
6181 introduce a new computation (that might also need casting the
6182 variable to unsigned and back). */
6183 if (cand->pos == IP_ORIGINAL
6184 && cand->incremented_at == use->stmt)
6186 enum tree_code stmt_code;
6188 gcc_assert (is_gimple_assign (use->stmt));
6189 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6191 /* Check whether we may leave the computation unchanged.
6192 This is the case only if it does not rely on other
6193 computations in the loop -- otherwise, the computation
6194 we rely upon may be removed in remove_unused_ivs,
6195 thus leading to ICE. */
6196 stmt_code = gimple_assign_rhs_code (use->stmt);
6197 if (stmt_code == PLUS_EXPR
6198 || stmt_code == MINUS_EXPR
6199 || stmt_code == POINTER_PLUS_EXPR)
6201 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6202 op = gimple_assign_rhs2 (use->stmt);
6203 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6204 op = gimple_assign_rhs1 (use->stmt);
6205 else
6206 op = NULL_TREE;
6208 else
6209 op = NULL_TREE;
6211 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6212 return;
6215 comp = get_computation (data->current_loop, use, cand);
6216 gcc_assert (comp != NULL_TREE);
6218 switch (gimple_code (use->stmt))
6220 case GIMPLE_PHI:
6221 tgt = PHI_RESULT (use->stmt);
6223 /* If we should keep the biv, do not replace it. */
6224 if (name_info (data, tgt)->preserve_biv)
6225 return;
6227 bsi = gsi_after_labels (gimple_bb (use->stmt));
6228 break;
6230 case GIMPLE_ASSIGN:
6231 tgt = gimple_assign_lhs (use->stmt);
6232 bsi = gsi_for_stmt (use->stmt);
6233 break;
6235 default:
6236 gcc_unreachable ();
6239 if (!valid_gimple_rhs_p (comp)
6240 || (gimple_code (use->stmt) != GIMPLE_PHI
6241 /* We can't allow re-allocating the stmt as it might be pointed
6242 to still. */
6243 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6244 >= gimple_num_ops (gsi_stmt (bsi)))))
6246 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6247 true, GSI_SAME_STMT);
6248 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6250 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6251 /* As this isn't a plain copy we have to reset alignment
6252 information. */
6253 if (SSA_NAME_PTR_INFO (comp))
6254 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6258 if (gimple_code (use->stmt) == GIMPLE_PHI)
6260 ass = gimple_build_assign (tgt, comp);
6261 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6263 bsi = gsi_for_stmt (use->stmt);
6264 remove_phi_node (&bsi, false);
6266 else
6268 gimple_assign_set_rhs_from_tree (&bsi, comp);
6269 use->stmt = gsi_stmt (bsi);
6273 /* Performs a peephole optimization to reorder the iv update statement with
6274 a mem ref to enable instruction combining in later phases. The mem ref uses
6275 the iv value before the update, so the reordering transformation requires
6276 adjustment of the offset. CAND is the selected IV_CAND.
6278 Example:
6280 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6281 iv2 = iv1 + 1;
6283 if (t < val) (1)
6284 goto L;
6285 goto Head;
6288 Directly propagating t over to (1) will introduce an overlapping live range
6289 and thus increase register pressure. This peephole transforms it into:
6292 iv2 = iv1 + 1;
6293 t = MEM_REF (base, iv2, 8, 8);
6294 if (t < val)
6295 goto L;
6296 goto Head;
static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */
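
/* For illustration only (names hypothetical): an address use "*p_5",
   where p_5 = base_2 + (sizetype) i_4 * 4, rewritten against a candidate
   ivtmp_8 tracking the same progression, may end up as a single

     MEM[base: base_2, index: ivtmp_8, step: 4]

   reference built by create_mem_ref from the affine combination.  */
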
static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
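
/* For illustration (names and bounds hypothetical): if iv elimination
   determined that the exit test "if (i_4 < n_7)" can be replaced using
   candidate ivtmp_9, the condition becomes e.g.

     if (ivtmp_9 != bound_11)

   where bound_11 is the precomputed bound inserted on the loop
   preheader edge.  Otherwise the original giv is simply expressed in
   terms of the candidate.  */
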
static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */
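
/* Debug-info note, with a hypothetical example: when an iv i_4 is about
   to be removed but still appears in debug binds such as

     # DEBUG i => i_4
     # DEBUG j => i_4 + 1

   its value is re-expressed from the best remaining candidate; with
   multiple or non-trivial uses (count > 1 below) a debug temp D#n is
   introduced and the binds refer to it instead, so that debug info
   survives without keeping the iv alive.  */
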
static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
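              /* A heuristic preference among the selected candidates:
                 same step as the removed iv scores +4, same machine
                 mode of the base +2, constant base +1; the
                 highest-scoring candidate wins.  */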
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}