/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
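
/* For example, in a loop such as

     for (i = 0; i < 100; i++)
       a[i] = 0;

   the address computation for a[i] is an interesting use of i, and a
   pointer candidate that starts at &a[0] and is incremented by
   sizeof (a[0]) on each iteration is one of the candidates considered
   for it.  If that candidate is selected, the store becomes an
   indirection through the pointer, the exit test can compare the pointer
   against &a[100], and i itself may be eliminated entirely.  */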
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in the step of
			   the biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */
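
/* For instance, for EXPR = &a[i] + 4 the result is &a, while for a
   non-pointer expression or an integer constant the result is NULL_TREE.  */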
static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
	forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
					     &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */
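
/* For instance, for the loop-header phi

     i_1 = PHI <0 (preheader), i_2 (latch)>

   where the loop body computes i_2 = i_1 + 1, the step is 1.  */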
static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Clear the ssa name, if it was not reset by the caller, so that it
     is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
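
/* For instance, for TOP = (i + 1) * 4 and BOT = i + 1 this stores 4 to
   MUL, and for TOP = 12 and BOT = 4 it stores 3.  For TOP = i * 4 and
   BOT = i * 2 it fails, since the recursion only matches structurally
   equal operands.  */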
static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
2215 /* Strips constant offsets from EXPR and stores their sum in OFFSET. */
2217 static tree
2218 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2220 HOST_WIDE_INT off;
2221 tree core = strip_offset_1 (expr, false, false, &off);
2222 *offset = off;
2223 return core;
2226 /* Returns a variant of TYPE that can be used as a base for different uses.
2227 We return an unsigned type with the same precision, which avoids problems
2228 with overflows. */
2230 static tree
2231 generic_type_for (tree type)
2233 if (POINTER_TYPE_P (type))
2234 return unsigned_type_for (type);
2236 if (TYPE_UNSIGNED (type))
2237 return type;
2239 return unsigned_type_for (type);
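/* Why an unsigned type: overflow of a signed induction variable is
   undefined behavior in C, so we could not freely rewrite increments in a
   signed type, whereas unsigned arithmetic is defined to wrap modulo
   2^precision.  An illustrative (not pass-internal) sketch: */
static unsigned int
wrapping_increment_sketch (unsigned int u)
{
  /* Well defined for every input: UINT_MAX + 1 wraps to 0.  The same
     increment on a signed int would be undefined at INT_MAX.  */
  return u + 1;
}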
2242 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2243 the bitmap in which we should store them. */
2245 static struct ivopts_data *fd_ivopts_data;
2246 static tree
2247 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2249 bitmap *depends_on = (bitmap *) data;
2250 struct version_info *info;
2252 if (TREE_CODE (*expr_p) != SSA_NAME)
2253 return NULL_TREE;
2254 info = name_info (fd_ivopts_data, *expr_p);
2256 if (!info->inv_id || info->has_nonlin_use)
2257 return NULL_TREE;
2259 if (!*depends_on)
2260 *depends_on = BITMAP_ALLOC (NULL);
2261 bitmap_set_bit (*depends_on, info->inv_id);
2263 return NULL_TREE;
2266 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2267 position to POS. If USE is not NULL, the candidate is set as related to
2268 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2269 replacement of the final value of the iv by a direct computation. */
2271 static struct iv_cand *
2272 add_candidate_1 (struct ivopts_data *data,
2273 tree base, tree step, bool important, enum iv_position pos,
2274 struct iv_use *use, gimple incremented_at)
2276 unsigned i;
2277 struct iv_cand *cand = NULL;
2278 tree type, orig_type;
2280 /* For non-original variables, make sure their values are computed in a type
2281 that does not invoke undefined behavior on overflows (since in general,
2282 we cannot prove that these induction variables are non-wrapping). */
2283 if (pos != IP_ORIGINAL)
2285 orig_type = TREE_TYPE (base);
2286 type = generic_type_for (orig_type);
2287 if (type != orig_type)
2289 base = fold_convert (type, base);
2290 step = fold_convert (type, step);
2294 for (i = 0; i < n_iv_cands (data); i++)
2296 cand = iv_cand (data, i);
2298 if (cand->pos != pos)
2299 continue;
2301 if (cand->incremented_at != incremented_at
2302 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2303 && cand->ainc_use != use))
2304 continue;
2306 if (!cand->iv)
2308 if (!base && !step)
2309 break;
2311 continue;
2314 if (!base && !step)
2315 continue;
2317 if (operand_equal_p (base, cand->iv->base, 0)
2318 && operand_equal_p (step, cand->iv->step, 0)
2319 && (TYPE_PRECISION (TREE_TYPE (base))
2320 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2321 break;
2324 if (i == n_iv_cands (data))
2326 cand = XCNEW (struct iv_cand);
2327 cand->id = i;
2329 if (!base && !step)
2330 cand->iv = NULL;
2331 else
2332 cand->iv = alloc_iv (base, step);
2334 cand->pos = pos;
2335 if (pos != IP_ORIGINAL && cand->iv)
2337 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2338 cand->var_after = cand->var_before;
2340 cand->important = important;
2341 cand->incremented_at = incremented_at;
2342 data->iv_candidates.safe_push (cand);
2344 if (step
2345 && TREE_CODE (step) != INTEGER_CST)
2347 fd_ivopts_data = data;
2348 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2351 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2352 cand->ainc_use = use;
2353 else
2354 cand->ainc_use = NULL;
2356 if (dump_file && (dump_flags & TDF_DETAILS))
2357 dump_cand (dump_file, cand);
2360 if (important && !cand->important)
2362 cand->important = true;
2363 if (dump_file && (dump_flags & TDF_DETAILS))
2364 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2367 if (use)
2369 bitmap_set_bit (use->related_cands, i);
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2371 fprintf (dump_file, "Candidate %d is related to use %d\n",
2372 cand->id, use->id);
2375 return cand;
2378 /* Returns true if incrementing the induction variable at the end of the LOOP
2379 is allowed.
2381 The purpose is to avoid splitting the latch edge with a biv increment, thus
2382 creating a jump, possibly confusing other optimization passes and leaving
2383 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2384 is not available (so we do not have a better alternative), or if the latch
2385 edge is already nonempty. */
2387 static bool
2388 allow_ip_end_pos_p (struct loop *loop)
2390 if (!ip_normal_pos (loop))
2391 return true;
2393 if (!empty_block_p (ip_end_pos (loop)))
2394 return true;
2396 return false;
2399 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2400 Important field is set to IMPORTANT. */
2402 static void
2403 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2404 bool important, struct iv_use *use)
2406 basic_block use_bb = gimple_bb (use->stmt);
2407 enum machine_mode mem_mode;
2408 unsigned HOST_WIDE_INT cstepi;
2410 /* If we insert the increment in any position other than the standard
2411 ones, we must ensure that it is incremented once per iteration.
2412 It must not be in an inner nested loop, or one side of an if
2413 statement. */
2414 if (use_bb->loop_father != data->current_loop
2415 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2416 || stmt_could_throw_p (use->stmt)
2417 || !cst_and_fits_in_hwi (step))
2418 return;
2420 cstepi = int_cst_value (step);
2422 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2423 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2424 || USE_STORE_PRE_INCREMENT (mem_mode))
2425 && GET_MODE_SIZE (mem_mode) == cstepi)
2426 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2427 || USE_STORE_PRE_DECREMENT (mem_mode))
2428 && GET_MODE_SIZE (mem_mode) == -cstepi))
2430 enum tree_code code = MINUS_EXPR;
2431 tree new_base;
2432 tree new_step = step;
2434 if (POINTER_TYPE_P (TREE_TYPE (base)))
2436 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2437 code = POINTER_PLUS_EXPR;
2439 else
2440 new_step = fold_convert (TREE_TYPE (base), new_step);
2441 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2442 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2443 use->stmt);
2445 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2446 || USE_STORE_POST_INCREMENT (mem_mode))
2447 && GET_MODE_SIZE (mem_mode) == cstepi)
2448 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2449 || USE_STORE_POST_DECREMENT (mem_mode))
2450 && GET_MODE_SIZE (mem_mode) == -cstepi))
2452 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2453 use->stmt);
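/* The kind of source loop these candidates pay off for, as a hypothetical
   illustration (not code from this pass): on a target with post-increment
   addressing, each access below and its IV step can fuse into a single
   auto-incrementing memory operand.  */
static void
copy_bytes_sketch (char *dst, const char *src, unsigned long n)
{
  while (n--)
    *dst++ = *src++;  /* Step == access size, matching AINC_POST_INC.  */
}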
2457 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT.
2458 If USE is not NULL, the candidate is set as related to it. The
2459 candidate computation is scheduled at all available positions. */
2461 static void
2462 add_candidate (struct ivopts_data *data,
2463 tree base, tree step, bool important, struct iv_use *use)
2465 if (ip_normal_pos (data->current_loop))
2466 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2467 if (ip_end_pos (data->current_loop)
2468 && allow_ip_end_pos_p (data->current_loop))
2469 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2471 if (use != NULL && use->type == USE_ADDRESS)
2472 add_autoinc_candidates (data, base, step, important, use);
2475 /* Adds standard iv candidates. */
2477 static void
2478 add_standard_iv_candidates (struct ivopts_data *data)
2480 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2482 /* The same for the wider `long' type if it is still fast enough. */
2483 if (TYPE_PRECISION
2484 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2485 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2486 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2487 build_int_cst (long_integer_type_node, 1), true, NULL);
2489 /* And likewise for `long long' if it is wider still and fast enough. */
2490 if (TYPE_PRECISION
2491 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2492 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2493 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2494 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2498 /* Adds candidates based on the old induction variable IV. */
2500 static void
2501 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2503 gimple phi;
2504 tree def;
2505 struct iv_cand *cand;
2507 add_candidate (data, iv->base, iv->step, true, NULL);
2509 /* The same, but with initial value zero. */
2510 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2511 add_candidate (data, size_int (0), iv->step, true, NULL);
2512 else
2513 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2514 iv->step, true, NULL);
2516 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2517 if (gimple_code (phi) == GIMPLE_PHI)
2519 /* Additionally record the possibility of leaving the original iv
2520 untouched. */
2521 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2522 cand = add_candidate_1 (data,
2523 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2524 SSA_NAME_DEF_STMT (def));
2525 cand->var_before = iv->ssa_name;
2526 cand->var_after = def;
2530 /* Adds candidates based on the old induction variables. */
2532 static void
2533 add_old_ivs_candidates (struct ivopts_data *data)
2535 unsigned i;
2536 struct iv *iv;
2537 bitmap_iterator bi;
2539 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2541 iv = ver_info (data, i)->iv;
2542 if (iv && iv->biv_p && !integer_zerop (iv->step))
2543 add_old_iv_candidates (data, iv);
2547 /* Adds candidates based on the value of the induction variable IV and USE. */
2549 static void
2550 add_iv_value_candidates (struct ivopts_data *data,
2551 struct iv *iv, struct iv_use *use)
2553 unsigned HOST_WIDE_INT offset;
2554 tree base;
2555 tree basetype;
2557 add_candidate (data, iv->base, iv->step, false, use);
2559 /* The same, but with initial value zero. Make such a variable important,
2560 since it is generic enough that possibly many uses may be based
2561 on it. */
2562 basetype = TREE_TYPE (iv->base);
2563 if (POINTER_TYPE_P (basetype))
2564 basetype = sizetype;
2565 add_candidate (data, build_int_cst (basetype, 0),
2566 iv->step, true, use);
2568 /* Third, try removing the constant offset. Make sure to even
2569 add a candidate for &a[0] vs. (T *)&a. */
2570 base = strip_offset (iv->base, &offset);
2571 if (offset
2572 || base != iv->base)
2573 add_candidate (data, base, iv->step, false, use);
2576 /* Adds candidates based on the uses. */
2578 static void
2579 add_derived_ivs_candidates (struct ivopts_data *data)
2581 unsigned i;
2583 for (i = 0; i < n_iv_uses (data); i++)
2585 struct iv_use *use = iv_use (data, i);
2587 if (!use)
2588 continue;
2590 switch (use->type)
2592 case USE_NONLINEAR_EXPR:
2593 case USE_COMPARE:
2594 case USE_ADDRESS:
2595 /* Just add the ivs based on the value of the iv used here. */
2596 add_iv_value_candidates (data, use->iv, use);
2597 break;
2599 default:
2600 gcc_unreachable ();
2605 /* Record important candidates and add them to related_cands bitmaps
2606 if needed. */
2608 static void
2609 record_important_candidates (struct ivopts_data *data)
2611 unsigned i;
2612 struct iv_use *use;
2614 for (i = 0; i < n_iv_cands (data); i++)
2616 struct iv_cand *cand = iv_cand (data, i);
2618 if (cand->important)
2619 bitmap_set_bit (data->important_candidates, i);
2622 data->consider_all_candidates = (n_iv_cands (data)
2623 <= CONSIDER_ALL_CANDIDATES_BOUND);
2625 if (data->consider_all_candidates)
2627 /* We will not need "related_cands" bitmaps in this case,
2628 so release them to decrease peak memory consumption. */
2629 for (i = 0; i < n_iv_uses (data); i++)
2631 use = iv_use (data, i);
2632 BITMAP_FREE (use->related_cands);
2635 else
2637 /* Add important candidates to the related_cands bitmaps. */
2638 for (i = 0; i < n_iv_uses (data); i++)
2639 bitmap_ior_into (iv_use (data, i)->related_cands,
2640 data->important_candidates);
2644 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2645 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2646 we allocate a simple list for every use. */
2648 static void
2649 alloc_use_cost_map (struct ivopts_data *data)
2651 unsigned i, size, s;
2653 for (i = 0; i < n_iv_uses (data); i++)
2655 struct iv_use *use = iv_use (data, i);
2657 if (data->consider_all_candidates)
2658 size = n_iv_cands (data);
2659 else
2661 s = bitmap_count_bits (use->related_cands);
2663 /* Round up to a power of two, so that computing the modulo by it is fast. */
2664 size = s ? (1 << ceil_log2 (s)) : 1;
2667 use->n_map_members = size;
2668 use->cost_map = XCNEWVEC (struct cost_pair, size);
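/* Why SIZE is rounded up to a power of two: for such a SIZE,
   ID % SIZE == ID & (SIZE - 1), which is how set_use_iv_cost and
   get_use_iv_cost below pick the starting hash slot without a division.
   A hypothetical sketch: */
static unsigned
hash_slot_sketch (unsigned id, unsigned size)
{
  /* For size == 8, id % 8 == id & 7: a single AND instruction.  */
  return id & (size - 1);
}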
2672 /* Returns a description of the computation cost of an expression whose
2673 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2675 static comp_cost
2676 new_cost (unsigned runtime, unsigned complexity)
2678 comp_cost cost;
2680 cost.cost = runtime;
2681 cost.complexity = complexity;
2683 return cost;
2686 /* Adds costs COST1 and COST2. */
2688 static comp_cost
2689 add_costs (comp_cost cost1, comp_cost cost2)
2691 cost1.cost += cost2.cost;
2692 cost1.complexity += cost2.complexity;
2694 return cost1;
2696 /* Subtracts cost COST2 from cost COST1. */
2698 static comp_cost
2699 sub_costs (comp_cost cost1, comp_cost cost2)
2701 cost1.cost -= cost2.cost;
2702 cost1.complexity -= cost2.complexity;
2704 return cost1;
2707 /* Returns a negative number if COST1 < COST2, a positive number if
2708 COST1 > COST2, and 0 if COST1 = COST2. */
2710 static int
2711 compare_costs (comp_cost cost1, comp_cost cost2)
2713 if (cost1.cost == cost2.cost)
2714 return cost1.complexity - cost2.complexity;
2716 return cost1.cost - cost2.cost;
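/* Usage sketch with made-up numbers: costs compare lexicographically,
   runtime first and complexity as the tie break.  Here the runtimes are
   equal and A is the less complex, so the result is negative.  */
static int
compare_costs_sketch (void)
{
  comp_cost a = new_cost (4, 2);
  comp_cost b = new_cost (4, 3);
  return compare_costs (a, b);  /* == 2 - 3, i.e. A is cheaper.  */
}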
2719 /* Returns true if COST is infinite. */
2721 static bool
2722 infinite_cost_p (comp_cost cost)
2724 return cost.cost == INFTY;
2727 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2728 on invariants DEPENDS_ON and that the value used in expressing it
2729 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2731 static void
2732 set_use_iv_cost (struct ivopts_data *data,
2733 struct iv_use *use, struct iv_cand *cand,
2734 comp_cost cost, bitmap depends_on, tree value,
2735 enum tree_code comp, int inv_expr_id)
2737 unsigned i, s;
2739 if (infinite_cost_p (cost))
2741 BITMAP_FREE (depends_on);
2742 return;
2745 if (data->consider_all_candidates)
2747 use->cost_map[cand->id].cand = cand;
2748 use->cost_map[cand->id].cost = cost;
2749 use->cost_map[cand->id].depends_on = depends_on;
2750 use->cost_map[cand->id].value = value;
2751 use->cost_map[cand->id].comp = comp;
2752 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2753 return;
2756 /* n_map_members is a power of two, so this computes modulo. */
2757 s = cand->id & (use->n_map_members - 1);
2758 for (i = s; i < use->n_map_members; i++)
2759 if (!use->cost_map[i].cand)
2760 goto found;
2761 for (i = 0; i < s; i++)
2762 if (!use->cost_map[i].cand)
2763 goto found;
2765 gcc_unreachable ();
2767 found:
2768 use->cost_map[i].cand = cand;
2769 use->cost_map[i].cost = cost;
2770 use->cost_map[i].depends_on = depends_on;
2771 use->cost_map[i].value = value;
2772 use->cost_map[i].comp = comp;
2773 use->cost_map[i].inv_expr_id = inv_expr_id;
2776 /* Gets cost of (USE, CANDIDATE) pair. */
2778 static struct cost_pair *
2779 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2780 struct iv_cand *cand)
2782 unsigned i, s;
2783 struct cost_pair *ret;
2785 if (!cand)
2786 return NULL;
2788 if (data->consider_all_candidates)
2790 ret = use->cost_map + cand->id;
2791 if (!ret->cand)
2792 return NULL;
2794 return ret;
2797 /* n_map_members is a power of two, so this computes modulo. */
2798 s = cand->id & (use->n_map_members - 1);
2799 for (i = s; i < use->n_map_members; i++)
2800 if (use->cost_map[i].cand == cand)
2801 return use->cost_map + i;
2802 else if (use->cost_map[i].cand == NULL)
2803 return NULL;
2804 for (i = 0; i < s; i++)
2805 if (use->cost_map[i].cand == cand)
2806 return use->cost_map + i;
2807 else if (use->cost_map[i].cand == NULL)
2808 return NULL;
2810 return NULL;
2813 /* Returns an estimate of the cost of computing SEQ. */
2815 static unsigned
2816 seq_cost (rtx seq, bool speed)
2818 unsigned cost = 0;
2819 rtx set;
2821 for (; seq; seq = NEXT_INSN (seq))
2823 set = single_set (seq);
2824 if (set)
2825 cost += set_src_cost (SET_SRC (set), speed);
2826 else
2827 cost++;
2830 return cost;
2833 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2834 static rtx
2835 produce_memory_decl_rtl (tree obj, int *regno)
2837 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2838 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2839 rtx x;
2841 gcc_assert (obj);
2842 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2844 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2845 x = gen_rtx_SYMBOL_REF (address_mode, name);
2846 SET_SYMBOL_REF_DECL (x, obj);
2847 x = gen_rtx_MEM (DECL_MODE (obj), x);
2848 set_mem_addr_space (x, as);
2849 targetm.encode_section_info (obj, x, true);
2851 else
2853 x = gen_raw_REG (address_mode, (*regno)++);
2854 x = gen_rtx_MEM (DECL_MODE (obj), x);
2855 set_mem_addr_space (x, as);
2858 return x;
2861 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2862 walk_tree. DATA contains the current fake register number. */
2864 static tree
2865 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2867 tree obj = NULL_TREE;
2868 rtx x = NULL_RTX;
2869 int *regno = (int *) data;
2871 switch (TREE_CODE (*expr_p))
2873 case ADDR_EXPR:
2874 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2875 handled_component_p (*expr_p);
2876 expr_p = &TREE_OPERAND (*expr_p, 0))
2877 continue;
2878 obj = *expr_p;
2879 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2880 x = produce_memory_decl_rtl (obj, regno);
2881 break;
2883 case SSA_NAME:
2884 *ws = 0;
2885 obj = SSA_NAME_VAR (*expr_p);
2886 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2887 if (!obj)
2888 return NULL_TREE;
2889 if (!DECL_RTL_SET_P (obj))
2890 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2891 break;
2893 case VAR_DECL:
2894 case PARM_DECL:
2895 case RESULT_DECL:
2896 *ws = 0;
2897 obj = *expr_p;
2899 if (DECL_RTL_SET_P (obj))
2900 break;
2902 if (DECL_MODE (obj) == BLKmode)
2903 x = produce_memory_decl_rtl (obj, regno);
2904 else
2905 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2907 break;
2909 default:
2910 break;
2913 if (x)
2915 decl_rtl_to_reset.safe_push (obj);
2916 SET_DECL_RTL (obj, x);
2919 return NULL_TREE;
2922 /* Determines cost of the computation of EXPR. */
2924 static unsigned
2925 computation_cost (tree expr, bool speed)
2927 rtx seq, rslt;
2928 tree type = TREE_TYPE (expr);
2929 unsigned cost;
2930 /* Avoid using hard regs in ways which may be unsupported. */
2931 int regno = LAST_VIRTUAL_REGISTER + 1;
2932 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2933 enum node_frequency real_frequency = node->frequency;
2935 node->frequency = NODE_FREQUENCY_NORMAL;
2936 crtl->maybe_hot_insn_p = speed;
2937 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2938 start_sequence ();
2939 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2940 seq = get_insns ();
2941 end_sequence ();
2942 default_rtl_profile ();
2943 node->frequency = real_frequency;
2945 cost = seq_cost (seq, speed);
2946 if (MEM_P (rslt))
2947 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2948 TYPE_ADDR_SPACE (type), speed);
2949 else if (!REG_P (rslt))
2950 cost += set_src_cost (rslt, speed);
2952 return cost;
2955 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2957 static tree
2958 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2960 if (stmt_after_increment (loop, cand, stmt))
2961 return cand->var_after;
2962 else
2963 return cand->var_before;
2966 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2967 same precision that is at least as wide as the precision of TYPE, stores
2968 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2969 type of A and B. */
2971 static tree
2972 determine_common_wider_type (tree *a, tree *b)
2974 tree wider_type = NULL;
2975 tree suba, subb;
2976 tree atype = TREE_TYPE (*a);
2978 if (CONVERT_EXPR_P (*a))
2980 suba = TREE_OPERAND (*a, 0);
2981 wider_type = TREE_TYPE (suba);
2982 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2983 return atype;
2985 else
2986 return atype;
2988 if (CONVERT_EXPR_P (*b))
2990 subb = TREE_OPERAND (*b, 0);
2991 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2992 return atype;
2994 else
2995 return atype;
2997 *a = suba;
2998 *b = subb;
2999 return wider_type;
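/* The folding win this enables, as a hypothetical sketch with `long' as
   the wider type: unsigned truncation commutes with subtraction, so when
   both values were truncated from a common wider type, the difference may
   be computed in the wider type and truncated just once.  */
static unsigned short
wider_difference_sketch (long ba, long bb)
{
  /* Equals (unsigned short) ba - (unsigned short) bb modulo 2^16, hence
     subtracting before truncating is safe.  */
  return (unsigned short) (ba - bb);
}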
3002 /* Determines the expression by which USE is expressed from induction variable
3003 CAND at statement AT in LOOP. The expression is stored in a decomposed
3004 form in AFF. Returns false if USE cannot be expressed using CAND. */
3006 static bool
3007 get_computation_aff (struct loop *loop,
3008 struct iv_use *use, struct iv_cand *cand, gimple at,
3009 struct affine_tree_combination *aff)
3011 tree ubase = use->iv->base;
3012 tree ustep = use->iv->step;
3013 tree cbase = cand->iv->base;
3014 tree cstep = cand->iv->step, cstep_common;
3015 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3016 tree common_type, var;
3017 tree uutype;
3018 aff_tree cbase_aff, var_aff;
3019 double_int rat;
3021 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3023 /* We do not have enough precision to express the values of USE. */
3024 return false;
3027 var = var_at_stmt (loop, cand, at);
3028 uutype = unsigned_type_for (utype);
3030 /* If the conversion is not a no-op, perform it. */
3031 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3033 cstep = fold_convert (uutype, cstep);
3034 cbase = fold_convert (uutype, cbase);
3035 var = fold_convert (uutype, var);
3038 if (!constant_multiple_of (ustep, cstep, &rat))
3039 return false;
3041 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3042 type, we achieve better folding by computing their difference in this
3043 wider type, and cast the result to UUTYPE. We do not need to worry about
3044 overflows, as all the arithmetic will in the end be performed in UUTYPE
3045 anyway. */
3046 common_type = determine_common_wider_type (&ubase, &cbase);
3048 /* use = ubase - ratio * cbase + ratio * var. */
3049 tree_to_aff_combination (ubase, common_type, aff);
3050 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3051 tree_to_aff_combination (var, uutype, &var_aff);
3053 /* We need to shift the value if we are after the increment. */
3054 if (stmt_after_increment (loop, cand, at))
3056 aff_tree cstep_aff;
3058 if (common_type != uutype)
3059 cstep_common = fold_convert (common_type, cstep);
3060 else
3061 cstep_common = cstep;
3063 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3064 aff_combination_add (&cbase_aff, &cstep_aff);
3067 aff_combination_scale (&cbase_aff, -rat);
3068 aff_combination_add (aff, &cbase_aff);
3069 if (common_type != uutype)
3070 aff_combination_convert (aff, uutype);
3072 aff_combination_scale (&var_aff, rat);
3073 aff_combination_add (aff, &var_aff);
3075 return true;
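/* A worked instance of "use = ubase - ratio * cbase + ratio * var" with
   made-up numbers: ubase = 10, ustep = 8, cbase = 2, cstep = 4, hence
   ratio = ustep / cstep = 2.  At iteration I the candidate holds
   var = cbase + I * cstep, and the identity reproduces the use value
   ubase + I * ustep.  Illustrative only: */
static long
computation_aff_sketch (long i)
{
  long ubase = 10, cbase = 2, cstep = 4, ratio = 2;
  long var = cbase + i * cstep;                /* Candidate value.  */
  return ubase - ratio * cbase + ratio * var;  /* == 10 + 8 * i.  */
}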
3078 /* Return the type of USE. */
3080 static tree
3081 get_use_type (struct iv_use *use)
3083 tree base_type = TREE_TYPE (use->iv->base);
3084 tree type;
3086 if (use->type == USE_ADDRESS)
3088 /* The base_type may be a void pointer. Create a pointer type based on
3089 the mem_ref instead. */
3090 type = build_pointer_type (TREE_TYPE (*use->op_p));
3091 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3092 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3094 else
3095 type = base_type;
3097 return type;
3100 /* Determines the expression by which USE is expressed from induction variable
3101 CAND at statement AT in LOOP. The computation is unshared. */
3103 static tree
3104 get_computation_at (struct loop *loop,
3105 struct iv_use *use, struct iv_cand *cand, gimple at)
3107 aff_tree aff;
3108 tree type = get_use_type (use);
3110 if (!get_computation_aff (loop, use, cand, at, &aff))
3111 return NULL_TREE;
3112 unshare_aff_combination (&aff);
3113 return fold_convert (type, aff_combination_to_tree (&aff));
3116 /* Determines the expression by which USE is expressed from induction variable
3117 CAND in LOOP. The computation is unshared. */
3119 static tree
3120 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3122 return get_computation_at (loop, use, cand, use->stmt);
3125 /* Adjust the cost COST for being in loop setup rather than loop body.
3126 If we're optimizing for space, the loop setup overhead is constant;
3127 if we're optimizing for speed, amortize it over the per-iteration cost. */
3128 static unsigned
3129 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3131 if (cost == INFTY)
3132 return cost;
3133 else if (optimize_loop_for_speed_p (data->current_loop))
3134 return cost / avg_loop_niter (data->current_loop);
3135 else
3136 return cost;
3139 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3140 validity for a memory reference accessing memory of mode MODE in
3141 address space AS. */
3144 bool
3145 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3146 addr_space_t as)
3148 #define MAX_RATIO 128
3149 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3150 static vec<sbitmap> valid_mult_list;
3151 sbitmap valid_mult;
3153 if (data_index >= valid_mult_list.length ())
3154 valid_mult_list.safe_grow_cleared (data_index + 1);
3156 valid_mult = valid_mult_list[data_index];
3157 if (!valid_mult)
3159 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3160 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3161 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3162 rtx addr, scaled;
3163 HOST_WIDE_INT i;
3165 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3166 bitmap_clear (valid_mult);
3167 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3168 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3169 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3171 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3172 if (memory_address_addr_space_p (mode, addr, as)
3173 || memory_address_addr_space_p (mode, scaled, as))
3174 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3179 fprintf (dump_file, " allowed multipliers:");
3180 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3181 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3182 fprintf (dump_file, " %d", (int) i);
3183 fprintf (dump_file, "\n");
3184 fprintf (dump_file, "\n");
3187 valid_mult_list[data_index] = valid_mult;
3190 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3191 return false;
3193 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
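/* A hedged picture of the answers this caches: on an x86-like target whose
   addressing supports base + index * {1, 2, 4, 8}, the probing loop above
   would set exactly those four bits.  Purely illustrative stand-in: */
static bool
allowed_mult_sketch (HOST_WIDE_INT ratio)
{
  return ratio == 1 || ratio == 2 || ratio == 4 || ratio == 8;
}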
3196 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3197 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3198 variable is omitted. Compute the cost for a memory reference that accesses
3199 a memory location of mode MEM_MODE in address space AS.
3201 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3202 size of MEM_MODE / RATIO) is available. To make this determination, we
3203 look at the size of the increment to be made, which is given in CSTEP.
3204 CSTEP may be zero if the step is unknown.
3205 STMT_AFTER_INC is true iff the statement we're looking at is after the
3206 increment of the original biv.
3208 TODO -- there must be some better way. This all is quite crude. */
3210 enum ainc_type
3212 AINC_PRE_INC, /* Pre increment. */
3213 AINC_PRE_DEC, /* Pre decrement. */
3214 AINC_POST_INC, /* Post increment. */
3215 AINC_POST_DEC, /* Post decrement. */
3216 AINC_NONE /* Also the number of auto increment types. */
3219 typedef struct address_cost_data_s
3221 HOST_WIDE_INT min_offset, max_offset;
3222 unsigned costs[2][2][2][2];
3223 unsigned ainc_costs[AINC_NONE];
3224 } *address_cost_data;
3227 static comp_cost
3228 get_address_cost (bool symbol_present, bool var_present,
3229 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3230 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3231 addr_space_t as, bool speed,
3232 bool stmt_after_inc, bool *may_autoinc)
3234 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3235 static vec<address_cost_data> address_cost_data_list;
3236 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3237 address_cost_data data;
3238 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3239 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3240 unsigned cost, acost, complexity;
3241 enum ainc_type autoinc_type;
3242 bool offset_p, ratio_p, autoinc;
3243 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3244 unsigned HOST_WIDE_INT mask;
3245 unsigned bits;
3247 if (data_index >= address_cost_data_list.length ())
3248 address_cost_data_list.safe_grow_cleared (data_index + 1);
3250 data = address_cost_data_list[data_index];
3251 if (!data)
3253 HOST_WIDE_INT i;
3254 HOST_WIDE_INT rat, off = 0;
3255 int old_cse_not_expected, width;
3256 unsigned sym_p, var_p, off_p, rat_p, add_c;
3257 rtx seq, addr, base;
3258 rtx reg0, reg1;
3260 data = (address_cost_data) xcalloc (1, sizeof (*data));
3262 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3264 width = GET_MODE_BITSIZE (address_mode) - 1;
3265 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3266 width = HOST_BITS_PER_WIDE_INT - 1;
3267 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3269 for (i = width; i >= 0; i--)
3271 off = -((unsigned HOST_WIDE_INT) 1 << i);
3272 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3273 if (memory_address_addr_space_p (mem_mode, addr, as))
3274 break;
3276 data->min_offset = (i == -1 ? 0 : off);
3278 for (i = width; i >= 0; i--)
3280 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3281 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3282 if (memory_address_addr_space_p (mem_mode, addr, as))
3283 break;
3285 if (i == -1)
3286 off = 0;
3287 data->max_offset = off;
3289 if (dump_file && (dump_flags & TDF_DETAILS))
3291 fprintf (dump_file, "get_address_cost:\n");
3292 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3293 GET_MODE_NAME (mem_mode),
3294 data->min_offset);
3295 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3296 GET_MODE_NAME (mem_mode),
3297 data->max_offset);
3300 rat = 1;
3301 for (i = 2; i <= MAX_RATIO; i++)
3302 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3304 rat = i;
3305 break;
3308 /* Compute the cost of various addressing modes. */
3309 acost = 0;
3310 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3311 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3313 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3314 || USE_STORE_PRE_DECREMENT (mem_mode))
3316 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3317 has_predec[mem_mode]
3318 = memory_address_addr_space_p (mem_mode, addr, as);
3320 if (has_predec[mem_mode])
3321 data->ainc_costs[AINC_PRE_DEC]
3322 = address_cost (addr, mem_mode, as, speed);
3324 if (USE_LOAD_POST_DECREMENT (mem_mode)
3325 || USE_STORE_POST_DECREMENT (mem_mode))
3327 addr = gen_rtx_POST_DEC (address_mode, reg0);
3328 has_postdec[mem_mode]
3329 = memory_address_addr_space_p (mem_mode, addr, as);
3331 if (has_postdec[mem_mode])
3332 data->ainc_costs[AINC_POST_DEC]
3333 = address_cost (addr, mem_mode, as, speed);
3335 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3336 || USE_STORE_PRE_INCREMENT (mem_mode))
3338 addr = gen_rtx_PRE_INC (address_mode, reg0);
3339 has_preinc[mem_mode]
3340 = memory_address_addr_space_p (mem_mode, addr, as);
3342 if (has_preinc[mem_mode])
3343 data->ainc_costs[AINC_PRE_INC]
3344 = address_cost (addr, mem_mode, as, speed);
3346 if (USE_LOAD_POST_INCREMENT (mem_mode)
3347 || USE_STORE_POST_INCREMENT (mem_mode))
3349 addr = gen_rtx_POST_INC (address_mode, reg0);
3350 has_postinc[mem_mode]
3351 = memory_address_addr_space_p (mem_mode, addr, as);
3353 if (has_postinc[mem_mode])
3354 data->ainc_costs[AINC_POST_INC]
3355 = address_cost (addr, mem_mode, as, speed);
3357 for (i = 0; i < 16; i++)
3359 sym_p = i & 1;
3360 var_p = (i >> 1) & 1;
3361 off_p = (i >> 2) & 1;
3362 rat_p = (i >> 3) & 1;
3364 addr = reg0;
3365 if (rat_p)
3366 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3367 gen_int_mode (rat, address_mode));
3369 if (var_p)
3370 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3372 if (sym_p)
3374 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3375 /* ??? We can run into trouble with some backends by presenting
3376 them with symbols which haven't been properly passed through
3377 targetm.encode_section_info. By setting the local bit, we
3378 enhance the probability of things working. */
3379 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3381 if (off_p)
3382 base = gen_rtx_fmt_e (CONST, address_mode,
3383 gen_rtx_fmt_ee
3384 (PLUS, address_mode, base,
3385 gen_int_mode (off, address_mode)));
3387 else if (off_p)
3388 base = gen_int_mode (off, address_mode);
3389 else
3390 base = NULL_RTX;
3392 if (base)
3393 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3395 start_sequence ();
3396 /* To avoid splitting addressing modes, pretend that no cse will
3397 follow. */
3398 old_cse_not_expected = cse_not_expected;
3399 cse_not_expected = true;
3400 addr = memory_address_addr_space (mem_mode, addr, as);
3401 cse_not_expected = old_cse_not_expected;
3402 seq = get_insns ();
3403 end_sequence ();
3405 acost = seq_cost (seq, speed);
3406 acost += address_cost (addr, mem_mode, as, speed);
3408 if (!acost)
3409 acost = 1;
3410 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3413 /* On some targets, it is quite expensive to load a symbol into a register,
3414 which makes addresses that contain symbols look much more expensive.
3415 However, the symbol will have to be loaded in any case before the
3416 loop (and quite likely we have it in a register already), so it does not
3417 make much sense to penalize them too heavily. So make some final
3418 tweaks for the SYMBOL_PRESENT modes:
3420 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3421 var is cheaper, use this mode with small penalty.
3422 If VAR_PRESENT is true, try whether the mode with
3423 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3424 if this is the case, use it. */
3425 add_c = add_cost (speed, address_mode);
3426 for (i = 0; i < 8; i++)
3428 var_p = i & 1;
3429 off_p = (i >> 1) & 1;
3430 rat_p = (i >> 2) & 1;
3432 acost = data->costs[0][1][off_p][rat_p] + 1;
3433 if (var_p)
3434 acost += add_c;
3436 if (acost < data->costs[1][var_p][off_p][rat_p])
3437 data->costs[1][var_p][off_p][rat_p] = acost;
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3442 fprintf (dump_file, "Address costs:\n");
3444 for (i = 0; i < 16; i++)
3446 sym_p = i & 1;
3447 var_p = (i >> 1) & 1;
3448 off_p = (i >> 2) & 1;
3449 rat_p = (i >> 3) & 1;
3451 fprintf (dump_file, " ");
3452 if (sym_p)
3453 fprintf (dump_file, "sym + ");
3454 if (var_p)
3455 fprintf (dump_file, "var + ");
3456 if (off_p)
3457 fprintf (dump_file, "cst + ");
3458 if (rat_p)
3459 fprintf (dump_file, "rat * ");
3461 acost = data->costs[sym_p][var_p][off_p][rat_p];
3462 fprintf (dump_file, "index costs %d\n", acost);
3464 if (has_predec[mem_mode] || has_postdec[mem_mode]
3465 || has_preinc[mem_mode] || has_postinc[mem_mode])
3466 fprintf (dump_file, " May include autoinc/dec\n");
3467 fprintf (dump_file, "\n");
3470 address_cost_data_list[data_index] = data;
3473 bits = GET_MODE_BITSIZE (address_mode);
3474 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3475 offset &= mask;
3476 if ((offset >> (bits - 1) & 1))
3477 offset |= ~mask;
3478 s_offset = offset;
3480 autoinc = false;
3481 autoinc_type = AINC_NONE;
3482 msize = GET_MODE_SIZE (mem_mode);
3483 autoinc_offset = offset;
3484 if (stmt_after_inc)
3485 autoinc_offset += ratio * cstep;
3486 if (symbol_present || var_present || ratio != 1)
3487 autoinc = false;
3488 else
3490 if (has_postinc[mem_mode] && autoinc_offset == 0
3491 && msize == cstep)
3492 autoinc_type = AINC_POST_INC;
3493 else if (has_postdec[mem_mode] && autoinc_offset == 0
3494 && msize == -cstep)
3495 autoinc_type = AINC_POST_DEC;
3496 else if (has_preinc[mem_mode] && autoinc_offset == msize
3497 && msize == cstep)
3498 autoinc_type = AINC_PRE_INC;
3499 else if (has_predec[mem_mode] && autoinc_offset == -msize
3500 && msize == -cstep)
3501 autoinc_type = AINC_PRE_DEC;
3503 if (autoinc_type != AINC_NONE)
3504 autoinc = true;
3507 cost = 0;
3508 offset_p = (s_offset != 0
3509 && data->min_offset <= s_offset
3510 && s_offset <= data->max_offset);
3511 ratio_p = (ratio != 1
3512 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3514 if (ratio != 1 && !ratio_p)
3515 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3517 if (s_offset && !offset_p && !symbol_present)
3518 cost += add_cost (speed, address_mode);
3520 if (may_autoinc)
3521 *may_autoinc = autoinc;
3522 if (autoinc)
3523 acost = data->ainc_costs[autoinc_type];
3524 else
3525 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3526 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3527 return new_cost (cost + acost, complexity);
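/* The mask arithmetic near the top of this function sign-extends OFFSET
   from the width of ADDRESS_MODE to a full HOST_WIDE_INT.  A hypothetical
   sketch for a 16-bit address mode (assuming the usual two's complement
   conversion to a signed type): */
static long
sign_extend_sketch (unsigned long offset)
{
  unsigned int bits = 16;
  unsigned long mask = ~(~0UL << (bits - 1) << 1);  /* 0xffff.  */
  offset &= mask;
  if ((offset >> (bits - 1)) & 1)
    offset |= ~mask;       /* Propagate the sign bit upwards.  */
  return (long) offset;    /* E.g. 0xfffe becomes -2.  */
}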
3530 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3531 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3532 calculating the operands of EXPR. Returns true if successful, and returns
3533 the cost in COST. */
3535 static bool
3536 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3537 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3539 comp_cost res;
3540 tree op1 = TREE_OPERAND (expr, 1);
3541 tree cst = TREE_OPERAND (mult, 1);
3542 tree multop = TREE_OPERAND (mult, 0);
3543 int m = exact_log2 (int_cst_value (cst));
3544 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3545 int sa_cost;
3546 bool equal_p = false;
3548 if (!(m >= 0 && m < maxm))
3549 return false;
3551 if (operand_equal_p (op1, mult, 0))
3552 equal_p = true;
3554 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3555 ? shiftadd_cost (speed, mode, m)
3556 : (equal_p
3557 ? shiftsub1_cost (speed, mode, m)
3558 : shiftsub0_cost (speed, mode, m)));
3559 res = new_cost (sa_cost, 0);
3560 res = add_costs (res, equal_p ? cost0 : cost1);
3562 STRIP_NOPS (multop);
3563 if (!is_gimple_val (multop))
3564 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3566 *cost = res;
3567 return true;
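/* The strength reduction whose cost this models, as an illustrative
   fragment: a multiplication by a power of two folded into a shift that
   feeds an add (here m == 3, since 8 == 1 << 3).  */
static long
shiftadd_sketch (long x, long y)
{
  return y + (x << 3);  /* Shift-add form of y + x * 8.  */
}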
3570 /* Estimates cost of forcing expression EXPR into a variable. */
3572 static comp_cost
3573 force_expr_to_var_cost (tree expr, bool speed)
3575 static bool costs_initialized = false;
3576 static unsigned integer_cost [2];
3577 static unsigned symbol_cost [2];
3578 static unsigned address_cost [2];
3579 tree op0, op1;
3580 comp_cost cost0, cost1, cost;
3581 enum machine_mode mode;
3583 if (!costs_initialized)
3585 tree type = build_pointer_type (integer_type_node);
3586 tree var, addr;
3587 rtx x;
3588 int i;
3590 var = create_tmp_var_raw (integer_type_node, "test_var");
3591 TREE_STATIC (var) = 1;
3592 x = produce_memory_decl_rtl (var, NULL);
3593 SET_DECL_RTL (var, x);
3595 addr = build1 (ADDR_EXPR, type, var);
3598 for (i = 0; i < 2; i++)
3600 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3601 2000), i);
3603 symbol_cost[i] = computation_cost (addr, i) + 1;
3605 address_cost[i]
3606 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3607 if (dump_file && (dump_flags & TDF_DETAILS))
3609 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3610 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3611 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3612 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3613 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3614 fprintf (dump_file, "\n");
3618 costs_initialized = true;
3621 STRIP_NOPS (expr);
3623 if (SSA_VAR_P (expr))
3624 return no_cost;
3626 if (is_gimple_min_invariant (expr))
3628 if (TREE_CODE (expr) == INTEGER_CST)
3629 return new_cost (integer_cost [speed], 0);
3631 if (TREE_CODE (expr) == ADDR_EXPR)
3633 tree obj = TREE_OPERAND (expr, 0);
3635 if (TREE_CODE (obj) == VAR_DECL
3636 || TREE_CODE (obj) == PARM_DECL
3637 || TREE_CODE (obj) == RESULT_DECL)
3638 return new_cost (symbol_cost [speed], 0);
3641 return new_cost (address_cost [speed], 0);
3644 switch (TREE_CODE (expr))
3646 case POINTER_PLUS_EXPR:
3647 case PLUS_EXPR:
3648 case MINUS_EXPR:
3649 case MULT_EXPR:
3650 op0 = TREE_OPERAND (expr, 0);
3651 op1 = TREE_OPERAND (expr, 1);
3652 STRIP_NOPS (op0);
3653 STRIP_NOPS (op1);
3654 break;
3656 CASE_CONVERT:
3657 case NEGATE_EXPR:
3658 op0 = TREE_OPERAND (expr, 0);
3659 STRIP_NOPS (op0);
3660 op1 = NULL_TREE;
3661 break;
3663 default:
3664 /* Just an arbitrary value, FIXME. */
3665 return new_cost (target_spill_cost[speed], 0);
3668 if (op0 == NULL_TREE
3669 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3670 cost0 = no_cost;
3671 else
3672 cost0 = force_expr_to_var_cost (op0, speed);
3674 if (op1 == NULL_TREE
3675 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3676 cost1 = no_cost;
3677 else
3678 cost1 = force_expr_to_var_cost (op1, speed);
3680 mode = TYPE_MODE (TREE_TYPE (expr));
3681 switch (TREE_CODE (expr))
3683 case POINTER_PLUS_EXPR:
3684 case PLUS_EXPR:
3685 case MINUS_EXPR:
3686 case NEGATE_EXPR:
3687 cost = new_cost (add_cost (speed, mode), 0);
3688 if (TREE_CODE (expr) != NEGATE_EXPR)
3690 tree mult = NULL_TREE;
3691 comp_cost sa_cost;
3692 if (TREE_CODE (op1) == MULT_EXPR)
3693 mult = op1;
3694 else if (TREE_CODE (op0) == MULT_EXPR)
3695 mult = op0;
3697 if (mult != NULL_TREE
3698 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3699 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3700 speed, &sa_cost))
3701 return sa_cost;
3703 break;
3705 CASE_CONVERT:
3707 tree inner_mode, outer_mode;
3708 outer_mode = TREE_TYPE (expr);
3709 inner_mode = TREE_TYPE (op0);
3710 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3711 TYPE_MODE (inner_mode), speed), 0);
3713 break;
3715 case MULT_EXPR:
3716 if (cst_and_fits_in_hwi (op0))
3717 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3718 mode, speed), 0);
3719 else if (cst_and_fits_in_hwi (op1))
3720 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3721 mode, speed), 0);
3722 else
3723 return new_cost (target_spill_cost [speed], 0);
3724 break;
3726 default:
3727 gcc_unreachable ();
3730 cost = add_costs (cost, cost0);
3731 cost = add_costs (cost, cost1);
3733 /* Bound the cost by target_spill_cost. The parts of complicated
3734 computations often are either loop invariant or at least can
3735 be shared between several iv uses, so letting this grow without
3736 limits would not give reasonable results. */
3737 if (cost.cost > (int) target_spill_cost [speed])
3738 cost.cost = target_spill_cost [speed];
3740 return cost;
3743 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3744 invariants the computation depends on. */
3746 static comp_cost
3747 force_var_cost (struct ivopts_data *data,
3748 tree expr, bitmap *depends_on)
3750 if (depends_on)
3752 fd_ivopts_data = data;
3753 walk_tree (&expr, find_depends, depends_on, NULL);
3756 return force_expr_to_var_cost (expr, data->speed);
3759 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3760 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3761 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3762 invariants the computation depends on. */
3764 static comp_cost
3765 split_address_cost (struct ivopts_data *data,
3766 tree addr, bool *symbol_present, bool *var_present,
3767 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3769 tree core;
3770 HOST_WIDE_INT bitsize;
3771 HOST_WIDE_INT bitpos;
3772 tree toffset;
3773 enum machine_mode mode;
3774 int unsignedp, volatilep;
3776 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3777 &unsignedp, &volatilep, false);
3779 if (toffset != 0
3780 || bitpos % BITS_PER_UNIT != 0
3781 || TREE_CODE (core) != VAR_DECL)
3783 *symbol_present = false;
3784 *var_present = true;
3785 fd_ivopts_data = data;
3786 walk_tree (&addr, find_depends, depends_on, NULL);
3787 return new_cost (target_spill_cost[data->speed], 0);
3790 *offset += bitpos / BITS_PER_UNIT;
3791 if (TREE_STATIC (core)
3792 || DECL_EXTERNAL (core))
3794 *symbol_present = true;
3795 *var_present = false;
3796 return no_cost;
3799 *symbol_present = false;
3800 *var_present = true;
3801 return no_cost;
3804 /* Estimates cost of expressing difference of addresses E1 - E2 as
3805 var + symbol + offset. The value of offset is added to OFFSET,
3806 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3807 part is missing. DEPENDS_ON is a set of the invariants the computation
3808 depends on. */
3810 static comp_cost
3811 ptr_difference_cost (struct ivopts_data *data,
3812 tree e1, tree e2, bool *symbol_present, bool *var_present,
3813 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3815 HOST_WIDE_INT diff = 0;
3816 aff_tree aff_e1, aff_e2;
3817 tree type;
3819 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3821 if (ptr_difference_const (e1, e2, &diff))
3823 *offset += diff;
3824 *symbol_present = false;
3825 *var_present = false;
3826 return no_cost;
3829 if (integer_zerop (e2))
3830 return split_address_cost (data, TREE_OPERAND (e1, 0),
3831 symbol_present, var_present, offset, depends_on);
3833 *symbol_present = false;
3834 *var_present = true;
3836 type = signed_type_for (TREE_TYPE (e1));
3837 tree_to_aff_combination (e1, type, &aff_e1);
3838 tree_to_aff_combination (e2, type, &aff_e2);
3839 aff_combination_scale (&aff_e2, double_int_minus_one);
3840 aff_combination_add (&aff_e1, &aff_e2);
3842 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3845 /* Estimates cost of expressing difference E1 - E2 as
3846 var + symbol + offset. The value of offset is added to OFFSET,
3847 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3848 part is missing. DEPENDS_ON is a set of the invariants the computation
3849 depends on. */
3851 static comp_cost
3852 difference_cost (struct ivopts_data *data,
3853 tree e1, tree e2, bool *symbol_present, bool *var_present,
3854 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3856 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3857 unsigned HOST_WIDE_INT off1, off2;
3858 aff_tree aff_e1, aff_e2;
3859 tree type;
3861 e1 = strip_offset (e1, &off1);
3862 e2 = strip_offset (e2, &off2);
3863 *offset += off1 - off2;
3865 STRIP_NOPS (e1);
3866 STRIP_NOPS (e2);
3868 if (TREE_CODE (e1) == ADDR_EXPR)
3869 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3870 offset, depends_on);
3871 *symbol_present = false;
3873 if (operand_equal_p (e1, e2, 0))
3875 *var_present = false;
3876 return no_cost;
3879 *var_present = true;
3881 if (integer_zerop (e2))
3882 return force_var_cost (data, e1, depends_on);
3884 if (integer_zerop (e1))
3886 comp_cost cost = force_var_cost (data, e2, depends_on);
3887 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3888 return cost;
3891 type = signed_type_for (TREE_TYPE (e1));
3892 tree_to_aff_combination (e1, type, &aff_e1);
3893 tree_to_aff_combination (e2, type, &aff_e2);
3894 aff_combination_scale (&aff_e2, double_int_minus_one);
3895 aff_combination_add (&aff_e1, &aff_e2);
3897 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3900 /* Returns true if AFF1 and AFF2 are identical. */
3902 static bool
3903 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3905 unsigned i;
3907 if (aff1->n != aff2->n)
3908 return false;
3910 for (i = 0; i < aff1->n; i++)
3912 if (aff1->elts[i].coef != aff2->elts[i].coef)
3913 return false;
3915 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3916 return false;
3918 return true;
3921 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3923 static int
3924 get_expr_id (struct ivopts_data *data, tree expr)
3926 struct iv_inv_expr_ent ent;
3927 struct iv_inv_expr_ent **slot;
3929 ent.expr = expr;
3930 ent.hash = iterative_hash_expr (expr, 0);
3931 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3932 if (*slot)
3933 return (*slot)->id;
3935 *slot = XNEW (struct iv_inv_expr_ent);
3936 (*slot)->expr = expr;
3937 (*slot)->hash = ent.hash;
3938 (*slot)->id = data->inv_expr_id++;
3939 return (*slot)->id;
3942 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3943 requires a new compiler generated temporary. Returns -1 otherwise.
3944 ADDRESS_P is a flag indicating if the expression is for address
3945 computation. */
3947 static int
3948 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3949 tree cbase, HOST_WIDE_INT ratio,
3950 bool address_p)
3952 aff_tree ubase_aff, cbase_aff;
3953 tree expr, ub, cb;
3955 STRIP_NOPS (ubase);
3956 STRIP_NOPS (cbase);
3957 ub = ubase;
3958 cb = cbase;
3960 if ((TREE_CODE (ubase) == INTEGER_CST)
3961 && (TREE_CODE (cbase) == INTEGER_CST))
3962 return -1;
3964 /* Strips the constant part. */
3965 if (TREE_CODE (ubase) == PLUS_EXPR
3966 || TREE_CODE (ubase) == MINUS_EXPR
3967 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3969 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3970 ubase = TREE_OPERAND (ubase, 0);
3973 /* Strips the constant part. */
3974 if (TREE_CODE (cbase) == PLUS_EXPR
3975 || TREE_CODE (cbase) == MINUS_EXPR
3976 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3978 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3979 cbase = TREE_OPERAND (cbase, 0);
3982 if (address_p)
3984 if (((TREE_CODE (ubase) == SSA_NAME)
3985 || (TREE_CODE (ubase) == ADDR_EXPR
3986 && is_gimple_min_invariant (ubase)))
3987 && (TREE_CODE (cbase) == INTEGER_CST))
3988 return -1;
3990 if (((TREE_CODE (cbase) == SSA_NAME)
3991 || (TREE_CODE (cbase) == ADDR_EXPR
3992 && is_gimple_min_invariant (cbase)))
3993 && (TREE_CODE (ubase) == INTEGER_CST))
3994 return -1;
3997 if (ratio == 1)
3999 if (operand_equal_p (ubase, cbase, 0))
4000 return -1;
4002 if (TREE_CODE (ubase) == ADDR_EXPR
4003 && TREE_CODE (cbase) == ADDR_EXPR)
4005 tree usym, csym;
4007 usym = TREE_OPERAND (ubase, 0);
4008 csym = TREE_OPERAND (cbase, 0);
4009 if (TREE_CODE (usym) == ARRAY_REF)
4011 tree ind = TREE_OPERAND (usym, 1);
4012 if (TREE_CODE (ind) == INTEGER_CST
4013 && tree_fits_shwi_p (ind)
4014 && TREE_INT_CST_LOW (ind) == 0)
4015 usym = TREE_OPERAND (usym, 0);
4017 if (TREE_CODE (csym) == ARRAY_REF)
4019 tree ind = TREE_OPERAND (csym, 1);
4020 if (TREE_CODE (ind) == INTEGER_CST
4021 && tree_fits_shwi_p (ind)
4022 && TREE_INT_CST_LOW (ind) == 0)
4023 csym = TREE_OPERAND (csym, 0);
4025 if (operand_equal_p (usym, csym, 0))
4026 return -1;
4028 /* Now do a more complex comparison.  */
4029 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4030 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4031 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4032 return -1;
4035 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4036 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4038 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4039 aff_combination_add (&ubase_aff, &cbase_aff);
4040 expr = aff_combination_to_tree (&ubase_aff);
4041 return get_expr_id (data, expr);
4046 /* Determines the cost of the computation by which USE is expressed
4047 from induction variable CAND. If ADDRESS_P is true, we just need
4048 to create an address from it, otherwise we want to get it into a
4049 register. A set of invariants we depend on is stored in
4050 DEPENDS_ON. AT is the statement at which the value is computed.
4051 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4052 addressing is likely. */
4054 static comp_cost
4055 get_computation_cost_at (struct ivopts_data *data,
4056 struct iv_use *use, struct iv_cand *cand,
4057 bool address_p, bitmap *depends_on, gimple at,
4058 bool *can_autoinc,
4059 int *inv_expr_id)
4061 tree ubase = use->iv->base, ustep = use->iv->step;
4062 tree cbase, cstep;
4063 tree utype = TREE_TYPE (ubase), ctype;
4064 unsigned HOST_WIDE_INT cstepi, offset = 0;
4065 HOST_WIDE_INT ratio, aratio;
4066 bool var_present, symbol_present, stmt_is_after_inc;
4067 comp_cost cost;
4068 double_int rat;
4069 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4070 enum machine_mode mem_mode = (address_p
4071 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4072 : VOIDmode);
4074 *depends_on = NULL;
4076 /* Only consider real candidates. */
4077 if (!cand->iv)
4078 return infinite_cost;
4080 cbase = cand->iv->base;
4081 cstep = cand->iv->step;
4082 ctype = TREE_TYPE (cbase);
4084 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4086 /* We do not have enough precision to express the values of USE. */
4087 return infinite_cost;
4090 if (address_p
4091 || (use->iv->base_object
4092 && cand->iv->base_object
4093 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4094 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4096 /* Do not try to express the address of an object with a computation based
4097 on the address of a different object. This may cause problems in
4098 RTL-level alias analysis (which does not expect this to happen,
4099 as it is illegal in C), and would be unlikely to be useful
4100 anyway. */
4101 if (use->iv->base_object
4102 && cand->iv->base_object
4103 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4104 return infinite_cost;
4107 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4109 /* TODO -- add direct handling of this case. */
4110 goto fallback;
4113 /* CSTEPI is removed from the offset in case the statement is after the
4114 increment. If the step is not constant, we use zero instead.
4115 This is a bit imprecise (there is the extra addition), but
4116 redundancy elimination is likely to transform the code so that
4117 it uses the value of the variable before the increment anyway,
4118 so this is not all that unrealistic. */
4119 if (cst_and_fits_in_hwi (cstep))
4120 cstepi = int_cst_value (cstep);
4121 else
4122 cstepi = 0;
4124 if (!constant_multiple_of (ustep, cstep, &rat))
4125 return infinite_cost;
4127 if (rat.fits_shwi ())
4128 ratio = rat.to_shwi ();
4129 else
4130 return infinite_cost;
4132 STRIP_NOPS (cbase);
4133 ctype = TREE_TYPE (cbase);
4135 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4137 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4138 or ratio == 1, it is better to handle this like
4140 ubase - ratio * cbase + ratio * var
4142 (this also holds in the case ratio == -1, TODO). */
4144 if (cst_and_fits_in_hwi (cbase))
4146 offset = - ratio * int_cst_value (cbase);
4147 cost = difference_cost (data,
4148 ubase, build_int_cst (utype, 0),
4149 &symbol_present, &var_present, &offset,
4150 depends_on);
4151 cost.cost /= avg_loop_niter (data->current_loop);
4153 else if (ratio == 1)
4155 tree real_cbase = cbase;
4157 /* Check to see if any adjustment is needed. */
4158 if (cstepi == 0 && stmt_is_after_inc)
4160 aff_tree real_cbase_aff;
4161 aff_tree cstep_aff;
4163 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4164 &real_cbase_aff);
4165 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4167 aff_combination_add (&real_cbase_aff, &cstep_aff);
4168 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4171 cost = difference_cost (data,
4172 ubase, real_cbase,
4173 &symbol_present, &var_present, &offset,
4174 depends_on);
4175 cost.cost /= avg_loop_niter (data->current_loop);
4177 else if (address_p
4178 && !POINTER_TYPE_P (ctype)
4179 && multiplier_allowed_in_address_p
4180 (ratio, mem_mode,
4181 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4183 cbase
4184 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4185 cost = difference_cost (data,
4186 ubase, cbase,
4187 &symbol_present, &var_present, &offset,
4188 depends_on);
4189 cost.cost /= avg_loop_niter (data->current_loop);
4191 else
4193 cost = force_var_cost (data, cbase, depends_on);
4194 cost = add_costs (cost,
4195 difference_cost (data,
4196 ubase, build_int_cst (utype, 0),
4197 &symbol_present, &var_present,
4198 &offset, depends_on));
4199 cost.cost /= avg_loop_niter (data->current_loop);
4200 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4203 if (inv_expr_id)
4205 *inv_expr_id =
4206 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4207 /* Clear depends on. */
4208 if (*inv_expr_id != -1 && depends_on && *depends_on)
4209 bitmap_clear (*depends_on);
4212 /* If we are after the increment, the value of the candidate is higher by
4213 one iteration. */
4214 if (stmt_is_after_inc)
4215 offset -= ratio * cstepi;
4217 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4218 (symbol/var1/const parts may be omitted). If we are looking for an
4219 address, find the cost of addressing this. */
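/* For instance (hypothetical use): an access a[i + 3] with 4-byte
   elements, based on a candidate var2 counting iterations, could take the
   shape &a + 12 + 4 * var2, i.e. symbol == &a, const == 12, ratio == 4
   and no var1 part.  */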
4220 if (address_p)
4221 return add_costs (cost,
4222 get_address_cost (symbol_present, var_present,
4223 offset, ratio, cstepi,
4224 mem_mode,
4225 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4226 speed, stmt_is_after_inc,
4227 can_autoinc));
4229 /* Otherwise estimate the costs for computing the expression. */
4230 if (!symbol_present && !var_present && !offset)
4232 if (ratio != 1)
4233 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4234 return cost;
4237 /* Symbol + offset should be compile-time computable, so consider that they
4238 are added once to the variable, if present. */
4239 if (var_present && (symbol_present || offset))
4240 cost.cost += adjust_setup_cost (data,
4241 add_cost (speed, TYPE_MODE (ctype)));
4243 /* Having an offset does not affect the runtime cost if it is added to the
4244 symbol, but it increases complexity. */
4245 if (offset)
4246 cost.complexity++;
4248 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4250 aratio = ratio > 0 ? ratio : -ratio;
4251 if (aratio != 1)
4252 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4253 return cost;
4255 fallback:
4256 if (can_autoinc)
4257 *can_autoinc = false;
4260 /* Just get the expression, expand it and measure the cost. */
4261 tree comp = get_computation_at (data->current_loop, use, cand, at);
4263 if (!comp)
4264 return infinite_cost;
4266 if (address_p)
4267 comp = build_simple_mem_ref (comp);
4269 return new_cost (computation_cost (comp, speed), 0);
4273 /* Determines the cost of the computation by which USE is expressed
4274 from induction variable CAND. If ADDRESS_P is true, we just need
4275 to create an address from it, otherwise we want to get it into a
4276 register. A set of invariants we depend on is stored in
4277 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4278 autoinc addressing is likely. */
4280 static comp_cost
4281 get_computation_cost (struct ivopts_data *data,
4282 struct iv_use *use, struct iv_cand *cand,
4283 bool address_p, bitmap *depends_on,
4284 bool *can_autoinc, int *inv_expr_id)
4286 return get_computation_cost_at (data,
4287 use, cand, address_p, depends_on, use->stmt,
4288 can_autoinc, inv_expr_id);
4291 /* Determines cost of basing replacement of USE on CAND in a generic
4292 expression. */
4294 static bool
4295 determine_use_iv_cost_generic (struct ivopts_data *data,
4296 struct iv_use *use, struct iv_cand *cand)
4298 bitmap depends_on;
4299 comp_cost cost;
4300 int inv_expr_id = -1;
4302 /* The simple case first -- if we need to express the value of the preserved
4303 original biv, the cost is 0. This also prevents us from counting the
4304 cost of increment twice -- once at this use and once in the cost of
4305 the candidate. */
4306 if (cand->pos == IP_ORIGINAL
4307 && cand->incremented_at == use->stmt)
4309 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4310 ERROR_MARK, -1);
4311 return true;
4314 cost = get_computation_cost (data, use, cand, false, &depends_on,
4315 NULL, &inv_expr_id);
4317 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4318 inv_expr_id);
4320 return !infinite_cost_p (cost);
4323 /* Determines cost of basing replacement of USE on CAND in an address. */
4325 static bool
4326 determine_use_iv_cost_address (struct ivopts_data *data,
4327 struct iv_use *use, struct iv_cand *cand)
4329 bitmap depends_on;
4330 bool can_autoinc;
4331 int inv_expr_id = -1;
4332 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4333 &can_autoinc, &inv_expr_id);
4335 if (cand->ainc_use == use)
4337 if (can_autoinc)
4338 cost.cost -= cand->cost_step;
4339 /* If we generated the candidate solely for exploiting autoincrement
4340 opportunities, and it turns out it can't be used, set the cost to
4341 infinity to make sure we ignore it. */
4342 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4343 cost = infinite_cost;
4345 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4346 inv_expr_id);
4348 return !infinite_cost_p (cost);
4351 /* Computes value of candidate CAND at position AT in iteration NITER, and
4352 stores it to VAL. */
4354 static void
4355 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4356 aff_tree *val)
4358 aff_tree step, delta, nit;
4359 struct iv *iv = cand->iv;
4360 tree type = TREE_TYPE (iv->base);
4361 tree steptype = type;
4362 if (POINTER_TYPE_P (type))
4363 steptype = sizetype;
4365 tree_to_aff_combination (iv->step, steptype, &step);
4366 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4367 aff_combination_convert (&nit, steptype);
4368 aff_combination_mult (&nit, &step, &delta);
4369 if (stmt_after_increment (loop, cand, at))
4370 aff_combination_add (&delta, &step);
4372 tree_to_aff_combination (iv->base, type, val);
4373 aff_combination_add (val, &delta);
4376 /* Returns period of induction variable iv. */
4378 static tree
4379 iv_period (struct iv *iv)
4381 tree step = iv->step, period, type;
4382 tree pow2div;
4384 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4386 type = unsigned_type_for (TREE_TYPE (step));
4387 /* Period of the iv is lcm (step, type_range)/step - 1,
4388 i.e., N*type_range/step - 1. Since the type range is a power
4389 of two, N == step >> num_of_ending_zeros_binary (step),
4390 so the final result is
4392 (type_range >> num_of_ending_zeros_binary (step)) - 1
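/* Worked example: for a 32-bit unsigned type and step == 12 (binary 1100,
   two trailing zeros), the period is (2^32 >> 2) - 1 == 0x3fffffff.  */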
4395 pow2div = num_ending_zeros (step);
4397 period = build_low_bits_mask (type,
4398 (TYPE_PRECISION (type)
4399 - tree_to_uhwi (pow2div)));
4401 return period;
4404 /* Returns the comparison operator used when eliminating the iv USE. */
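/* E.g., when the exit is taken on the true edge of the condition, the
   eliminated test becomes "cand == bound" (leave the loop on equality);
   when it is taken on the false edge, it becomes "cand != bound" (stay
   in the loop while unequal).  */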
4406 static enum tree_code
4407 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4409 struct loop *loop = data->current_loop;
4410 basic_block ex_bb;
4411 edge exit;
4413 ex_bb = gimple_bb (use->stmt);
4414 exit = EDGE_SUCC (ex_bb, 0);
4415 if (flow_bb_inside_loop_p (loop, exit->dest))
4416 exit = EDGE_SUCC (ex_bb, 1);
4418 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
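/* Strips conversions from EXP that change neither its value nor whether
   its type wraps on overflow.  */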
4421 static tree
4422 strip_wrap_conserving_type_conversions (tree exp)
4424 while (tree_ssa_useless_type_conversion (exp)
4425 && (nowrap_type_p (TREE_TYPE (exp))
4426 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4427 exp = TREE_OPERAND (exp, 0);
4428 return exp;
4431 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4432 check for an exact match. */
4434 static bool
4435 expr_equal_p (tree e, tree what)
4437 gimple stmt;
4438 enum tree_code code;
4440 e = strip_wrap_conserving_type_conversions (e);
4441 what = strip_wrap_conserving_type_conversions (what);
4443 code = TREE_CODE (what);
4444 if (TREE_TYPE (e) != TREE_TYPE (what))
4445 return false;
4447 if (operand_equal_p (e, what, 0))
4448 return true;
4450 if (TREE_CODE (e) != SSA_NAME)
4451 return false;
4453 stmt = SSA_NAME_DEF_STMT (e);
4454 if (gimple_code (stmt) != GIMPLE_ASSIGN
4455 || gimple_assign_rhs_code (stmt) != code)
4456 return false;
4458 switch (get_gimple_rhs_class (code))
4460 case GIMPLE_BINARY_RHS:
4461 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4462 return false;
4463 /* Fallthru. */
4465 case GIMPLE_UNARY_RHS:
4466 case GIMPLE_SINGLE_RHS:
4467 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4468 default:
4469 return false;
4473 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4474 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4475 calculation is performed in a non-wrapping type.
4477 TODO: More generally, we could test for the situation that
4478 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4479 This would require knowing the sign of OFFSET.
4481 Also, we only look for the first addition in the computation of BASE.
4482 More complex analysis would be better, but introducing it just for
4483 this optimization seems like overkill. */
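/* E.g., if BASE was computed as SOMETHING + OFFSET in a type that does
   not wrap, then BASE - OFFSET is exactly SOMETHING, so the subtraction
   cannot overflow either.  */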
4485 static bool
4486 difference_cannot_overflow_p (tree base, tree offset)
4488 enum tree_code code;
4489 tree e1, e2;
4491 if (!nowrap_type_p (TREE_TYPE (base)))
4492 return false;
4494 base = expand_simple_operations (base);
4496 if (TREE_CODE (base) == SSA_NAME)
4498 gimple stmt = SSA_NAME_DEF_STMT (base);
4500 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4501 return false;
4503 code = gimple_assign_rhs_code (stmt);
4504 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4505 return false;
4507 e1 = gimple_assign_rhs1 (stmt);
4508 e2 = gimple_assign_rhs2 (stmt);
4510 else
4512 code = TREE_CODE (base);
4513 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4514 return false;
4515 e1 = TREE_OPERAND (base, 0);
4516 e2 = TREE_OPERAND (base, 1);
4519 /* TODO: deeper inspection may be necessary to prove the equality. */
4520 switch (code)
4522 case PLUS_EXPR:
4523 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4524 case POINTER_PLUS_EXPR:
4525 return expr_equal_p (e2, offset);
4527 default:
4528 return false;
4532 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4533 comparison with CAND. NITER describes the number of iterations of
4534 the loop. If successful, the comparison in COMP_P is altered accordingly.
4536 We aim to handle the following situation:
4538 sometype *base, *p;
4539 int a, b, i;
4541 i = a;
4542 p = p_0 = base + a;
4544 do
4545 {
4546 bla (*p);
4547 p++;
4548 i++;
4549 }
4550 while (i < b);
4552 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4553 We aim to optimize this to
4555 p = p_0 = base + a;
4556 do
4557 {
4558 bla (*p);
4559 p++;
4560 }
4561 while (p < p_0 - a + b);
4563 This preserves correctness, since the pointer arithmetic does not
4564 overflow. More precisely:
4566 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4567 overflow in computing it or the values of p.
4568 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4569 overflow. To prove this, we use the fact that p_0 = base + a. */
4571 static bool
4572 iv_elimination_compare_lt (struct ivopts_data *data,
4573 struct iv_cand *cand, enum tree_code *comp_p,
4574 struct tree_niter_desc *niter)
4576 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4577 struct affine_tree_combination nit, tmpa, tmpb;
4578 enum tree_code comp;
4579 HOST_WIDE_INT step;
4581 /* We need to know that the candidate induction variable does not overflow.
4582 While more complex analysis may be used to prove this, for now just
4583 check that the variable appears in the original program and that it
4584 is computed in a type that guarantees no overflows. */
4585 cand_type = TREE_TYPE (cand->iv->base);
4586 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4587 return false;
4589 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4590 the calculation of the BOUND could overflow, making the comparison
4591 invalid. */
4592 if (!data->loop_single_exit_p)
4593 return false;
4595 /* We need to be able to decide whether the candidate is increasing or decreasing
4596 in order to choose the right comparison operator. */
4597 if (!cst_and_fits_in_hwi (cand->iv->step))
4598 return false;
4599 step = int_cst_value (cand->iv->step);
4601 /* Check that the number of iterations matches the expected pattern:
4602 a + 1 > b ? 0 : b - a - 1. */
4603 mbz = niter->may_be_zero;
4604 if (TREE_CODE (mbz) == GT_EXPR)
4606 /* Handle a + 1 > b. */
4607 tree op0 = TREE_OPERAND (mbz, 0);
4608 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4610 a = TREE_OPERAND (op0, 0);
4611 b = TREE_OPERAND (mbz, 1);
4613 else
4614 return false;
4616 else if (TREE_CODE (mbz) == LT_EXPR)
4618 tree op1 = TREE_OPERAND (mbz, 1);
4620 /* Handle b < a + 1. */
4621 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4623 a = TREE_OPERAND (op1, 0);
4624 b = TREE_OPERAND (mbz, 0);
4626 else
4627 return false;
4629 else
4630 return false;
4632 /* Expected number of iterations is B - A - 1. Check that it matches
4633 the actual number, i.e., that B - A - NITER = 1. */
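/* The check below works on affine combinations: TMPA and NIT are scaled
   by -1 and added to TMPB, so TMPB ends up holding B - A - NITER, which
   must reduce to the constant 1 with no variable parts left.  */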
4634 tree_to_aff_combination (niter->niter, nit_type, &nit);
4635 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4636 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4637 aff_combination_scale (&nit, double_int_minus_one);
4638 aff_combination_scale (&tmpa, double_int_minus_one);
4639 aff_combination_add (&tmpb, &tmpa);
4640 aff_combination_add (&tmpb, &nit);
4641 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4642 return false;
4644 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4645 overflow. */
4646 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4647 cand->iv->step,
4648 fold_convert (TREE_TYPE (cand->iv->step), a));
4649 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4650 return false;
4652 /* Determine the new comparison operator. */
4653 comp = step < 0 ? GT_EXPR : LT_EXPR;
4654 if (*comp_p == NE_EXPR)
4655 *comp_p = comp;
4656 else if (*comp_p == EQ_EXPR)
4657 *comp_p = invert_tree_comparison (comp, false);
4658 else
4659 gcc_unreachable ();
4661 return true;
4664 /* Check whether it is possible to express the condition in USE by comparison
4665 of candidate CAND. If so, store the value it is compared with to BOUND, and the
4666 comparison operator to COMP. */
4668 static bool
4669 may_eliminate_iv (struct ivopts_data *data,
4670 struct iv_use *use, struct iv_cand *cand, tree *bound,
4671 enum tree_code *comp)
4673 basic_block ex_bb;
4674 edge exit;
4675 tree period;
4676 struct loop *loop = data->current_loop;
4677 aff_tree bnd;
4678 struct tree_niter_desc *desc = NULL;
4680 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4681 return false;
4683 /* For now this works only for exits that dominate the loop latch.
4684 TODO: extend to other conditions inside loop body. */
4685 ex_bb = gimple_bb (use->stmt);
4686 if (use->stmt != last_stmt (ex_bb)
4687 || gimple_code (use->stmt) != GIMPLE_COND
4688 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4689 return false;
4691 exit = EDGE_SUCC (ex_bb, 0);
4692 if (flow_bb_inside_loop_p (loop, exit->dest))
4693 exit = EDGE_SUCC (ex_bb, 1);
4694 if (flow_bb_inside_loop_p (loop, exit->dest))
4695 return false;
4697 desc = niter_for_exit (data, exit);
4698 if (!desc)
4699 return false;
4701 /* Determine whether we can use the variable to test the exit condition.
4702 This is the case iff the period of the induction variable is greater
4703 than the number of iterations for which the exit condition is true. */
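/* E.g. (illustrative): an 8-bit unsigned candidate with step 2 has
   period 127, so an exit test "cand != bound" is only usable when the
   exit is reached within 127 iterations -- afterwards the candidate
   wraps and the test may miss the intended value.  */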
4704 period = iv_period (cand->iv);
4706 /* If the number of iterations is constant, compare against it directly. */
4707 if (TREE_CODE (desc->niter) == INTEGER_CST)
4709 /* See cand_value_at. */
4710 if (stmt_after_increment (loop, cand, use->stmt))
4712 if (!tree_int_cst_lt (desc->niter, period))
4713 return false;
4715 else
4717 if (tree_int_cst_lt (period, desc->niter))
4718 return false;
4722 /* If not, and if this is the only possible exit of the loop, see whether
4723 we can get a conservative estimate on the number of iterations of the
4724 entire loop and compare against that instead. */
4725 else
4727 double_int period_value, max_niter;
4729 max_niter = desc->max;
4730 if (stmt_after_increment (loop, cand, use->stmt))
4731 max_niter += double_int_one;
4732 period_value = tree_to_double_int (period);
4733 if (max_niter.ugt (period_value))
4735 /* See if we can take advantage of inferred loop bound information. */
4736 if (data->loop_single_exit_p)
4738 if (!max_loop_iterations (loop, &max_niter))
4739 return false;
4740 /* The loop bound is already adjusted by adding 1. */
4741 if (max_niter.ugt (period_value))
4742 return false;
4744 else
4745 return false;
4749 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4751 *bound = aff_combination_to_tree (&bnd);
4752 *comp = iv_elimination_compare (data, use);
4754 /* It is unlikely that computing the number of iterations using division
4755 would be more profitable than keeping the original induction variable. */
4756 if (expression_expensive_p (*bound))
4757 return false;
4759 /* Sometimes it is possible to handle the situation where the number of
4760 iterations may be zero unless additional assumptions hold, by using <
4761 instead of != in the exit condition.
4763 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4764 base the exit condition on it. However, that is often too
4765 expensive. */
4766 if (!integer_zerop (desc->may_be_zero))
4767 return iv_elimination_compare_lt (data, cand, comp, desc);
4769 return true;
4772 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4773 be copied if it is used in the loop body and DATA->body_includes_call. */
4775 static int
4776 parm_decl_cost (struct ivopts_data *data, tree bound)
4778 tree sbound = bound;
4779 STRIP_NOPS (sbound);
4781 if (TREE_CODE (sbound) == SSA_NAME
4782 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4783 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4784 && data->body_includes_call)
4785 return COSTS_N_INSNS (1);
4787 return 0;
4790 /* Determines cost of basing replacement of USE on CAND in a condition. */
4792 static bool
4793 determine_use_iv_cost_condition (struct ivopts_data *data,
4794 struct iv_use *use, struct iv_cand *cand)
4796 tree bound = NULL_TREE;
4797 struct iv *cmp_iv;
4798 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4799 comp_cost elim_cost, express_cost, cost, bound_cost;
4800 bool ok;
4801 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4802 tree *control_var, *bound_cst;
4803 enum tree_code comp = ERROR_MARK;
4805 /* Only consider real candidates. */
4806 if (!cand->iv)
4808 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4809 ERROR_MARK, -1);
4810 return false;
4813 /* Try iv elimination. */
4814 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4816 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4817 if (elim_cost.cost == 0)
4818 elim_cost.cost = parm_decl_cost (data, bound);
4819 else if (TREE_CODE (bound) == INTEGER_CST)
4820 elim_cost.cost = 0;
4821 /* If we replace a loop condition 'i < n' with 'p < base + n',
4822 depends_on_elim will have 'base' and 'n' set, which implies
4823 that both 'base' and 'n' will be live during the loop. More likely,
4824 'base + n' will be loop invariant, resulting in only one live value
4825 during the loop. So in that case we clear depends_on_elim and set
4826 elim_inv_expr_id instead. */
4827 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4829 elim_inv_expr_id = get_expr_id (data, bound);
4830 bitmap_clear (depends_on_elim);
4832 /* The bound is a loop invariant, so it will only be computed
4833 once. */
4834 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4836 else
4837 elim_cost = infinite_cost;
4839 /* Try expressing the original giv. If it is compared with an invariant,
4840 note that we cannot get rid of it. */
4841 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4842 NULL, &cmp_iv);
4843 gcc_assert (ok);
4845 /* When the condition is a comparison of the candidate IV against
4846 zero, prefer this IV.
4848 TODO: The constant that we're subtracting from the cost should
4849 be target-dependent. This information should be added to the
4850 target costs for each backend. */
4851 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4852 && integer_zerop (*bound_cst)
4853 && (operand_equal_p (*control_var, cand->var_after, 0)
4854 || operand_equal_p (*control_var, cand->var_before, 0)))
4855 elim_cost.cost -= 1;
4857 express_cost = get_computation_cost (data, use, cand, false,
4858 &depends_on_express, NULL,
4859 &express_inv_expr_id);
4860 fd_ivopts_data = data;
4861 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4863 /* Count the cost of the original bound as well. */
4864 bound_cost = force_var_cost (data, *bound_cst, NULL);
4865 if (bound_cost.cost == 0)
4866 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4867 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4868 bound_cost.cost = 0;
4869 express_cost.cost += bound_cost.cost;
4871 /* Choose the better approach, preferring the eliminated IV. */
4872 if (compare_costs (elim_cost, express_cost) <= 0)
4874 cost = elim_cost;
4875 depends_on = depends_on_elim;
4876 depends_on_elim = NULL;
4877 inv_expr_id = elim_inv_expr_id;
4879 else
4881 cost = express_cost;
4882 depends_on = depends_on_express;
4883 depends_on_express = NULL;
4884 bound = NULL_TREE;
4885 comp = ERROR_MARK;
4886 inv_expr_id = express_inv_expr_id;
4889 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4891 if (depends_on_elim)
4892 BITMAP_FREE (depends_on_elim);
4893 if (depends_on_express)
4894 BITMAP_FREE (depends_on_express);
4896 return !infinite_cost_p (cost);
4899 /* Determines cost of basing replacement of USE on CAND. Returns false
4900 if USE cannot be based on CAND. */
4902 static bool
4903 determine_use_iv_cost (struct ivopts_data *data,
4904 struct iv_use *use, struct iv_cand *cand)
4906 switch (use->type)
4908 case USE_NONLINEAR_EXPR:
4909 return determine_use_iv_cost_generic (data, use, cand);
4911 case USE_ADDRESS:
4912 return determine_use_iv_cost_address (data, use, cand);
4914 case USE_COMPARE:
4915 return determine_use_iv_cost_condition (data, use, cand);
4917 default:
4918 gcc_unreachable ();
4922 /* Return true if get_computation_cost indicates that autoincrement is
4923 a possibility for the pair of USE and CAND, false otherwise. */
4925 static bool
4926 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4927 struct iv_cand *cand)
4929 bitmap depends_on;
4930 bool can_autoinc;
4931 comp_cost cost;
4933 if (use->type != USE_ADDRESS)
4934 return false;
4936 cost = get_computation_cost (data, use, cand, true, &depends_on,
4937 &can_autoinc, NULL);
4939 BITMAP_FREE (depends_on);
4941 return !infinite_cost_p (cost) && can_autoinc;
4944 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4945 use that allows autoincrement, and set their AINC_USE if possible. */
4947 static void
4948 set_autoinc_for_original_candidates (struct ivopts_data *data)
4950 unsigned i, j;
4952 for (i = 0; i < n_iv_cands (data); i++)
4954 struct iv_cand *cand = iv_cand (data, i);
4955 struct iv_use *closest_before = NULL;
4956 struct iv_use *closest_after = NULL;
4957 if (cand->pos != IP_ORIGINAL)
4958 continue;
4960 for (j = 0; j < n_iv_uses (data); j++)
4962 struct iv_use *use = iv_use (data, j);
4963 unsigned uid = gimple_uid (use->stmt);
4965 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4966 continue;
4968 if (uid < gimple_uid (cand->incremented_at)
4969 && (closest_before == NULL
4970 || uid > gimple_uid (closest_before->stmt)))
4971 closest_before = use;
4973 if (uid > gimple_uid (cand->incremented_at)
4974 && (closest_after == NULL
4975 || uid < gimple_uid (closest_after->stmt)))
4976 closest_after = use;
4979 if (closest_before != NULL
4980 && autoinc_possible_for_pair (data, closest_before, cand))
4981 cand->ainc_use = closest_before;
4982 else if (closest_after != NULL
4983 && autoinc_possible_for_pair (data, closest_after, cand))
4984 cand->ainc_use = closest_after;
4988 /* Finds the candidates for the induction variables. */
4990 static void
4991 find_iv_candidates (struct ivopts_data *data)
4993 /* Add commonly used ivs. */
4994 add_standard_iv_candidates (data);
4996 /* Add old induction variables. */
4997 add_old_ivs_candidates (data);
4999 /* Add induction variables derived from uses. */
5000 add_derived_ivs_candidates (data);
5002 set_autoinc_for_original_candidates (data);
5004 /* Record the important candidates. */
5005 record_important_candidates (data);
5008 /* Determines costs of basing the use of the iv on an iv candidate. */
5010 static void
5011 determine_use_iv_costs (struct ivopts_data *data)
5013 unsigned i, j;
5014 struct iv_use *use;
5015 struct iv_cand *cand;
5016 bitmap to_clear = BITMAP_ALLOC (NULL);
5018 alloc_use_cost_map (data);
5020 for (i = 0; i < n_iv_uses (data); i++)
5022 use = iv_use (data, i);
5024 if (data->consider_all_candidates)
5026 for (j = 0; j < n_iv_cands (data); j++)
5028 cand = iv_cand (data, j);
5029 determine_use_iv_cost (data, use, cand);
5032 else
5034 bitmap_iterator bi;
5036 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5038 cand = iv_cand (data, j);
5039 if (!determine_use_iv_cost (data, use, cand))
5040 bitmap_set_bit (to_clear, j);
5043 /* Remove the candidates for which the cost is infinite from
5044 the list of related candidates. */
5045 bitmap_and_compl_into (use->related_cands, to_clear);
5046 bitmap_clear (to_clear);
5050 BITMAP_FREE (to_clear);
5052 if (dump_file && (dump_flags & TDF_DETAILS))
5054 fprintf (dump_file, "Use-candidate costs:\n");
5056 for (i = 0; i < n_iv_uses (data); i++)
5058 use = iv_use (data, i);
5060 fprintf (dump_file, "Use %d:\n", i);
5061 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5062 for (j = 0; j < use->n_map_members; j++)
5064 if (!use->cost_map[j].cand
5065 || infinite_cost_p (use->cost_map[j].cost))
5066 continue;
5068 fprintf (dump_file, " %d\t%d\t%d\t",
5069 use->cost_map[j].cand->id,
5070 use->cost_map[j].cost.cost,
5071 use->cost_map[j].cost.complexity);
5072 if (use->cost_map[j].depends_on)
5073 bitmap_print (dump_file,
5074 use->cost_map[j].depends_on, "","");
5075 if (use->cost_map[j].inv_expr_id != -1)
5076 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5077 fprintf (dump_file, "\n");
5080 fprintf (dump_file, "\n");
5082 fprintf (dump_file, "\n");
5086 /* Determines cost of the candidate CAND. */
5088 static void
5089 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5091 comp_cost cost_base;
5092 unsigned cost, cost_step;
5093 tree base;
5095 if (!cand->iv)
5097 cand->cost = 0;
5098 return;
5101 /* There are two costs associated with the candidate -- its increment
5102 and its initialization. The second is almost negligible for any loop
5103 that rolls enough, so we give it only very little weight. */
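/* Concretely, adjust_setup_cost below divides the setup cost by the
   average number of loop iterations (when optimizing for speed), so for
   a loop that rolls many times the initialization contributes little.  */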
5105 base = cand->iv->base;
5106 cost_base = force_var_cost (data, base, NULL);
5107 /* It is exceptional for the iv register to happen to be initialized with
5108 the proper value at no cost. In general, there will be at least a regcopy
5109 or a const set. */
5110 if (cost_base.cost == 0)
5111 cost_base.cost = COSTS_N_INSNS (1);
5112 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5114 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5116 /* Prefer the original ivs unless we may gain something by replacing them.
5117 The reason is to make debugging simpler; this is not relevant for
5118 artificial ivs created by other optimization passes. */
5119 if (cand->pos != IP_ORIGINAL
5120 || !SSA_NAME_VAR (cand->var_before)
5121 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5122 cost++;
5124 /* Prefer not to insert statements into the latch unless there are some
5125 already (so that we do not create unnecessary jumps). */
5126 if (cand->pos == IP_END
5127 && empty_block_p (ip_end_pos (data->current_loop)))
5128 cost++;
5130 cand->cost = cost;
5131 cand->cost_step = cost_step;
5134 /* Determines costs of computation of the candidates. */
5136 static void
5137 determine_iv_costs (struct ivopts_data *data)
5139 unsigned i;
5141 if (dump_file && (dump_flags & TDF_DETAILS))
5143 fprintf (dump_file, "Candidate costs:\n");
5144 fprintf (dump_file, " cand\tcost\n");
5147 for (i = 0; i < n_iv_cands (data); i++)
5149 struct iv_cand *cand = iv_cand (data, i);
5151 determine_iv_cost (data, cand);
5153 if (dump_file && (dump_flags & TDF_DETAILS))
5154 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5157 if (dump_file && (dump_flags & TDF_DETAILS))
5158 fprintf (dump_file, "\n");
5161 /* Calculates cost for having SIZE induction variables. */
5163 static unsigned
5164 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5166 /* We add size to the cost, so that we prefer eliminating ivs
5167 if possible. */
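/* estimate_reg_pressure_cost estimates the spill and copy cost of
   keeping SIZE extra pseudos live across the loop, given the
   DATA->regs_used registers the loop body already consumes.  */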
5168 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5169 data->body_includes_call);
5172 /* For each size of the induction variable set determine the penalty. */
5174 static void
5175 determine_set_costs (struct ivopts_data *data)
5177 unsigned j, n;
5178 gimple phi;
5179 gimple_stmt_iterator psi;
5180 tree op;
5181 struct loop *loop = data->current_loop;
5182 bitmap_iterator bi;
5184 if (dump_file && (dump_flags & TDF_DETAILS))
5186 fprintf (dump_file, "Global costs:\n");
5187 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5188 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5189 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5190 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5193 n = 0;
5194 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5196 phi = gsi_stmt (psi);
5197 op = PHI_RESULT (phi);
5199 if (virtual_operand_p (op))
5200 continue;
5202 if (get_iv (data, op))
5203 continue;
5205 n++;
5208 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5210 struct version_info *info = ver_info (data, j);
5212 if (info->inv_id && info->has_nonlin_use)
5213 n++;
5216 data->regs_used = n;
5217 if (dump_file && (dump_flags & TDF_DETAILS))
5218 fprintf (dump_file, " regs_used %d\n", n);
5220 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, " cost for size:\n");
5223 fprintf (dump_file, " ivs\tcost\n");
5224 for (j = 0; j <= 2 * target_avail_regs; j++)
5225 fprintf (dump_file, " %d\t%d\n", j,
5226 ivopts_global_cost_for_size (data, j));
5227 fprintf (dump_file, "\n");
5231 /* Returns true if A is a cheaper cost pair than B. */
5233 static bool
5234 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5236 int cmp;
5238 if (!a)
5239 return false;
5241 if (!b)
5242 return true;
5244 cmp = compare_costs (a->cost, b->cost);
5245 if (cmp < 0)
5246 return true;
5248 if (cmp > 0)
5249 return false;
5251 /* In case the costs are the same, prefer the cheaper candidate. */
5252 if (a->cand->cost < b->cand->cost)
5253 return true;
5255 return false;
5259 /* Returns the candidate by which USE is expressed in IVS. */
5261 static struct cost_pair *
5262 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5264 return ivs->cand_for_use[use->id];
5267 /* Computes the cost field of IVS structure. */
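/* The total is the sum of the per-use costs and the per-candidate costs,
   plus the global register-pressure estimate for the registers consumed
   by the candidates and by the used invariant expressions.  */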
5269 static void
5270 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5272 comp_cost cost = ivs->cand_use_cost;
5274 cost.cost += ivs->cand_cost;
5276 cost.cost += ivopts_global_cost_for_size (data,
5277 ivs->n_regs + ivs->num_used_inv_expr);
5279 ivs->cost = cost;
5282 /* Remove the invariants in set INVS from set IVS. */
5284 static void
5285 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5287 bitmap_iterator bi;
5288 unsigned iid;
5290 if (!invs)
5291 return;
5293 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5295 ivs->n_invariant_uses[iid]--;
5296 if (ivs->n_invariant_uses[iid] == 0)
5297 ivs->n_regs--;
5301 /* Set USE not to be expressed by any candidate in IVS. */
5303 static void
5304 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5305 struct iv_use *use)
5307 unsigned uid = use->id, cid;
5308 struct cost_pair *cp;
5310 cp = ivs->cand_for_use[uid];
5311 if (!cp)
5312 return;
5313 cid = cp->cand->id;
5315 ivs->bad_uses++;
5316 ivs->cand_for_use[uid] = NULL;
5317 ivs->n_cand_uses[cid]--;
5319 if (ivs->n_cand_uses[cid] == 0)
5321 bitmap_clear_bit (ivs->cands, cid);
5322 /* Do not count the pseudocandidates. */
5323 if (cp->cand->iv)
5324 ivs->n_regs--;
5325 ivs->n_cands--;
5326 ivs->cand_cost -= cp->cand->cost;
5328 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5331 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5333 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5335 if (cp->inv_expr_id != -1)
5337 ivs->used_inv_expr[cp->inv_expr_id]--;
5338 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5339 ivs->num_used_inv_expr--;
5341 iv_ca_recount_cost (data, ivs);
5344 /* Add invariants in set INVS to set IVS. */
5346 static void
5347 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5349 bitmap_iterator bi;
5350 unsigned iid;
5352 if (!invs)
5353 return;
5355 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5357 ivs->n_invariant_uses[iid]++;
5358 if (ivs->n_invariant_uses[iid] == 1)
5359 ivs->n_regs++;
5363 /* Set cost pair for USE in set IVS to CP. */
5365 static void
5366 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5367 struct iv_use *use, struct cost_pair *cp)
5369 unsigned uid = use->id, cid;
5371 if (ivs->cand_for_use[uid] == cp)
5372 return;
5374 if (ivs->cand_for_use[uid])
5375 iv_ca_set_no_cp (data, ivs, use);
5377 if (cp)
5379 cid = cp->cand->id;
5381 ivs->bad_uses--;
5382 ivs->cand_for_use[uid] = cp;
5383 ivs->n_cand_uses[cid]++;
5384 if (ivs->n_cand_uses[cid] == 1)
5386 bitmap_set_bit (ivs->cands, cid);
5387 /* Do not count the pseudocandidates. */
5388 if (cp->cand->iv)
5389 ivs->n_regs++;
5390 ivs->n_cands++;
5391 ivs->cand_cost += cp->cand->cost;
5393 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5396 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5397 iv_ca_set_add_invariants (ivs, cp->depends_on);
5399 if (cp->inv_expr_id != -1)
5401 ivs->used_inv_expr[cp->inv_expr_id]++;
5402 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5403 ivs->num_used_inv_expr++;
5405 iv_ca_recount_cost (data, ivs);
5409 /* Extend set IVS by expressing USE by some of the candidates in it
5410 if possible. All important candidates will be considered
5411 if IMPORTANT_CANDIDATES is true. */
5413 static void
5414 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5415 struct iv_use *use, bool important_candidates)
5417 struct cost_pair *best_cp = NULL, *cp;
5418 bitmap_iterator bi;
5419 bitmap cands;
5420 unsigned i;
5422 gcc_assert (ivs->upto >= use->id);
5424 if (ivs->upto == use->id)
5426 ivs->upto++;
5427 ivs->bad_uses++;
5430 cands = (important_candidates ? data->important_candidates : ivs->cands);
5431 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5433 struct iv_cand *cand = iv_cand (data, i);
5435 cp = get_use_iv_cost (data, use, cand);
5437 if (cheaper_cost_pair (cp, best_cp))
5438 best_cp = cp;
5441 iv_ca_set_cp (data, ivs, use, best_cp);
5444 /* Get cost for assignment IVS. */
5446 static comp_cost
5447 iv_ca_cost (struct iv_ca *ivs)
5449 /* This was a conditional expression but it triggered a bug in
5450 Sun C 5.5. */
5451 if (ivs->bad_uses)
5452 return infinite_cost;
5453 else
5454 return ivs->cost;
5457 /* Returns true if all dependences of CP are among invariants in IVS. */
5459 static bool
5460 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5462 unsigned i;
5463 bitmap_iterator bi;
5465 if (!cp->depends_on)
5466 return true;
5468 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5470 if (ivs->n_invariant_uses[i] == 0)
5471 return false;
5474 return true;
5477 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5478 it before NEXT_CHANGE. */
5480 static struct iv_ca_delta *
5481 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5482 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5484 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5486 change->use = use;
5487 change->old_cp = old_cp;
5488 change->new_cp = new_cp;
5489 change->next_change = next_change;
5491 return change;
5494 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5495 are rewritten. */
5497 static struct iv_ca_delta *
5498 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5500 struct iv_ca_delta *last;
5502 if (!l2)
5503 return l1;
5505 if (!l1)
5506 return l2;
5508 for (last = l1; last->next_change; last = last->next_change)
5509 continue;
5510 last->next_change = l2;
5512 return l1;
5515 /* Reverse the list of changes DELTA, forming its inverse. */
5517 static struct iv_ca_delta *
5518 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5520 struct iv_ca_delta *act, *next, *prev = NULL;
5521 struct cost_pair *tmp;
5523 for (act = delta; act; act = next)
5525 next = act->next_change;
5526 act->next_change = prev;
5527 prev = act;
5529 tmp = act->old_cp;
5530 act->old_cp = act->new_cp;
5531 act->new_cp = tmp;
5534 return prev;
5537 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5538 reverted instead. */
5540 static void
5541 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5542 struct iv_ca_delta *delta, bool forward)
5544 struct cost_pair *from, *to;
5545 struct iv_ca_delta *act;
5547 if (!forward)
5548 delta = iv_ca_delta_reverse (delta);
5550 for (act = delta; act; act = act->next_change)
5552 from = act->old_cp;
5553 to = act->new_cp;
5554 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5555 iv_ca_set_cp (data, ivs, act->use, to);
5558 if (!forward)
5559 iv_ca_delta_reverse (delta);
5562 /* Returns true if CAND is used in IVS. */
5564 static bool
5565 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5567 return ivs->n_cand_uses[cand->id] > 0;
5570 /* Returns number of induction variable candidates in the set IVS. */
5572 static unsigned
5573 iv_ca_n_cands (struct iv_ca *ivs)
5575 return ivs->n_cands;
5578 /* Free the list of changes DELTA. */
5580 static void
5581 iv_ca_delta_free (struct iv_ca_delta **delta)
5583 struct iv_ca_delta *act, *next;
5585 for (act = *delta; act; act = next)
5587 next = act->next_change;
5588 free (act);
5591 *delta = NULL;
5594 /* Allocates a new iv candidate assignment. */
5596 static struct iv_ca *
5597 iv_ca_new (struct ivopts_data *data)
5599 struct iv_ca *nw = XNEW (struct iv_ca);
5601 nw->upto = 0;
5602 nw->bad_uses = 0;
5603 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5604 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5605 nw->cands = BITMAP_ALLOC (NULL);
5606 nw->n_cands = 0;
5607 nw->n_regs = 0;
5608 nw->cand_use_cost = no_cost;
5609 nw->cand_cost = 0;
5610 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5611 nw->cost = no_cost;
5612 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5613 nw->num_used_inv_expr = 0;
5615 return nw;
5618 /* Free memory occupied by the set IVS. */
5620 static void
5621 iv_ca_free (struct iv_ca **ivs)
5623 free ((*ivs)->cand_for_use);
5624 free ((*ivs)->n_cand_uses);
5625 BITMAP_FREE ((*ivs)->cands);
5626 free ((*ivs)->n_invariant_uses);
5627 free ((*ivs)->used_inv_expr);
5628 free (*ivs);
5629 *ivs = NULL;
5632 /* Dumps IVS to FILE. */
5634 static void
5635 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5637 const char *pref = " invariants ";
5638 unsigned i;
5639 comp_cost cost = iv_ca_cost (ivs);
5641 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5642 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5643 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5644 bitmap_print (file, ivs->cands, " candidates: ","\n");
5646 for (i = 0; i < ivs->upto; i++)
5648 struct iv_use *use = iv_use (data, i);
5649 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5650 if (cp)
5651 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5652 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5653 else
5654 fprintf (file, " use:%d --> ??\n", use->id);
5657 for (i = 1; i <= data->max_inv_id; i++)
5658 if (ivs->n_invariant_uses[i])
5660 fprintf (file, "%s%d", pref, i);
5661 pref = ", ";
5663 fprintf (file, "\n\n");
5666 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5667 new set, and store differences in DELTA. Number of induction variables
5668 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true,
5669 the function will try to find a solution with a minimal number of iv candidates. */
5671 static comp_cost
5672 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5673 struct iv_cand *cand, struct iv_ca_delta **delta,
5674 unsigned *n_ivs, bool min_ncand)
5676 unsigned i;
5677 comp_cost cost;
5678 struct iv_use *use;
5679 struct cost_pair *old_cp, *new_cp;
5681 *delta = NULL;
5682 for (i = 0; i < ivs->upto; i++)
5684 use = iv_use (data, i);
5685 old_cp = iv_ca_cand_for_use (ivs, use);
5687 if (old_cp
5688 && old_cp->cand == cand)
5689 continue;
5691 new_cp = get_use_iv_cost (data, use, cand);
5692 if (!new_cp)
5693 continue;
5695 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5696 continue;
5698 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5699 continue;
5701 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5704 iv_ca_delta_commit (data, ivs, *delta, true);
5705 cost = iv_ca_cost (ivs);
5706 if (n_ivs)
5707 *n_ivs = iv_ca_n_cands (ivs);
5708 iv_ca_delta_commit (data, ivs, *delta, false);
5710 return cost;
5713 /* Try narrowing set IVS by removing CAND. Return the cost of
5714 the new set and store the differences in DELTA. */
5716 static comp_cost
5717 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5718 struct iv_cand *cand, struct iv_ca_delta **delta)
5720 unsigned i, ci;
5721 struct iv_use *use;
5722 struct cost_pair *old_cp, *new_cp, *cp;
5723 bitmap_iterator bi;
5724 struct iv_cand *cnd;
5725 comp_cost cost;
5727 *delta = NULL;
5728 for (i = 0; i < n_iv_uses (data); i++)
5730 use = iv_use (data, i);
5732 old_cp = iv_ca_cand_for_use (ivs, use);
5733 if (old_cp->cand != cand)
5734 continue;
5736 new_cp = NULL;
5738 if (data->consider_all_candidates)
5740 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5742 if (ci == cand->id)
5743 continue;
5745 cnd = iv_cand (data, ci);
5747 cp = get_use_iv_cost (data, use, cnd);
5748 if (!cp)
5749 continue;
5751 if (!iv_ca_has_deps (ivs, cp))
5752 continue;
5754 if (!cheaper_cost_pair (cp, new_cp))
5755 continue;
5757 new_cp = cp;
5760 else
5762 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5764 if (ci == cand->id)
5765 continue;
5767 cnd = iv_cand (data, ci);
5769 cp = get_use_iv_cost (data, use, cnd);
5770 if (!cp)
5771 continue;
5772 if (!iv_ca_has_deps (ivs, cp))
5773 continue;
5775 if (!cheaper_cost_pair (cp, new_cp))
5776 continue;
5778 new_cp = cp;
5782 if (!new_cp)
5784 iv_ca_delta_free (delta);
5785 return infinite_cost;
5788 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5791 iv_ca_delta_commit (data, ivs, *delta, true);
5792 cost = iv_ca_cost (ivs);
5793 iv_ca_delta_commit (data, ivs, *delta, false);
5795 return cost;
5798 /* Try optimizing the set of candidates IVS by removing candidates different
5799 from EXCEPT_CAND from it. Return the cost of the new set, and store
5800 differences in DELTA. */
5802 static comp_cost
5803 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5804 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5806 bitmap_iterator bi;
5807 struct iv_ca_delta *act_delta, *best_delta;
5808 unsigned i;
5809 comp_cost best_cost, acost;
5810 struct iv_cand *cand;
5812 best_delta = NULL;
5813 best_cost = iv_ca_cost (ivs);
5815 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5817 cand = iv_cand (data, i);
5819 if (cand == except_cand)
5820 continue;
5822 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5824 if (compare_costs (acost, best_cost) < 0)
5826 best_cost = acost;
5827 iv_ca_delta_free (&best_delta);
5828 best_delta = act_delta;
5830 else
5831 iv_ca_delta_free (&act_delta);
5834 if (!best_delta)
5836 *delta = NULL;
5837 return best_cost;
5840 /* Recurse to possibly remove other unnecessary ivs. */
5841 iv_ca_delta_commit (data, ivs, best_delta, true);
5842 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5843 iv_ca_delta_commit (data, ivs, best_delta, false);
5844 *delta = iv_ca_delta_join (best_delta, *delta);
5845 return best_cost;
5848 /* Tries to extend the set IVS in the best possible way in order
5849 to express the USE. If ORIGINALP is true, prefer candidates from
5850 the original set of IVs, otherwise favor important candidates not
5851 based on any memory object. */
5853 static bool
5854 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5855 struct iv_use *use, bool originalp)
5857 comp_cost best_cost, act_cost;
5858 unsigned i;
5859 bitmap_iterator bi;
5860 struct iv_cand *cand;
5861 struct iv_ca_delta *best_delta = NULL, *act_delta;
5862 struct cost_pair *cp;
5864 iv_ca_add_use (data, ivs, use, false);
5865 best_cost = iv_ca_cost (ivs);
5867 cp = iv_ca_cand_for_use (ivs, use);
5868 if (!cp)
5870 ivs->upto--;
5871 ivs->bad_uses--;
5872 iv_ca_add_use (data, ivs, use, true);
5873 best_cost = iv_ca_cost (ivs);
5874 cp = iv_ca_cand_for_use (ivs, use);
5876 if (cp)
5878 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5879 iv_ca_set_no_cp (data, ivs, use);
5882 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5883 first try important candidates not based on any memory object. Only if
5884 this fails, try the specific ones. Rationale -- in loops with many
5885 variables the best choice often is to use just one generic biv. If we
5886 added here many ivs specific to the uses, the optimization algorithm later
5887 would be likely to get stuck in a local minimum, thus causing us to create
5888 too many ivs. The approach from few ivs to more seems more likely to be
5889 successful -- starting from few ivs, replacing an expensive use by a
5890 specific iv should always be a win. */
5891 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5893 cand = iv_cand (data, i);
5895 if (originalp && cand->pos != IP_ORIGINAL)
5896 continue;
5898 if (!originalp && cand->iv->base_object != NULL_TREE)
5899 continue;
5901 if (iv_ca_cand_used_p (ivs, cand))
5902 continue;
5904 cp = get_use_iv_cost (data, use, cand);
5905 if (!cp)
5906 continue;
5908 iv_ca_set_cp (data, ivs, use, cp);
5909 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5910 true);
5911 iv_ca_set_no_cp (data, ivs, use);
5912 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5914 if (compare_costs (act_cost, best_cost) < 0)
5916 best_cost = act_cost;
5918 iv_ca_delta_free (&best_delta);
5919 best_delta = act_delta;
5921 else
5922 iv_ca_delta_free (&act_delta);
5925 if (infinite_cost_p (best_cost))
5927 for (i = 0; i < use->n_map_members; i++)
5929 cp = use->cost_map + i;
5930 cand = cp->cand;
5931 if (!cand)
5932 continue;
5934 /* Already tried this. */
5935 if (cand->important)
5937 if (originalp && cand->pos == IP_ORIGINAL)
5938 continue;
5939 if (!originalp && cand->iv->base_object == NULL_TREE)
5940 continue;
5943 if (iv_ca_cand_used_p (ivs, cand))
5944 continue;
5946 act_delta = NULL;
5947 iv_ca_set_cp (data, ivs, use, cp);
5948 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5949 iv_ca_set_no_cp (data, ivs, use);
5950 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5951 cp, act_delta);
5953 if (compare_costs (act_cost, best_cost) < 0)
5955 best_cost = act_cost;
5957 if (best_delta)
5958 iv_ca_delta_free (&best_delta);
5959 best_delta = act_delta;
5961 else
5962 iv_ca_delta_free (&act_delta);
5966 iv_ca_delta_commit (data, ivs, best_delta, true);
5967 iv_ca_delta_free (&best_delta);
5969 return !infinite_cost_p (best_cost);
5972 /* Finds an initial assignment of candidates to uses. */
5974 static struct iv_ca *
5975 get_initial_solution (struct ivopts_data *data, bool originalp)
5977 struct iv_ca *ivs = iv_ca_new (data);
5978 unsigned i;
5980 for (i = 0; i < n_iv_uses (data); i++)
5981 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5983 iv_ca_free (&ivs);
5984 return NULL;
5987 return ivs;
5990 /* Tries to improve set of induction variables IVS. */
5992 static bool
5993 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5995 unsigned i, n_ivs;
5996 comp_cost acost, best_cost = iv_ca_cost (ivs);
5997 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5998 struct iv_cand *cand;
6000 /* Try extending the set of induction variables by one. */
6001 for (i = 0; i < n_iv_cands (data); i++)
6003 cand = iv_cand (data, i);
6005 if (iv_ca_cand_used_p (ivs, cand))
6006 continue;
6008 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6009 if (!act_delta)
6010 continue;
6012 /* If we successfully added the candidate and the set is small enough,
6013 try optimizing it by removing other candidates. */
6014 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6016 iv_ca_delta_commit (data, ivs, act_delta, true);
6017 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6018 iv_ca_delta_commit (data, ivs, act_delta, false);
6019 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6022 if (compare_costs (acost, best_cost) < 0)
6024 best_cost = acost;
6025 iv_ca_delta_free (&best_delta);
6026 best_delta = act_delta;
6028 else
6029 iv_ca_delta_free (&act_delta);
6032 if (!best_delta)
6034 /* Try removing the candidates from the set instead. */
6035 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6037 /* Nothing more we can do. */
6038 if (!best_delta)
6039 return false;
6042 iv_ca_delta_commit (data, ivs, best_delta, true);
6043 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6044 iv_ca_delta_free (&best_delta);
6045 return true;
6048 /* Attempts to find the optimal set of induction variables. We use a simple
6049 greedy heuristic: we try to replace at most one candidate in the selected
6050 solution and remove the unused ivs, as long as this improves the cost. */
6052 static struct iv_ca *
6053 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6055 struct iv_ca *set;
6057 /* Get the initial solution. */
6058 set = get_initial_solution (data, originalp);
6059 if (!set)
6061 if (dump_file && (dump_flags & TDF_DETAILS))
6062 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6063 return NULL;
6066 if (dump_file && (dump_flags & TDF_DETAILS))
6068 fprintf (dump_file, "Initial set of candidates:\n");
6069 iv_ca_dump (data, dump_file, set);
6072 while (try_improve_iv_set (data, set))
6074 if (dump_file && (dump_flags & TDF_DETAILS))
6076 fprintf (dump_file, "Improved to:\n");
6077 iv_ca_dump (data, dump_file, set);
6081 return set;
6084 static struct iv_ca *
6085 find_optimal_iv_set (struct ivopts_data *data)
6087 unsigned i;
6088 struct iv_ca *set, *origset;
6089 struct iv_use *use;
6090 comp_cost cost, origcost;
6092 /* Determine the cost based on a strategy that starts with original IVs,
6093 and try again using a strategy that prefers candidates not based
6094 on any IVs. */
6095 origset = find_optimal_iv_set_1 (data, true);
6096 set = find_optimal_iv_set_1 (data, false);
6098 if (!origset && !set)
6099 return NULL;
6101 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6102 cost = set ? iv_ca_cost (set) : infinite_cost;
6104 if (dump_file && (dump_flags & TDF_DETAILS))
6106 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6107 origcost.cost, origcost.complexity);
6108 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6109 cost.cost, cost.complexity);
6112 /* Choose the one with the best cost. */
6113 if (compare_costs (origcost, cost) <= 0)
6115 if (set)
6116 iv_ca_free (&set);
6117 set = origset;
6119 else if (origset)
6120 iv_ca_free (&origset);
6122 for (i = 0; i < n_iv_uses (data); i++)
6124 use = iv_use (data, i);
6125 use->selected = iv_ca_cand_for_use (set, use)->cand;
6128 return set;
6131 /* Creates a new induction variable corresponding to CAND. */
6133 static void
6134 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6136 gimple_stmt_iterator incr_pos;
6137 tree base;
6138 bool after = false;
6140 if (!cand->iv)
6141 return;
6143 switch (cand->pos)
6145 case IP_NORMAL:
6146 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6147 break;
6149 case IP_END:
6150 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6151 after = true;
6152 break;
6154 case IP_AFTER_USE:
6155 after = true;
6156 /* fall through */
6157 case IP_BEFORE_USE:
6158 incr_pos = gsi_for_stmt (cand->incremented_at);
6159 break;
6161 case IP_ORIGINAL:
6162 /* Mark that the iv is preserved. */
6163 name_info (data, cand->var_before)->preserve_biv = true;
6164 name_info (data, cand->var_after)->preserve_biv = true;
6166 /* Rewrite the increment so that it uses var_before directly. */
6167 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6168 return;
6171 gimple_add_tmp_var (cand->var_before);
6173 base = unshare_expr (cand->iv->base);
6175 create_iv (base, unshare_expr (cand->iv->step),
6176 cand->var_before, data->current_loop,
6177 &incr_pos, after, &cand->var_before, &cand->var_after);
6180 /* Creates new induction variables described in SET. */
6182 static void
6183 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6185 unsigned i;
6186 struct iv_cand *cand;
6187 bitmap_iterator bi;
6189 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6191 cand = iv_cand (data, i);
6192 create_new_iv (data, cand);
6195 if (dump_file && (dump_flags & TDF_DETAILS))
6197 fprintf (dump_file, "\nSelected IV set: \n");
6198 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6200 cand = iv_cand (data, i);
6201 dump_cand (dump_file, cand);
6203 fprintf (dump_file, "\n");
6207 /* Rewrites USE (definition of iv used in a nonlinear expression)
6208 using candidate CAND. */
6210 static void
6211 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6212 struct iv_use *use, struct iv_cand *cand)
6214 tree comp;
6215 tree op, tgt;
6216 gimple ass;
6217 gimple_stmt_iterator bsi;
6219 /* An important special case -- if we are asked to express the value of
6220 the original iv by itself, just exit; there is no need to
6221 introduce a new computation (that might also need casting the
6222 variable to unsigned and back). */
6223 if (cand->pos == IP_ORIGINAL
6224 && cand->incremented_at == use->stmt)
6226 enum tree_code stmt_code;
6228 gcc_assert (is_gimple_assign (use->stmt));
6229 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6231 /* Check whether we may leave the computation unchanged.
6232 This is the case only if it does not rely on other
6233 computations in the loop -- otherwise, the computation
6234 we rely upon may be removed in remove_unused_ivs,
6235 thus leading to an ICE. */
6236 stmt_code = gimple_assign_rhs_code (use->stmt);
6237 if (stmt_code == PLUS_EXPR
6238 || stmt_code == MINUS_EXPR
6239 || stmt_code == POINTER_PLUS_EXPR)
6241 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6242 op = gimple_assign_rhs2 (use->stmt);
6243 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6244 op = gimple_assign_rhs1 (use->stmt);
6245 else
6246 op = NULL_TREE;
6248 else
6249 op = NULL_TREE;
6251 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6252 return;
6255 comp = get_computation (data->current_loop, use, cand);
6256 gcc_assert (comp != NULL_TREE);
6258 switch (gimple_code (use->stmt))
6260 case GIMPLE_PHI:
6261 tgt = PHI_RESULT (use->stmt);
6263 /* If we should keep the biv, do not replace it. */
6264 if (name_info (data, tgt)->preserve_biv)
6265 return;
6267 bsi = gsi_after_labels (gimple_bb (use->stmt));
6268 break;
6270 case GIMPLE_ASSIGN:
6271 tgt = gimple_assign_lhs (use->stmt);
6272 bsi = gsi_for_stmt (use->stmt);
6273 break;
6275 default:
6276 gcc_unreachable ();
6279 if (!valid_gimple_rhs_p (comp)
6280 || (gimple_code (use->stmt) != GIMPLE_PHI
6281 /* We can't allow re-allocating the stmt as it might still
6282 be pointed to. */
6283 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6284 >= gimple_num_ops (gsi_stmt (bsi)))))
6286 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6287 true, GSI_SAME_STMT);
6288 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6290 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6291 /* As this isn't a plain copy we have to reset alignment
6292 information. */
6293 if (SSA_NAME_PTR_INFO (comp))
6294 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6298 if (gimple_code (use->stmt) == GIMPLE_PHI)
6300 ass = gimple_build_assign (tgt, comp);
6301 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6303 bsi = gsi_for_stmt (use->stmt);
6304 remove_phi_node (&bsi, false);
6306 else
6308 gimple_assign_set_rhs_from_tree (&bsi, comp);
6309 use->stmt = gsi_stmt (bsi);
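
/* An invented example of the rewrite above: a nonlinear use

     a_8 = i_4 * 4;

   expressed through an unsigned candidate ivtmp_6 that already advances
   by 4 each iteration might become

     a_8 = (int) ivtmp_6;

   where the right-hand side is whatever get_computation produced, forced
   into valid gimple form if necessary.  */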

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;

   Note how the offset drops from 16 to 8: iv2 = iv1 + 1, so indexing by
   iv2 with stride 8 overshoots the original address by one stride.  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  /* The iv update must immediately precede the exit condition.  */
  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  /* And USE must in turn immediately precede the iv update.  */
  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
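
/* Note that after the move CAND is recorded as being incremented right
   before USE (IP_BEFORE_USE), so later address computations for USE are
   based on var_after, and the constant offset shrinks by one step, as in
   the example above.  */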

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
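
/* An invented example: an address use *p_7 whose affine form works out
   to &a + 4 * ivtmp_3 may be rewritten by create_mem_ref into a
   TARGET_MEM_REF along the lines of

     MEM[symbol: a, index: ivtmp_3, step: 4]

   choosing a shape that is valid for the target's addressing modes.  */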

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
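
/* An invented example of the replacement above: an exit test

     if (i_23 < 100)

   may, for a candidate counting the remaining iterations, become

     if (ivtmp_17 != 0)

   with the bound forced into the loop preheader first when it is not
   already a simple operand.  */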

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;

	      /* Pick the selected candidate whose iv most resembles the
		 dead one: a matching step counts 4, a matching mode of
		 the base 2, a constant base 1.  Only the first 64 uses
		 are scanned, to bound the amount of work done here.  */
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
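
/* An invented example of the debug handling above: if the removed iv i
   had several debug uses, the code emits a debug temp such as

     # DEBUG D#1 => (int) ivtmp_5 - 1
     # DEBUG i => D#1

   so that var-tracking can still describe i's value in terms of the
   surviving candidate.  */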

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  DATA is
   the pass data to be freed.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY (an array of NUM_NODES basic blocks)
   includes any function calls, ignoring inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
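
/* The flag computed from this predicate feeds the register-pressure
   estimate (it is passed down to estimate_reg_pressure_cost): when the
   body contains a call, call-clobbered registers are unavailable across
   it, which makes large iv sets costlier.  */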

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}