/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction variables
      and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible: it might give better performance and it might enable us to
   decide costs more precisely, but getting all the interactions right
   would be complicated.  */
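
/* As an illustrative sketch (a hypothetical input and output, not taken
   from the compiler), the classical strength-reduction case rewrites an
   indexed store

     for (i = 0; i < n; i++)
       a[i] = 0;

   into a form based on a pointer induction variable

     for (p = a; p < a + n; p++)
       *p = 0;

   when the cost model finds the pointer form cheaper on the target.  */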
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

static hashval_t mbc_entry_hash (const void *);
static int mbc_entry_eq (const void *, const void *);

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Cached costs for multiplies by constants, and a flag to indicate
   when they're valid.  */
static htab_t mult_costs[2];
static bool cost_tables_exist = false;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so that
         we do not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}
/* Returns the structure describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Allocate data structures for the cost model.  */

void
initialize_costs (void)
{
  mult_costs[0] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  mult_costs[1] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  cost_tables_exist = true;
}

/* Release data structures for the cost model.  */

void
finalize_costs (void)
{
  cost_tables_exist = false;
  htab_delete (mult_costs[0]);
  htab_delete (mult_costs[1]);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);

  initialize_costs ();
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
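
/* For instance, for EXPR = &a[i] (an ADDR_EXPR whose base address is the
   declaration of a) the function returns &a, while for an integer constant
   or any expression of non-pointer type it returns NULL_TREE.  */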
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, in case it
     was not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
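
/* For instance, for an access a[i] where a has 4-byte elements and i is an
   induction variable with step 1, the index is replaced by the initial
   value of i and 1 * 4 = 4 bytes are accumulated into DTA->step.  */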
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
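
/* For example, TOP = i * 4 with BOT = i gives *MUL = 4 (through the
   MULT_EXPR case and the operand_equal_p base case), and TOP = 12 with
   BOT = 4 gives *MUL = 3 (through the INTEGER_CST case); TOP = 13 with
   BOT = 4 fails because the division leaves a remainder.  */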
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
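
/* For example, stripping EXPR = p + 10 yields p with *OFFSET = 10, and
   stripping EXPR = &a[4] (with 4-byte elements of a) yields &a with
   *OFFSET = 16, the array reference having been stripped completely.  */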
2192 /* Returns a variant of TYPE that can be used as a base for different uses.
2193 We return an unsigned type with the same precision, which avoids problems
2194 with overflows. */
2196 static tree
2197 generic_type_for (tree type)
2199 if (POINTER_TYPE_P (type))
2200 return unsigned_type_for (type);
2202 if (TYPE_UNSIGNED (type))
2203 return type;
2205 return unsigned_type_for (type);
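/* For example, on a typical ILP32 target both 'int' and 'int *' map to
   'unsigned int' here, while an already unsigned type is returned
   unchanged (illustrative; the exact type depends on the target).  */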
2208 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2209 the bitmap in which we should store them. */
2211 static struct ivopts_data *fd_ivopts_data;
2212 static tree
2213 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2215 bitmap *depends_on = (bitmap *) data;
2216 struct version_info *info;
2218 if (TREE_CODE (*expr_p) != SSA_NAME)
2219 return NULL_TREE;
2220 info = name_info (fd_ivopts_data, *expr_p);
2222 if (!info->inv_id || info->has_nonlin_use)
2223 return NULL_TREE;
2225 if (!*depends_on)
2226 *depends_on = BITMAP_ALLOC (NULL);
2227 bitmap_set_bit (*depends_on, info->inv_id);
2229 return NULL_TREE;
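/* The bitmap built here records the inv_ids of the loop invariants the
   expression mentions; the cost model later charges candidates for
   keeping these invariants live in registers.  */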
2232 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2233 position to POS. If USE is not NULL, the candidate is set as related to
2234 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2235 replacement of the final value of the iv by a direct computation. */
2237 static struct iv_cand *
2238 add_candidate_1 (struct ivopts_data *data,
2239 tree base, tree step, bool important, enum iv_position pos,
2240 struct iv_use *use, gimple incremented_at)
2242 unsigned i;
2243 struct iv_cand *cand = NULL;
2244 tree type, orig_type;
2246 /* For non-original variables, make sure their values are computed in a type
2247 that does not invoke undefined behavior on overflows (since in general,
2248 we cannot prove that these induction variables are non-wrapping). */
2249 if (pos != IP_ORIGINAL)
2251 orig_type = TREE_TYPE (base);
2252 type = generic_type_for (orig_type);
2253 if (type != orig_type)
2255 base = fold_convert (type, base);
2256 step = fold_convert (type, step);
2260 for (i = 0; i < n_iv_cands (data); i++)
2262 cand = iv_cand (data, i);
2264 if (cand->pos != pos)
2265 continue;
2267 if (cand->incremented_at != incremented_at
2268 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2269 && cand->ainc_use != use))
2270 continue;
2272 if (!cand->iv)
2274 if (!base && !step)
2275 break;
2277 continue;
2280 if (!base && !step)
2281 continue;
2283 if (operand_equal_p (base, cand->iv->base, 0)
2284 && operand_equal_p (step, cand->iv->step, 0)
2285 && (TYPE_PRECISION (TREE_TYPE (base))
2286 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2287 break;
2290 if (i == n_iv_cands (data))
2292 cand = XCNEW (struct iv_cand);
2293 cand->id = i;
2295 if (!base && !step)
2296 cand->iv = NULL;
2297 else
2298 cand->iv = alloc_iv (base, step);
2300 cand->pos = pos;
2301 if (pos != IP_ORIGINAL && cand->iv)
2303 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2304 cand->var_after = cand->var_before;
2306 cand->important = important;
2307 cand->incremented_at = incremented_at;
2308 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2310 if (step
2311 && TREE_CODE (step) != INTEGER_CST)
2313 fd_ivopts_data = data;
2314 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2317 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2318 cand->ainc_use = use;
2319 else
2320 cand->ainc_use = NULL;
2322 if (dump_file && (dump_flags & TDF_DETAILS))
2323 dump_cand (dump_file, cand);
2326 if (important && !cand->important)
2328 cand->important = true;
2329 if (dump_file && (dump_flags & TDF_DETAILS))
2330 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2333 if (use)
2335 bitmap_set_bit (use->related_cands, i);
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 fprintf (dump_file, "Candidate %d is related to use %d\n",
2338 cand->id, use->id);
2341 return cand;
2344 /* Returns true if incrementing the induction variable at the end of the LOOP
2345 is allowed.
2347 The purpose is to avoid splitting the latch edge with a biv increment, thus
2348 creating a jump, possibly confusing other optimization passes and leaving
2349 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2350 is not available (so we do not have a better alternative), or if the latch
2351 edge is already nonempty. */
2353 static bool
2354 allow_ip_end_pos_p (struct loop *loop)
2356 if (!ip_normal_pos (loop))
2357 return true;
2359 if (!empty_block_p (ip_end_pos (loop)))
2360 return true;
2362 return false;
2365 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2366 Important field is set to IMPORTANT. */
2368 static void
2369 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2370 bool important, struct iv_use *use)
2372 basic_block use_bb = gimple_bb (use->stmt);
2373 enum machine_mode mem_mode;
2374 unsigned HOST_WIDE_INT cstepi;
2376 /* If we insert the increment in any position other than the standard
2377 ones, we must ensure that it is incremented exactly once per iteration.
2378 It must not be placed in an inner nested loop, nor on one side of an
2379 if statement. */
2380 if (use_bb->loop_father != data->current_loop
2381 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2382 || stmt_could_throw_p (use->stmt)
2383 || !cst_and_fits_in_hwi (step))
2384 return;
2386 cstepi = int_cst_value (step);
2388 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2389 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2390 || USE_STORE_PRE_INCREMENT (mem_mode))
2391 && GET_MODE_SIZE (mem_mode) == cstepi)
2392 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2393 || USE_STORE_PRE_DECREMENT (mem_mode))
2394 && GET_MODE_SIZE (mem_mode) == -cstepi))
2396 enum tree_code code = MINUS_EXPR;
2397 tree new_base;
2398 tree new_step = step;
2400 if (POINTER_TYPE_P (TREE_TYPE (base)))
2402 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2403 code = POINTER_PLUS_EXPR;
2405 else
2406 new_step = fold_convert (TREE_TYPE (base), new_step);
2407 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2408 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2409 use->stmt);
2411 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2412 || USE_STORE_POST_INCREMENT (mem_mode))
2413 && GET_MODE_SIZE (mem_mode) == cstepi)
2414 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2415 || USE_STORE_POST_DECREMENT (mem_mode))
2416 && GET_MODE_SIZE (mem_mode) == -cstepi))
2418 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2419 use->stmt);
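/* Illustrative case: a memory access through a pointer that advances by
   exactly the access size each iteration matches the IP_AFTER_USE
   candidate above, so the address can become a post-increment.  */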
2423 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2424 position to POS. If USE is not NULL, the candidate is set as related to
2425 it. The candidate computation is scheduled on all available positions. */
2427 static void
2428 add_candidate (struct ivopts_data *data,
2429 tree base, tree step, bool important, struct iv_use *use)
2431 if (ip_normal_pos (data->current_loop))
2432 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2433 if (ip_end_pos (data->current_loop)
2434 && allow_ip_end_pos_p (data->current_loop))
2435 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2437 if (use != NULL && use->type == USE_ADDRESS)
2438 add_autoinc_candidates (data, base, step, important, use);
2441 /* Adds standard iv candidates. */
2443 static void
2444 add_standard_iv_candidates (struct ivopts_data *data)
2446 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2448 /* The same with 'long', if it is wider than 'int' and still fast enough. */
2449 if (TYPE_PRECISION
2450 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2451 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2452 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2453 build_int_cst (long_integer_type_node, 1), true, NULL);
2455 /* And again with 'long long', if it is wider than 'long' and still fast enough. */
2456 if (TYPE_PRECISION
2457 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2458 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2459 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2460 build_int_cst (long_long_integer_type_node, 1), true, NULL);
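/* The standard candidates are thus the canonical counters 0, 1, 2, ...
   in 'int', 'long' and 'long long', each added only if strictly wider
   than the previous type and at most one machine word.  */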
2464 /* Adds candidates based on the old induction variable IV. */
2466 static void
2467 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2469 gimple phi;
2470 tree def;
2471 struct iv_cand *cand;
2473 add_candidate (data, iv->base, iv->step, true, NULL);
2475 /* The same, but with initial value zero. */
2476 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2477 add_candidate (data, size_int (0), iv->step, true, NULL);
2478 else
2479 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2480 iv->step, true, NULL);
2482 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2483 if (gimple_code (phi) == GIMPLE_PHI)
2485 /* Additionally record the possibility of leaving the original iv
2486 untouched. */
2487 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2488 cand = add_candidate_1 (data,
2489 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2490 SSA_NAME_DEF_STMT (def));
2491 cand->var_before = iv->ssa_name;
2492 cand->var_after = def;
2496 /* Adds candidates based on the old induction variables. */
2498 static void
2499 add_old_ivs_candidates (struct ivopts_data *data)
2501 unsigned i;
2502 struct iv *iv;
2503 bitmap_iterator bi;
2505 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2507 iv = ver_info (data, i)->iv;
2508 if (iv && iv->biv_p && !integer_zerop (iv->step))
2509 add_old_iv_candidates (data, iv);
2513 /* Adds candidates based on the value of the induction variable IV and USE. */
2515 static void
2516 add_iv_value_candidates (struct ivopts_data *data,
2517 struct iv *iv, struct iv_use *use)
2519 unsigned HOST_WIDE_INT offset;
2520 tree base;
2521 tree basetype;
2523 add_candidate (data, iv->base, iv->step, false, use);
2525 /* The same, but with initial value zero. Make such a variable important,
2526 since it is generic enough that many uses may possibly be based
2527 on it. */
2528 basetype = TREE_TYPE (iv->base);
2529 if (POINTER_TYPE_P (basetype))
2530 basetype = sizetype;
2531 add_candidate (data, build_int_cst (basetype, 0),
2532 iv->step, true, use);
2534 /* Third, try removing the constant offset. Make sure to even
2535 add a candidate for &a[0] vs. (T *)&a. */
2536 base = strip_offset (iv->base, &offset);
2537 if (offset
2538 || base != iv->base)
2539 add_candidate (data, base, iv->step, false, use);
2542 /* Adds candidates based on the uses. */
2544 static void
2545 add_derived_ivs_candidates (struct ivopts_data *data)
2547 unsigned i;
2549 for (i = 0; i < n_iv_uses (data); i++)
2551 struct iv_use *use = iv_use (data, i);
2553 if (!use)
2554 continue;
2556 switch (use->type)
2558 case USE_NONLINEAR_EXPR:
2559 case USE_COMPARE:
2560 case USE_ADDRESS:
2561 /* Just add the ivs based on the value of the iv used here. */
2562 add_iv_value_candidates (data, use->iv, use);
2563 break;
2565 default:
2566 gcc_unreachable ();
2571 /* Record important candidates and add them to related_cands bitmaps
2572 if needed. */
2574 static void
2575 record_important_candidates (struct ivopts_data *data)
2577 unsigned i;
2578 struct iv_use *use;
2580 for (i = 0; i < n_iv_cands (data); i++)
2582 struct iv_cand *cand = iv_cand (data, i);
2584 if (cand->important)
2585 bitmap_set_bit (data->important_candidates, i);
2588 data->consider_all_candidates = (n_iv_cands (data)
2589 <= CONSIDER_ALL_CANDIDATES_BOUND);
2591 if (data->consider_all_candidates)
2593 /* We will not need "related_cands" bitmaps in this case,
2594 so release them to decrease peak memory consumption. */
2595 for (i = 0; i < n_iv_uses (data); i++)
2597 use = iv_use (data, i);
2598 BITMAP_FREE (use->related_cands);
2601 else
2603 /* Add important candidates to the related_cands bitmaps. */
2604 for (i = 0; i < n_iv_uses (data); i++)
2605 bitmap_ior_into (iv_use (data, i)->related_cands,
2606 data->important_candidates);
2610 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2611 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2612 we allocate a small open-addressed hash table for every use. */
2614 static void
2615 alloc_use_cost_map (struct ivopts_data *data)
2617 unsigned i, size, s, j;
2619 for (i = 0; i < n_iv_uses (data); i++)
2621 struct iv_use *use = iv_use (data, i);
2622 bitmap_iterator bi;
2624 if (data->consider_all_candidates)
2625 size = n_iv_cands (data);
2626 else
2628 s = 0;
2629 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2631 s++;
2634 /* Round up to a power of two, so that computing the modulus by it is fast. */
2635 for (size = 1; size < s; size <<= 1)
2636 continue;
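/* E.g. five related candidates yield a table of size 8.  */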
2639 use->n_map_members = size;
2640 use->cost_map = XCNEWVEC (struct cost_pair, size);
2644 /* Returns description of computation cost of expression whose runtime
2645 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2647 static comp_cost
2648 new_cost (unsigned runtime, unsigned complexity)
2650 comp_cost cost;
2652 cost.cost = runtime;
2653 cost.complexity = complexity;
2655 return cost;
2658 /* Adds costs COST1 and COST2. */
2660 static comp_cost
2661 add_costs (comp_cost cost1, comp_cost cost2)
2663 cost1.cost += cost2.cost;
2664 cost1.complexity += cost2.complexity;
2666 return cost1;
2668 /* Subtracts costs COST1 and COST2. */
2670 static comp_cost
2671 sub_costs (comp_cost cost1, comp_cost cost2)
2673 cost1.cost -= cost2.cost;
2674 cost1.complexity -= cost2.complexity;
2676 return cost1;
2679 /* Returns a negative number if COST1 < COST2, a positive number if
2680 COST1 > COST2, and 0 if COST1 = COST2. */
2682 static int
2683 compare_costs (comp_cost cost1, comp_cost cost2)
2685 if (cost1.cost == cost2.cost)
2686 return cost1.complexity - cost2.complexity;
2688 return cost1.cost - cost2.cost;
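/* I.e. costs compare lexicographically: the runtime cost decides first,
   and complexity only breaks ties.  */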
2691 /* Returns true if COST is infinite. */
2693 static bool
2694 infinite_cost_p (comp_cost cost)
2696 return cost.cost == INFTY;
2699 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2700 depends on the invariants DEPENDS_ON, that the value used in expressing it
2701 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2703 static void
2704 set_use_iv_cost (struct ivopts_data *data,
2705 struct iv_use *use, struct iv_cand *cand,
2706 comp_cost cost, bitmap depends_on, tree value,
2707 enum tree_code comp, int inv_expr_id)
2709 unsigned i, s;
2711 if (infinite_cost_p (cost))
2713 BITMAP_FREE (depends_on);
2714 return;
2717 if (data->consider_all_candidates)
2719 use->cost_map[cand->id].cand = cand;
2720 use->cost_map[cand->id].cost = cost;
2721 use->cost_map[cand->id].depends_on = depends_on;
2722 use->cost_map[cand->id].value = value;
2723 use->cost_map[cand->id].comp = comp;
2724 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2725 return;
2728 /* n_map_members is a power of two, so this computes modulo. */
2729 s = cand->id & (use->n_map_members - 1);
2730 for (i = s; i < use->n_map_members; i++)
2731 if (!use->cost_map[i].cand)
2732 goto found;
2733 for (i = 0; i < s; i++)
2734 if (!use->cost_map[i].cand)
2735 goto found;
2737 gcc_unreachable ();
2739 found:
2740 use->cost_map[i].cand = cand;
2741 use->cost_map[i].cost = cost;
2742 use->cost_map[i].depends_on = depends_on;
2743 use->cost_map[i].value = value;
2744 use->cost_map[i].comp = comp;
2745 use->cost_map[i].inv_expr_id = inv_expr_id;
2748 /* Gets cost of (USE, CANDIDATE) pair. */
2750 static struct cost_pair *
2751 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2752 struct iv_cand *cand)
2754 unsigned i, s;
2755 struct cost_pair *ret;
2757 if (!cand)
2758 return NULL;
2760 if (data->consider_all_candidates)
2762 ret = use->cost_map + cand->id;
2763 if (!ret->cand)
2764 return NULL;
2766 return ret;
2769 /* n_map_members is a power of two, so this computes modulo. */
2770 s = cand->id & (use->n_map_members - 1);
2771 for (i = s; i < use->n_map_members; i++)
2772 if (use->cost_map[i].cand == cand)
2773 return use->cost_map + i;
2775 for (i = 0; i < s; i++)
2776 if (use->cost_map[i].cand == cand)
2777 return use->cost_map + i;
2779 return NULL;
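/* The probe order (starting at CAND->id & mask, wrapping around once)
   mirrors the insertion order in set_use_iv_cost, so a recorded pair is
   always found.  */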
2782 /* Returns estimate on cost of computing SEQ. */
2784 static unsigned
2785 seq_cost (rtx seq, bool speed)
2787 unsigned cost = 0;
2788 rtx set;
2790 for (; seq; seq = NEXT_INSN (seq))
2792 set = single_set (seq);
2793 if (set)
2794 cost += set_src_cost (SET_SRC (set), speed);
2795 else
2796 cost++;
2799 return cost;
2802 /* Produces DECL_RTL for object OBJ so that it looks as if it is stored in memory. */
2803 static rtx
2804 produce_memory_decl_rtl (tree obj, int *regno)
2806 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2807 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2808 rtx x;
2810 gcc_assert (obj);
2811 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2813 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2814 x = gen_rtx_SYMBOL_REF (address_mode, name);
2815 SET_SYMBOL_REF_DECL (x, obj);
2816 x = gen_rtx_MEM (DECL_MODE (obj), x);
2817 set_mem_addr_space (x, as);
2818 targetm.encode_section_info (obj, x, true);
2820 else
2822 x = gen_raw_REG (address_mode, (*regno)++);
2823 x = gen_rtx_MEM (DECL_MODE (obj), x);
2824 set_mem_addr_space (x, as);
2827 return x;
2830 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2831 walk_tree. DATA contains the actual fake register number. */
2833 static tree
2834 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2836 tree obj = NULL_TREE;
2837 rtx x = NULL_RTX;
2838 int *regno = (int *) data;
2840 switch (TREE_CODE (*expr_p))
2842 case ADDR_EXPR:
2843 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2844 handled_component_p (*expr_p);
2845 expr_p = &TREE_OPERAND (*expr_p, 0))
2846 continue;
2847 obj = *expr_p;
2848 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2849 x = produce_memory_decl_rtl (obj, regno);
2850 break;
2852 case SSA_NAME:
2853 *ws = 0;
2854 obj = SSA_NAME_VAR (*expr_p);
2855 if (!DECL_RTL_SET_P (obj))
2856 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2857 break;
2859 case VAR_DECL:
2860 case PARM_DECL:
2861 case RESULT_DECL:
2862 *ws = 0;
2863 obj = *expr_p;
2865 if (DECL_RTL_SET_P (obj))
2866 break;
2868 if (DECL_MODE (obj) == BLKmode)
2869 x = produce_memory_decl_rtl (obj, regno);
2870 else
2871 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2873 break;
2875 default:
2876 break;
2879 if (x)
2881 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2882 SET_DECL_RTL (obj, x);
2885 return NULL_TREE;
2888 /* Determines cost of the computation of EXPR. */
2890 static unsigned
2891 computation_cost (tree expr, bool speed)
2893 rtx seq, rslt;
2894 tree type = TREE_TYPE (expr);
2895 unsigned cost;
2896 /* Avoid using hard regs in ways which may be unsupported. */
2897 int regno = LAST_VIRTUAL_REGISTER + 1;
2898 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2899 enum node_frequency real_frequency = node->frequency;
2901 node->frequency = NODE_FREQUENCY_NORMAL;
2902 crtl->maybe_hot_insn_p = speed;
2903 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2904 start_sequence ();
2905 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2906 seq = get_insns ();
2907 end_sequence ();
2908 default_rtl_profile ();
2909 node->frequency = real_frequency;
2911 cost = seq_cost (seq, speed);
2912 if (MEM_P (rslt))
2913 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2914 TYPE_ADDR_SPACE (type), speed);
2915 else if (!REG_P (rslt))
2916 cost += set_src_cost (rslt, speed);
2918 return cost;
2921 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2923 static tree
2924 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2926 if (stmt_after_increment (loop, cand, stmt))
2927 return cand->var_after;
2928 else
2929 return cand->var_before;
2932 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2933 same precision, at least as wide as the precision of TYPE, stores
2934 BA to A and BB to B, and returns the type of BA. Otherwise, leaves A and B
2935 unchanged and returns the type of A. */
2937 static tree
2938 determine_common_wider_type (tree *a, tree *b)
2940 tree wider_type = NULL;
2941 tree suba, subb;
2942 tree atype = TREE_TYPE (*a);
2944 if (CONVERT_EXPR_P (*a))
2946 suba = TREE_OPERAND (*a, 0);
2947 wider_type = TREE_TYPE (suba);
2948 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2949 return atype;
2951 else
2952 return atype;
2954 if (CONVERT_EXPR_P (*b))
2956 subb = TREE_OPERAND (*b, 0);
2957 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2958 return atype;
2960 else
2961 return atype;
2963 *a = suba;
2964 *b = subb;
2965 return wider_type;
2968 /* Determines the expression by which USE is expressed from induction variable
2969 CAND at statement AT in LOOP. The expression is stored in a decomposed
2970 form into AFF. Returns false if USE cannot be expressed using CAND. */
2972 static bool
2973 get_computation_aff (struct loop *loop,
2974 struct iv_use *use, struct iv_cand *cand, gimple at,
2975 struct affine_tree_combination *aff)
2977 tree ubase = use->iv->base;
2978 tree ustep = use->iv->step;
2979 tree cbase = cand->iv->base;
2980 tree cstep = cand->iv->step, cstep_common;
2981 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2982 tree common_type, var;
2983 tree uutype;
2984 aff_tree cbase_aff, var_aff;
2985 double_int rat;
2987 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2989 /* We do not have enough precision to express the values of the use. */
2990 return false;
2993 var = var_at_stmt (loop, cand, at);
2994 uutype = unsigned_type_for (utype);
2996 /* If the conversion is not a no-op, perform it. */
2997 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2999 cstep = fold_convert (uutype, cstep);
3000 cbase = fold_convert (uutype, cbase);
3001 var = fold_convert (uutype, var);
3004 if (!constant_multiple_of (ustep, cstep, &rat))
3005 return false;
3007 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3008 type, we achieve better folding by computing their difference in this
3009 wider type, and casting the result to UUTYPE. We do not need to worry about
3010 overflows, as all the arithmetic will in the end be performed in UUTYPE
3011 anyway. */
3012 common_type = determine_common_wider_type (&ubase, &cbase);
3014 /* use = ubase - ratio * cbase + ratio * var. */
3015 tree_to_aff_combination (ubase, common_type, aff);
3016 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3017 tree_to_aff_combination (var, uutype, &var_aff);
3019 /* We need to shift the value if we are after the increment. */
3020 if (stmt_after_increment (loop, cand, at))
3022 aff_tree cstep_aff;
3024 if (common_type != uutype)
3025 cstep_common = fold_convert (common_type, cstep);
3026 else
3027 cstep_common = cstep;
3029 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3030 aff_combination_add (&cbase_aff, &cstep_aff);
3033 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3034 aff_combination_add (aff, &cbase_aff);
3035 if (common_type != uutype)
3036 aff_combination_convert (aff, uutype);
3038 aff_combination_scale (&var_aff, rat);
3039 aff_combination_add (aff, &var_aff);
3041 return true;
3044 /* Determines the expression by which USE is expressed from induction variable
3045 CAND at statement AT in LOOP. The computation is unshared. */
3047 static tree
3048 get_computation_at (struct loop *loop,
3049 struct iv_use *use, struct iv_cand *cand, gimple at)
3051 aff_tree aff;
3052 tree type = TREE_TYPE (use->iv->base);
3054 if (!get_computation_aff (loop, use, cand, at, &aff))
3055 return NULL_TREE;
3056 unshare_aff_combination (&aff);
3057 return fold_convert (type, aff_combination_to_tree (&aff));
3060 /* Determines the expression by which USE is expressed from induction variable
3061 CAND in LOOP. The computation is unshared. */
3063 static tree
3064 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3066 return get_computation_at (loop, use, cand, use->stmt);
3069 /* Adjust the cost COST for being in the loop setup rather than the loop body.
3070 If we're optimizing for space, the loop setup overhead is constant;
3071 if we're optimizing for speed, amortize it over the average number of iterations. */
3072 static unsigned
3073 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3075 if (cost == INFTY)
3076 return cost;
3077 else if (optimize_loop_for_speed_p (data->current_loop))
3078 return cost / avg_loop_niter (data->current_loop);
3079 else
3080 return cost;
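/* E.g. a setup cost of 20 in a loop averaging 4 iterations contributes 5
   per iteration when optimizing for speed, but stays 20 for size.  */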
3083 /* Returns cost of addition in MODE. */
3085 unsigned
3086 add_regs_cost (enum machine_mode mode, bool speed)
3088 static unsigned costs[NUM_MACHINE_MODES][2];
3089 rtx seq;
3090 unsigned cost;
3092 if (costs[mode][speed])
3093 return costs[mode][speed];
3095 start_sequence ();
3096 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3097 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3098 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3099 NULL_RTX);
3100 seq = get_insns ();
3101 end_sequence ();
3103 cost = seq_cost (seq, speed);
3104 if (!cost)
3105 cost = 1;
3107 costs[mode][speed] = cost;
3109 if (dump_file && (dump_flags & TDF_DETAILS))
3110 fprintf (dump_file, "Addition in %s costs %d\n",
3111 GET_MODE_NAME (mode), cost);
3112 return cost;
3115 /* Returns cost of multiplication in MODE. */
3117 unsigned
3118 multiply_regs_cost (enum machine_mode mode, bool speed)
3120 static unsigned costs[NUM_MACHINE_MODES][2];
3121 rtx seq;
3122 unsigned cost;
3124 if (costs[mode][speed])
3125 return costs[mode][speed];
3127 start_sequence ();
3128 force_operand (gen_rtx_fmt_ee (MULT, mode,
3129 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3130 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3131 NULL_RTX);
3132 seq = get_insns ();
3133 end_sequence ();
3135 cost = seq_cost (seq, speed);
3136 if (!cost)
3137 cost = 1;
3139 costs[mode][speed] = cost;
3141 if (dump_file && (dump_flags & TDF_DETAILS))
3142 fprintf (dump_file, "Multiplication in %s costs %d\n",
3143 GET_MODE_NAME (mode), cost);
3144 return cost;
3147 /* Returns cost of addition with a constant in MODE. */
3149 unsigned
3150 add_const_cost (enum machine_mode mode, bool speed)
3152 static unsigned costs[NUM_MACHINE_MODES][2];
3153 rtx seq;
3154 unsigned cost;
3156 if (costs[mode][speed])
3157 return costs[mode][speed];
3159 /* Arbitrarily generate insns for x + 2, as the exact constant
3160 shouldn't matter. */
3161 start_sequence ();
3162 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3163 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3164 gen_int_mode (2, mode)),
3165 NULL_RTX);
3166 seq = get_insns ();
3167 end_sequence ();
3169 cost = seq_cost (seq, speed);
3170 if (!cost)
3171 cost = 1;
3173 costs[mode][speed] = cost;
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 fprintf (dump_file, "Addition to constant in %s costs %d\n",
3177 GET_MODE_NAME (mode), cost);
3178 return cost;
3181 /* Returns cost of extend or truncate in MODE. */
3183 unsigned
3184 extend_or_trunc_reg_cost (tree type_to, tree type_from, bool speed)
3186 static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES][2];
3187 rtx seq;
3188 unsigned cost;
3189 enum machine_mode mode_to = TYPE_MODE (type_to);
3190 enum machine_mode mode_from = TYPE_MODE (type_from);
3191 tree size_to = TYPE_SIZE (type_to);
3192 tree size_from = TYPE_SIZE (type_from);
3193 enum rtx_code code;
3195 gcc_assert (TREE_CODE (size_to) == INTEGER_CST
3196 && TREE_CODE (size_from) == INTEGER_CST);
3198 if (costs[mode_to][mode_from][speed])
3199 return costs[mode_to][mode_from][speed];
3201 if (tree_int_cst_lt (size_to, size_from))
3202 code = TRUNCATE;
3203 else if (TYPE_UNSIGNED (type_to))
3204 code = ZERO_EXTEND;
3205 else
3206 code = SIGN_EXTEND;
3208 start_sequence ();
3209 gen_rtx_fmt_e (code, mode_to,
3210 gen_raw_REG (mode_from, LAST_VIRTUAL_REGISTER + 1));
3211 seq = get_insns ();
3212 end_sequence ();
3214 cost = seq_cost (seq, speed);
3215 if (!cost)
3216 cost = 1;
3218 if (dump_file && (dump_flags & TDF_DETAILS))
3219 fprintf (dump_file, "Conversion from %s to %s costs %d\n",
3220 GET_MODE_NAME (mode_to), GET_MODE_NAME (mode_from), cost);
3222 costs[mode_to][mode_from][speed] = cost;
3223 return cost;
3226 /* Returns cost of negation in MODE. */
3228 unsigned
3229 negate_reg_cost (enum machine_mode mode, bool speed)
3231 static unsigned costs[NUM_MACHINE_MODES][2];
3232 rtx seq;
3233 unsigned cost;
3235 if (costs[mode][speed])
3236 return costs[mode][speed];
3238 start_sequence ();
3239 force_operand (gen_rtx_fmt_e (NEG, mode,
3240 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)),
3241 NULL_RTX);
3242 seq = get_insns ();
3243 end_sequence ();
3245 cost = seq_cost (seq, speed);
3246 if (!cost)
3247 cost = 1;
3249 costs[mode][speed] = cost;
3251 if (dump_file && (dump_flags & TDF_DETAILS))
3252 fprintf (dump_file, "Negation in %s costs %d\n",
3253 GET_MODE_NAME (mode), cost);
3254 return cost;
3257 /* Entry in a hashtable of already known costs for multiplication. */
3258 struct mbc_entry
3260 HOST_WIDE_INT cst; /* The constant to multiply by. */
3261 enum machine_mode mode; /* In mode. */
3262 unsigned cost; /* The cost. */
3265 /* Computes the hash value for ENTRY. */
3267 static hashval_t
3268 mbc_entry_hash (const void *entry)
3270 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3272 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3275 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3277 static int
3278 mbc_entry_eq (const void *entry1, const void *entry2)
3280 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3281 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3283 return (e1->mode == e2->mode
3284 && e1->cst == e2->cst);
3287 /* Returns cost of multiplication by constant CST in MODE. */
3289 unsigned
3290 multiply_by_const_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3292 struct mbc_entry **cached, act;
3293 rtx seq;
3294 unsigned cost;
3296 gcc_assert (cost_tables_exist);
3298 act.mode = mode;
3299 act.cst = cst;
3300 cached = (struct mbc_entry **)
3301 htab_find_slot (mult_costs[speed], &act, INSERT);
3303 if (*cached)
3304 return (*cached)->cost;
3306 *cached = XNEW (struct mbc_entry);
3307 (*cached)->mode = mode;
3308 (*cached)->cst = cst;
3310 start_sequence ();
3311 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3312 gen_int_mode (cst, mode), NULL_RTX, 0);
3313 seq = get_insns ();
3314 end_sequence ();
3316 cost = seq_cost (seq, speed);
3318 if (dump_file && (dump_flags & TDF_DETAILS))
3319 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3320 (int) cst, GET_MODE_NAME (mode), cost);
3322 (*cached)->cost = cost;
3324 return cost;
3327 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3328 validity for a memory reference accessing memory of mode MODE in
3329 address space AS. */
3331 DEF_VEC_P (sbitmap);
3332 DEF_VEC_ALLOC_P (sbitmap, heap);
3334 bool
3335 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3336 addr_space_t as)
3338 #define MAX_RATIO 128
3339 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3340 static VEC (sbitmap, heap) *valid_mult_list;
3341 sbitmap valid_mult;
3343 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3344 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3346 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3347 if (!valid_mult)
3349 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3350 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3351 rtx addr;
3352 HOST_WIDE_INT i;
3354 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3355 sbitmap_zero (valid_mult);
3356 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3357 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3359 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3360 if (memory_address_addr_space_p (mode, addr, as))
3361 SET_BIT (valid_mult, i + MAX_RATIO);
3364 if (dump_file && (dump_flags & TDF_DETAILS))
3366 fprintf (dump_file, " allowed multipliers:");
3367 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3368 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3369 fprintf (dump_file, " %d", (int) i);
3370 fprintf (dump_file, "\n");
3371 fprintf (dump_file, "\n");
3374 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3377 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3378 return false;
3380 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
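/* For instance, on i386-like targets only the ratios 1, 2, 4 and 8 are
   valid index scales (illustrative; the probing above computes the exact
   per-target set for each mode and address space).  */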
3383 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3384 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3385 variable is omitted. Compute the cost for a memory reference that accesses
3386 a memory location of mode MEM_MODE in address space AS.
3388 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3389 size of MEM_MODE / RATIO) is available. To make this determination, we
3390 look at the size of the increment to be made, which is given in CSTEP.
3391 CSTEP may be zero if the step is unknown.
3392 STMT_AFTER_INC is true iff the statement we're looking at is after the
3393 increment of the original biv.
3395 TODO -- there must be some better way. This all is quite crude. */
3397 typedef struct
3399 HOST_WIDE_INT min_offset, max_offset;
3400 unsigned costs[2][2][2][2];
3401 } *address_cost_data;
3403 DEF_VEC_P (address_cost_data);
3404 DEF_VEC_ALLOC_P (address_cost_data, heap);
3406 static comp_cost
3407 get_address_cost (bool symbol_present, bool var_present,
3408 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3409 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3410 addr_space_t as, bool speed,
3411 bool stmt_after_inc, bool *may_autoinc)
3413 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3414 static VEC(address_cost_data, heap) *address_cost_data_list;
3415 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3416 address_cost_data data;
3417 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3418 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3419 unsigned cost, acost, complexity;
3420 bool offset_p, ratio_p, autoinc;
3421 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3422 unsigned HOST_WIDE_INT mask;
3423 unsigned bits;
3425 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3426 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3427 data_index + 1);
3429 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3430 if (!data)
3432 HOST_WIDE_INT i;
3433 HOST_WIDE_INT rat, off = 0;
3434 int old_cse_not_expected, width;
3435 unsigned sym_p, var_p, off_p, rat_p, add_c;
3436 rtx seq, addr, base;
3437 rtx reg0, reg1;
3439 data = (address_cost_data) xcalloc (1, sizeof (*data));
3441 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3443 width = GET_MODE_BITSIZE (address_mode) - 1;
3444 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3445 width = HOST_BITS_PER_WIDE_INT - 1;
3446 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3448 for (i = width; i >= 0; i--)
3450 off = -((HOST_WIDE_INT) 1 << i);
3451 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3452 if (memory_address_addr_space_p (mem_mode, addr, as))
3453 break;
3455 data->min_offset = (i == -1 ? 0 : off);
3457 for (i = width; i >= 0; i--)
3459 off = ((HOST_WIDE_INT) 1 << i) - 1;
3460 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3461 if (memory_address_addr_space_p (mem_mode, addr, as))
3462 break;
3464 if (i == -1)
3465 off = 0;
3466 data->max_offset = off;
3468 if (dump_file && (dump_flags & TDF_DETAILS))
3470 fprintf (dump_file, "get_address_cost:\n");
3471 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3472 GET_MODE_NAME (mem_mode),
3473 data->min_offset);
3474 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3475 GET_MODE_NAME (mem_mode),
3476 data->max_offset);
3479 rat = 1;
3480 for (i = 2; i <= MAX_RATIO; i++)
3481 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3483 rat = i;
3484 break;
3487 /* Compute the cost of various addressing modes. */
3488 acost = 0;
3489 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3490 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3492 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3493 || USE_STORE_PRE_DECREMENT (mem_mode))
3495 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3496 has_predec[mem_mode]
3497 = memory_address_addr_space_p (mem_mode, addr, as);
3499 if (USE_LOAD_POST_DECREMENT (mem_mode)
3500 || USE_STORE_POST_DECREMENT (mem_mode))
3502 addr = gen_rtx_POST_DEC (address_mode, reg0);
3503 has_postdec[mem_mode]
3504 = memory_address_addr_space_p (mem_mode, addr, as);
3506 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3507 || USE_STORE_PRE_INCREMENT (mem_mode))
3509 addr = gen_rtx_PRE_INC (address_mode, reg0);
3510 has_preinc[mem_mode]
3511 = memory_address_addr_space_p (mem_mode, addr, as);
3513 if (USE_LOAD_POST_INCREMENT (mem_mode)
3514 || USE_STORE_POST_INCREMENT (mem_mode))
3516 addr = gen_rtx_POST_INC (address_mode, reg0);
3517 has_postinc[mem_mode]
3518 = memory_address_addr_space_p (mem_mode, addr, as);
3520 for (i = 0; i < 16; i++)
3522 sym_p = i & 1;
3523 var_p = (i >> 1) & 1;
3524 off_p = (i >> 2) & 1;
3525 rat_p = (i >> 3) & 1;
3527 addr = reg0;
3528 if (rat_p)
3529 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3530 gen_int_mode (rat, address_mode));
3532 if (var_p)
3533 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3535 if (sym_p)
3537 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3538 /* ??? We can run into trouble with some backends by presenting
3539 it with symbols which haven't been properly passed through
3540 targetm.encode_section_info. By setting the local bit, we
3541 enhance the probability of things working. */
3542 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3544 if (off_p)
3545 base = gen_rtx_fmt_e (CONST, address_mode,
3546 gen_rtx_fmt_ee
3547 (PLUS, address_mode, base,
3548 gen_int_mode (off, address_mode)));
3550 else if (off_p)
3551 base = gen_int_mode (off, address_mode);
3552 else
3553 base = NULL_RTX;
3555 if (base)
3556 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3558 start_sequence ();
3559 /* To avoid splitting addressing modes, pretend that no cse will
3560 follow. */
3561 old_cse_not_expected = cse_not_expected;
3562 cse_not_expected = true;
3563 addr = memory_address_addr_space (mem_mode, addr, as);
3564 cse_not_expected = old_cse_not_expected;
3565 seq = get_insns ();
3566 end_sequence ();
3568 acost = seq_cost (seq, speed);
3569 acost += address_cost (addr, mem_mode, as, speed);
3571 if (!acost)
3572 acost = 1;
3573 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3576 /* On some targets, it is quite expensive to load a symbol into a register,
3577 which makes addresses that contain symbols look much more expensive.
3578 However, the symbol will have to be loaded in any case before the
3579 loop (and quite likely we have it in a register already), so it does not
3580 make much sense to penalize them too heavily. So make some final
3581 tweaks for the SYMBOL_PRESENT modes:
3583 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3584 a var is cheaper, use this mode with a small penalty.
3585 If VAR_PRESENT is true, try whether the mode with
3586 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3587 if this is the case, use it. */
3588 add_c = add_regs_cost (address_mode, speed);
3589 for (i = 0; i < 8; i++)
3591 var_p = i & 1;
3592 off_p = (i >> 1) & 1;
3593 rat_p = (i >> 2) & 1;
3595 acost = data->costs[0][1][off_p][rat_p] + 1;
3596 if (var_p)
3597 acost += add_c;
3599 if (acost < data->costs[1][var_p][off_p][rat_p])
3600 data->costs[1][var_p][off_p][rat_p] = acost;
3603 if (dump_file && (dump_flags & TDF_DETAILS))
3605 fprintf (dump_file, "Address costs:\n");
3607 for (i = 0; i < 16; i++)
3609 sym_p = i & 1;
3610 var_p = (i >> 1) & 1;
3611 off_p = (i >> 2) & 1;
3612 rat_p = (i >> 3) & 1;
3614 fprintf (dump_file, " ");
3615 if (sym_p)
3616 fprintf (dump_file, "sym + ");
3617 if (var_p)
3618 fprintf (dump_file, "var + ");
3619 if (off_p)
3620 fprintf (dump_file, "cst + ");
3621 if (rat_p)
3622 fprintf (dump_file, "rat * ");
3624 acost = data->costs[sym_p][var_p][off_p][rat_p];
3625 fprintf (dump_file, "index costs %d\n", acost);
3627 if (has_predec[mem_mode] || has_postdec[mem_mode]
3628 || has_preinc[mem_mode] || has_postinc[mem_mode])
3629 fprintf (dump_file, " May include autoinc/dec\n");
3630 fprintf (dump_file, "\n");
3633 VEC_replace (address_cost_data, address_cost_data_list,
3634 data_index, data);
3637 bits = GET_MODE_BITSIZE (address_mode);
3638 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3639 offset &= mask;
3640 if ((offset >> (bits - 1) & 1))
3641 offset |= ~mask;
3642 s_offset = offset;
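/* The two steps above reduce OFFSET modulo 2^BITS and sign-extend it from
   bit BITS - 1, so it is interpreted as a signed address-mode offset.  */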
3644 autoinc = false;
3645 msize = GET_MODE_SIZE (mem_mode);
3646 autoinc_offset = offset;
3647 if (stmt_after_inc)
3648 autoinc_offset += ratio * cstep;
3649 if (symbol_present || var_present || ratio != 1)
3650 autoinc = false;
3651 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3652 && msize == cstep)
3653 || (has_postdec[mem_mode] && autoinc_offset == 0
3654 && msize == -cstep)
3655 || (has_preinc[mem_mode] && autoinc_offset == msize
3656 && msize == cstep)
3657 || (has_predec[mem_mode] && autoinc_offset == -msize
3658 && msize == -cstep))
3659 autoinc = true;
3661 cost = 0;
3662 offset_p = (s_offset != 0
3663 && data->min_offset <= s_offset
3664 && s_offset <= data->max_offset);
3665 ratio_p = (ratio != 1
3666 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3668 if (ratio != 1 && !ratio_p)
3669 cost += multiply_by_const_cost (ratio, address_mode, speed);
3671 if (s_offset && !offset_p && !symbol_present)
3672 cost += add_regs_cost (address_mode, speed);
3674 if (may_autoinc)
3675 *may_autoinc = autoinc;
3676 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3677 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3678 return new_cost (cost + acost, complexity);
3681 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3682 EXPR operand holding the multiplication that becomes the shift. COST0 and
3683 COST1 are the costs for calculating the operands of EXPR. Returns true if
3684 successful, and returns the cost in COST. */
3686 static bool
3687 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3688 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3690 comp_cost res;
3691 tree op1 = TREE_OPERAND (expr, 1);
3692 tree cst = TREE_OPERAND (mult, 1);
3693 tree multop = TREE_OPERAND (mult, 0);
3694 int m = exact_log2 (int_cst_value (cst));
3695 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3696 int sa_cost;
3698 if (!(m >= 0 && m < maxm))
3699 return false;
3701 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3702 ? shiftadd_cost[speed][mode][m]
3703 : (mult == op1
3704 ? shiftsub1_cost[speed][mode][m]
3705 : shiftsub0_cost[speed][mode][m]));
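/* For a MINUS_EXPR, shiftsub1 is the cost of x - (y << m), i.e. the
   shifted multiply is the subtrahend, while shiftsub0 is the cost of
   (y << m) - x.  */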
3706 res = new_cost (sa_cost, 0);
3707 res = add_costs (res, mult == op1 ? cost0 : cost1);
3709 STRIP_NOPS (multop);
3710 if (!is_gimple_val (multop))
3711 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3713 *cost = res;
3714 return true;
3717 /* Estimates cost of forcing expression EXPR into a variable. */
3719 static comp_cost
3720 force_expr_to_var_cost (tree expr, bool speed)
3722 static bool costs_initialized = false;
3723 static unsigned integer_cost [2];
3724 static unsigned symbol_cost [2];
3725 static unsigned address_cost [2];
3726 tree op0, op1;
3727 comp_cost cost0, cost1, cost;
3728 enum machine_mode mode;
3730 if (!costs_initialized)
3732 tree type = build_pointer_type (integer_type_node);
3733 tree var, addr;
3734 rtx x;
3735 int i;
3737 var = create_tmp_var_raw (integer_type_node, "test_var");
3738 TREE_STATIC (var) = 1;
3739 x = produce_memory_decl_rtl (var, NULL);
3740 SET_DECL_RTL (var, x);
3742 addr = build1 (ADDR_EXPR, type, var);
3745 for (i = 0; i < 2; i++)
3747 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3748 2000), i);
3750 symbol_cost[i] = computation_cost (addr, i) + 1;
3752 address_cost[i]
3753 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3754 if (dump_file && (dump_flags & TDF_DETAILS))
3756 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3757 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3758 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3759 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3760 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3761 fprintf (dump_file, "\n");
3765 costs_initialized = true;
3768 STRIP_NOPS (expr);
3770 if (SSA_VAR_P (expr))
3771 return no_cost;
3773 if (is_gimple_min_invariant (expr))
3775 if (TREE_CODE (expr) == INTEGER_CST)
3776 return new_cost (integer_cost [speed], 0);
3778 if (TREE_CODE (expr) == ADDR_EXPR)
3780 tree obj = TREE_OPERAND (expr, 0);
3782 if (TREE_CODE (obj) == VAR_DECL
3783 || TREE_CODE (obj) == PARM_DECL
3784 || TREE_CODE (obj) == RESULT_DECL)
3785 return new_cost (symbol_cost [speed], 0);
3788 return new_cost (address_cost [speed], 0);
3791 switch (TREE_CODE (expr))
3793 case POINTER_PLUS_EXPR:
3794 case PLUS_EXPR:
3795 case MINUS_EXPR:
3796 case MULT_EXPR:
3797 op0 = TREE_OPERAND (expr, 0);
3798 op1 = TREE_OPERAND (expr, 1);
3799 STRIP_NOPS (op0);
3800 STRIP_NOPS (op1);
3802 if (is_gimple_val (op0))
3803 cost0 = no_cost;
3804 else
3805 cost0 = force_expr_to_var_cost (op0, speed);
3807 if (is_gimple_val (op1))
3808 cost1 = no_cost;
3809 else
3810 cost1 = force_expr_to_var_cost (op1, speed);
3812 break;
3814 case NEGATE_EXPR:
3815 op0 = TREE_OPERAND (expr, 0);
3816 STRIP_NOPS (op0);
3817 op1 = NULL_TREE;
3819 if (is_gimple_val (op0))
3820 cost0 = no_cost;
3821 else
3822 cost0 = force_expr_to_var_cost (op0, speed);
3824 cost1 = no_cost;
3825 break;
3827 default:
3828 /* Just an arbitrary value, FIXME. */
3829 return new_cost (target_spill_cost[speed], 0);
3832 mode = TYPE_MODE (TREE_TYPE (expr));
3833 switch (TREE_CODE (expr))
3835 case POINTER_PLUS_EXPR:
3836 case PLUS_EXPR:
3837 case MINUS_EXPR:
3838 case NEGATE_EXPR:
3839 cost = new_cost (add_regs_cost (mode, speed), 0);
3840 if (TREE_CODE (expr) != NEGATE_EXPR)
3842 tree mult = NULL_TREE;
3843 comp_cost sa_cost;
3844 if (TREE_CODE (op1) == MULT_EXPR)
3845 mult = op1;
3846 else if (TREE_CODE (op0) == MULT_EXPR)
3847 mult = op0;
3849 if (mult != NULL_TREE
3850 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3851 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3852 &sa_cost))
3853 return sa_cost;
3855 break;
3857 case MULT_EXPR:
3858 if (cst_and_fits_in_hwi (op0))
3859 cost = new_cost (multiply_by_const_cost (int_cst_value (op0),
3860 mode, speed), 0);
3861 else if (cst_and_fits_in_hwi (op1))
3862 cost = new_cost (multiply_by_const_cost (int_cst_value (op1),
3863 mode, speed), 0);
3864 else
3865 return new_cost (target_spill_cost [speed], 0);
3866 break;
3868 default:
3869 gcc_unreachable ();
3872 cost = add_costs (cost, cost0);
3873 cost = add_costs (cost, cost1);
3875 /* Bound the cost by target_spill_cost. The parts of complicated
3876 computations often are either loop invariant or at least can
3877 be shared between several iv uses, so letting this grow without
3878 limits would not give reasonable results. */
3879 if (cost.cost > (int) target_spill_cost [speed])
3880 cost.cost = target_spill_cost [speed];
3882 return cost;
3885 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3886 invariants the computation depends on. */
3888 static comp_cost
3889 force_var_cost (struct ivopts_data *data,
3890 tree expr, bitmap *depends_on)
3892 if (depends_on)
3894 fd_ivopts_data = data;
3895 walk_tree (&expr, find_depends, depends_on, NULL);
3898 return force_expr_to_var_cost (expr, data->speed);
3901 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3902 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3903 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3904 invariants the computation depends on. */
3906 static comp_cost
3907 split_address_cost (struct ivopts_data *data,
3908 tree addr, bool *symbol_present, bool *var_present,
3909 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3911 tree core;
3912 HOST_WIDE_INT bitsize;
3913 HOST_WIDE_INT bitpos;
3914 tree toffset;
3915 enum machine_mode mode;
3916 int unsignedp, volatilep;
3918 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3919 &unsignedp, &volatilep, false);
3921 if (toffset != 0
3922 || bitpos % BITS_PER_UNIT != 0
3923 || TREE_CODE (core) != VAR_DECL)
3925 *symbol_present = false;
3926 *var_present = true;
3927 fd_ivopts_data = data;
3928 walk_tree (&addr, find_depends, depends_on, NULL);
3929 return new_cost (target_spill_cost[data->speed], 0);
3932 *offset += bitpos / BITS_PER_UNIT;
3933 if (TREE_STATIC (core)
3934 || DECL_EXTERNAL (core))
3936 *symbol_present = true;
3937 *var_present = false;
3938 return no_cost;
3941 *symbol_present = false;
3942 *var_present = true;
3943 return no_cost;
3946 /* Estimates cost of expressing difference of addresses E1 - E2 as
3947 var + symbol + offset. The value of offset is added to OFFSET,
3948 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3949 part is missing. DEPENDS_ON is a set of the invariants the computation
3950 depends on. */
3952 static comp_cost
3953 ptr_difference_cost (struct ivopts_data *data,
3954 tree e1, tree e2, bool *symbol_present, bool *var_present,
3955 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3957 HOST_WIDE_INT diff = 0;
3958 aff_tree aff_e1, aff_e2;
3959 tree type;
3961 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3963 if (ptr_difference_const (e1, e2, &diff))
3965 *offset += diff;
3966 *symbol_present = false;
3967 *var_present = false;
3968 return no_cost;
3971 if (integer_zerop (e2))
3972 return split_address_cost (data, TREE_OPERAND (e1, 0),
3973 symbol_present, var_present, offset, depends_on);
3975 *symbol_present = false;
3976 *var_present = true;
3978 type = signed_type_for (TREE_TYPE (e1));
3979 tree_to_aff_combination (e1, type, &aff_e1);
3980 tree_to_aff_combination (e2, type, &aff_e2);
3981 aff_combination_scale (&aff_e2, double_int_minus_one);
3982 aff_combination_add (&aff_e1, &aff_e2);
3984 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3987 /* Estimates cost of expressing difference E1 - E2 as
3988 var + symbol + offset. The value of offset is added to OFFSET,
3989 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3990 part is missing. DEPENDS_ON is a set of the invariants the computation
3991 depends on. */
3993 static comp_cost
3994 difference_cost (struct ivopts_data *data,
3995 tree e1, tree e2, bool *symbol_present, bool *var_present,
3996 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3998 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3999 unsigned HOST_WIDE_INT off1, off2;
4000 aff_tree aff_e1, aff_e2;
4001 tree type;
4003 e1 = strip_offset (e1, &off1);
4004 e2 = strip_offset (e2, &off2);
4005 *offset += off1 - off2;
4007 STRIP_NOPS (e1);
4008 STRIP_NOPS (e2);
4010 if (TREE_CODE (e1) == ADDR_EXPR)
4011 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4012 offset, depends_on);
4013 *symbol_present = false;
4015 if (operand_equal_p (e1, e2, 0))
4017 *var_present = false;
4018 return no_cost;
4021 *var_present = true;
4023 if (integer_zerop (e2))
4024 return force_var_cost (data, e1, depends_on);
4026 if (integer_zerop (e1))
4028 comp_cost cost = force_var_cost (data, e2, depends_on);
4029 cost.cost += multiply_by_const_cost (-1, mode, data->speed);
4030 return cost;
4033 type = signed_type_for (TREE_TYPE (e1));
4034 tree_to_aff_combination (e1, type, &aff_e1);
4035 tree_to_aff_combination (e2, type, &aff_e2);
4036 aff_combination_scale (&aff_e2, double_int_minus_one);
4037 aff_combination_add (&aff_e1, &aff_e2);
4039 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4042 /* Returns true if AFF1 and AFF2 are identical. */
4044 static bool
4045 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4047 unsigned i;
4049 if (aff1->n != aff2->n)
4050 return false;
4052 for (i = 0; i < aff1->n; i++)
4054 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
4055 return false;
4057 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4058 return false;
4060 return true;
4063 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4065 static int
4066 get_expr_id (struct ivopts_data *data, tree expr)
4068 struct iv_inv_expr_ent ent;
4069 struct iv_inv_expr_ent **slot;
4071 ent.expr = expr;
4072 ent.hash = iterative_hash_expr (expr, 0);
4073 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
4074 &ent, INSERT);
4075 if (*slot)
4076 return (*slot)->id;
4078 *slot = XNEW (struct iv_inv_expr_ent);
4079 (*slot)->expr = expr;
4080 (*slot)->hash = ent.hash;
4081 (*slot)->id = data->inv_expr_id++;
4082 return (*slot)->id;
4085 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4086 requires a new compiler-generated temporary. Returns -1 otherwise.
4087 ADDRESS_P is a flag indicating if the expression is for address
4088 computation. */
4090 static int
4091 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4092 tree cbase, HOST_WIDE_INT ratio,
4093 bool address_p)
4095 aff_tree ubase_aff, cbase_aff;
4096 tree expr, ub, cb;
4098 STRIP_NOPS (ubase);
4099 STRIP_NOPS (cbase);
4100 ub = ubase;
4101 cb = cbase;
4103 if ((TREE_CODE (ubase) == INTEGER_CST)
4104 && (TREE_CODE (cbase) == INTEGER_CST))
4105 return -1;
4107 /* Strips the constant part. */
4108 if (TREE_CODE (ubase) == PLUS_EXPR
4109 || TREE_CODE (ubase) == MINUS_EXPR
4110 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4112 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4113 ubase = TREE_OPERAND (ubase, 0);
4116 /* Strips the constant part. */
4117 if (TREE_CODE (cbase) == PLUS_EXPR
4118 || TREE_CODE (cbase) == MINUS_EXPR
4119 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4121 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4122 cbase = TREE_OPERAND (cbase, 0);
4125 if (address_p)
4127 if (((TREE_CODE (ubase) == SSA_NAME)
4128 || (TREE_CODE (ubase) == ADDR_EXPR
4129 && is_gimple_min_invariant (ubase)))
4130 && (TREE_CODE (cbase) == INTEGER_CST))
4131 return -1;
4133 if (((TREE_CODE (cbase) == SSA_NAME)
4134 || (TREE_CODE (cbase) == ADDR_EXPR
4135 && is_gimple_min_invariant (cbase)))
4136 && (TREE_CODE (ubase) == INTEGER_CST))
4137 return -1;
4140 if (ratio == 1)
4142 if (operand_equal_p (ubase, cbase, 0))
4143 return -1;
4145 if (TREE_CODE (ubase) == ADDR_EXPR
4146 && TREE_CODE (cbase) == ADDR_EXPR)
4148 tree usym, csym;
4150 usym = TREE_OPERAND (ubase, 0);
4151 csym = TREE_OPERAND (cbase, 0);
4152 if (TREE_CODE (usym) == ARRAY_REF)
4154 tree ind = TREE_OPERAND (usym, 1);
4155 if (TREE_CODE (ind) == INTEGER_CST
4156 && host_integerp (ind, 0)
4157 && TREE_INT_CST_LOW (ind) == 0)
4158 usym = TREE_OPERAND (usym, 0);
4160 if (TREE_CODE (csym) == ARRAY_REF)
4162 tree ind = TREE_OPERAND (csym, 1);
4163 if (TREE_CODE (ind) == INTEGER_CST
4164 && host_integerp (ind, 0)
4165 && TREE_INT_CST_LOW (ind) == 0)
4166 csym = TREE_OPERAND (csym, 0);
4168 if (operand_equal_p (usym, csym, 0))
4169 return -1;
4171 /* Now do a more complex comparison. */
4172 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4173 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4174 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4175 return -1;
4178 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4179 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4181 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4182 aff_combination_add (&ubase_aff, &cbase_aff);
4183 expr = aff_combination_to_tree (&ubase_aff);
4184 return get_expr_id (data, expr);
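/* Illustrative example: with RATIO == 1, UBASE == p + 8 and CBASE == p
   need no new temporary (the constant parts are stripped and the bases
   compare equal), so -1 is returned.  */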
4189 /* Determines the cost of the computation by which USE is expressed
4190 from induction variable CAND. If ADDRESS_P is true, we just need
4191 to create an address from it, otherwise we want to get it into a
4192 register. A set of invariants we depend on is stored in
4193 DEPENDS_ON. AT is the statement at which the value is computed.
4194 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4195 addressing is likely. */
4197 static comp_cost
4198 get_computation_cost_at (struct ivopts_data *data,
4199 struct iv_use *use, struct iv_cand *cand,
4200 bool address_p, bitmap *depends_on, gimple at,
4201 bool *can_autoinc,
4202 int *inv_expr_id)
4204 tree ubase = use->iv->base, ustep = use->iv->step;
4205 tree cbase, cstep;
4206 tree utype = TREE_TYPE (ubase), ctype;
4207 unsigned HOST_WIDE_INT cstepi, offset = 0;
4208 HOST_WIDE_INT ratio, aratio;
4209 bool var_present, symbol_present, stmt_is_after_inc;
4210 comp_cost cost;
4211 double_int rat;
4212 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4214 *depends_on = NULL;
4216 /* Only consider real candidates. */
4217 if (!cand->iv)
4218 return infinite_cost;
4220 cbase = cand->iv->base;
4221 cstep = cand->iv->step;
4222 ctype = TREE_TYPE (cbase);
4224 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4226 /* We do not have enough precision to express the values of the use. */
4227 return infinite_cost;
4230 if (address_p
4231 || (use->iv->base_object
4232 && cand->iv->base_object
4233 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4234 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4236 /* Do not try to express the address of an object with a computation
4237 based on the address of a different object. This may cause problems in
4238 RTL-level alias analysis (which does not expect this to happen,
4239 as it is illegal in C), and would be unlikely to be useful
4240 anyway. */
4241 if (use->iv->base_object
4242 && cand->iv->base_object
4243 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4244 return infinite_cost;
4247 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4249 /* TODO -- add direct handling of this case. */
4250 goto fallback;
4253 /* CSTEPI is removed from the offset in case the statement is after the
4254 increment. If the step is not constant, we use zero instead.
4255 This is a bit imprecise (there is the extra addition), but
4256 redundancy elimination is likely to transform the code so that
4257 it uses the value of the variable before the increment anyway,
4258 so this is not too unrealistic. */
4259 if (cst_and_fits_in_hwi (cstep))
4260 cstepi = int_cst_value (cstep);
4261 else
4262 cstepi = 0;
4264 if (!constant_multiple_of (ustep, cstep, &rat))
4265 return infinite_cost;
4267 if (double_int_fits_in_shwi_p (rat))
4268 ratio = double_int_to_shwi (rat);
4269 else
4270 return infinite_cost;
4272 STRIP_NOPS (cbase);
4273 ctype = TREE_TYPE (cbase);
4275 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4277 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4278 or ratio == 1, it is better to handle this like
4280 ubase - ratio * cbase + ratio * var
4282 (this also holds in the case ratio == -1, TODO). */
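/* For example, if cbase == 8 and ratio == 2, the use value becomes
ubase - 16 + 2 * var; the constant -16 simply folds into OFFSET below,
so no runtime computation involving cbase remains. */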
4284 if (cst_and_fits_in_hwi (cbase))
4286 offset = - ratio * int_cst_value (cbase);
4287 cost = difference_cost (data,
4288 ubase, build_int_cst (utype, 0),
4289 &symbol_present, &var_present, &offset,
4290 depends_on);
4291 cost.cost /= avg_loop_niter (data->current_loop);
4293 else if (ratio == 1)
4295 tree real_cbase = cbase;
4297 /* Check to see if any adjustment is needed. */
4298 if (cstepi == 0 && stmt_is_after_inc)
4300 aff_tree real_cbase_aff;
4301 aff_tree cstep_aff;
4303 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4304 &real_cbase_aff);
4305 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4307 aff_combination_add (&real_cbase_aff, &cstep_aff);
4308 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4311 cost = difference_cost (data,
4312 ubase, real_cbase,
4313 &symbol_present, &var_present, &offset,
4314 depends_on);
4315 cost.cost /= avg_loop_niter (data->current_loop);
4317 else if (address_p
4318 && !POINTER_TYPE_P (ctype)
4319 && multiplier_allowed_in_address_p
4320 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4321 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4323 cbase
4324 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4325 cost = difference_cost (data,
4326 ubase, cbase,
4327 &symbol_present, &var_present, &offset,
4328 depends_on);
4329 cost.cost /= avg_loop_niter (data->current_loop);
4331 else
4333 cost = force_var_cost (data, cbase, depends_on);
4334 cost = add_costs (cost,
4335 difference_cost (data,
4336 ubase, build_int_cst (utype, 0),
4337 &symbol_present, &var_present,
4338 &offset, depends_on));
4339 cost.cost /= avg_loop_niter (data->current_loop);
4340 cost.cost += add_regs_cost (TYPE_MODE (ctype), data->speed);
4343 if (inv_expr_id)
4345 *inv_expr_id =
4346 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4347 /* Clear depends on. */
4348 if (*inv_expr_id != -1 && depends_on && *depends_on)
4349 bitmap_clear (*depends_on);
4352 /* If we are after the increment, the value of the candidate is higher by
4353 one iteration. */
4354 if (stmt_is_after_inc)
4355 offset -= ratio * cstepi;
4357 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4358 (symbol/var1/const parts may be omitted). If we are looking for an
4359 address, find the cost of addressing this. */
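/* E.g., for an address computed as base + 4 * i + 10 from a candidate
with step 1, the symbol part is base, the constant part is 10, and
ratio * var2 accounts for the 4 * i term. */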
4360 if (address_p)
4361 return add_costs (cost,
4362 get_address_cost (symbol_present, var_present,
4363 offset, ratio, cstepi,
4364 TYPE_MODE (TREE_TYPE (utype)),
4365 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4366 speed, stmt_is_after_inc,
4367 can_autoinc));
4369 /* Otherwise estimate the costs for computing the expression. */
4370 if (!symbol_present && !var_present && !offset)
4372 if (ratio != 1)
4373 cost.cost += multiply_by_const_cost (ratio, TYPE_MODE (ctype), speed);
4374 return cost;
4377 /* Symbol + offset should be compile-time computable, so consider that
4378 they are added once to the variable, if present. */
4379 if (var_present && (symbol_present || offset))
4380 cost.cost += adjust_setup_cost (data,
4381 add_regs_cost (TYPE_MODE (ctype), speed));
4383 /* Having an offset does not affect the runtime cost if it is added to
4384 a symbol, but it increases complexity. */
4385 if (offset)
4386 cost.complexity++;
4388 cost.cost += add_regs_cost (TYPE_MODE (ctype), speed);
4390 aratio = ratio > 0 ? ratio : -ratio;
4391 if (aratio != 1)
4392 cost.cost += multiply_by_const_cost (aratio, TYPE_MODE (ctype), speed);
4393 return cost;
4395 fallback:
4396 if (can_autoinc)
4397 *can_autoinc = false;
4400 /* Just get the expression, expand it and measure the cost. */
4401 tree comp = get_computation_at (data->current_loop, use, cand, at);
4403 if (!comp)
4404 return infinite_cost;
4406 if (address_p)
4407 comp = build_simple_mem_ref (comp);
4409 return new_cost (computation_cost (comp, speed), 0);
4413 /* Determines the cost of the computation by which USE is expressed
4414 from induction variable CAND. If ADDRESS_P is true, we just need
4415 to create an address from it, otherwise we want to get it into a
4416 register. A set of invariants we depend on is stored in
4417 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4418 autoinc addressing is likely. */
4420 static comp_cost
4421 get_computation_cost (struct ivopts_data *data,
4422 struct iv_use *use, struct iv_cand *cand,
4423 bool address_p, bitmap *depends_on,
4424 bool *can_autoinc, int *inv_expr_id)
4426 return get_computation_cost_at (data,
4427 use, cand, address_p, depends_on, use->stmt,
4428 can_autoinc, inv_expr_id);
4431 /* Determines cost of basing replacement of USE on CAND in a generic
4432 expression. */
4434 static bool
4435 determine_use_iv_cost_generic (struct ivopts_data *data,
4436 struct iv_use *use, struct iv_cand *cand)
4438 bitmap depends_on;
4439 comp_cost cost;
4440 int inv_expr_id = -1;
4442 /* The simple case first -- if we need to express value of the preserved
4443 original biv, the cost is 0. This also prevents us from counting the
4444 cost of increment twice -- once at this use and once in the cost of
4445 the candidate. */
4446 if (cand->pos == IP_ORIGINAL
4447 && cand->incremented_at == use->stmt)
4449 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4450 ERROR_MARK, -1);
4451 return true;
4454 cost = get_computation_cost (data, use, cand, false, &depends_on,
4455 NULL, &inv_expr_id);
4457 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4458 inv_expr_id);
4460 return !infinite_cost_p (cost);
4463 /* Determines cost of basing replacement of USE on CAND in an address. */
4465 static bool
4466 determine_use_iv_cost_address (struct ivopts_data *data,
4467 struct iv_use *use, struct iv_cand *cand)
4469 bitmap depends_on;
4470 bool can_autoinc;
4471 int inv_expr_id = -1;
4472 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4473 &can_autoinc, &inv_expr_id);
4475 if (cand->ainc_use == use)
4477 if (can_autoinc)
4478 cost.cost -= cand->cost_step;
4479 /* If we generated the candidate solely for exploiting autoincrement
4480 opportunities, and it turns out it can't be used, set the cost to
4481 infinity to make sure we ignore it. */
4482 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4483 cost = infinite_cost;
4485 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4486 inv_expr_id);
4488 return !infinite_cost_p (cost);
4491 /* Computes value of candidate CAND at position AT in iteration NITER, and
4492 stores it to VAL. */
4494 static void
4495 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4496 aff_tree *val)
4498 aff_tree step, delta, nit;
4499 struct iv *iv = cand->iv;
4500 tree type = TREE_TYPE (iv->base);
4501 tree steptype = type;
4502 if (POINTER_TYPE_P (type))
4503 steptype = sizetype;
4505 tree_to_aff_combination (iv->step, steptype, &step);
4506 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4507 aff_combination_convert (&nit, steptype);
4508 aff_combination_mult (&nit, &step, &delta);
4509 if (stmt_after_increment (loop, cand, at))
4510 aff_combination_add (&delta, &step);
4512 tree_to_aff_combination (iv->base, type, val);
4513 aff_combination_add (val, &delta);
4516 /* Returns the period of induction variable IV. */
4518 static tree
4519 iv_period (struct iv *iv)
4521 tree step = iv->step, period, type;
4522 tree pow2div;
4524 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4526 type = unsigned_type_for (TREE_TYPE (step));
4527 /* The period of the iv is lcm (step, type_range)/step - 1,
4528 i.e., N*type_range/step - 1. Since the type range is a power
4529 of two, N == step >> num_of_ending_zeros_binary (step),
4530 so the final result is
4532 (type_range >> num_of_ending_zeros_binary (step)) - 1
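For example, for a 32-bit unsigned type and step == 12 (two trailing
zero bits), the period is (0x100000000 >> 2) - 1 == 0x3fffffff. */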
4535 pow2div = num_ending_zeros (step);
4537 period = build_low_bits_mask (type,
4538 (TYPE_PRECISION (type)
4539 - tree_low_cst (pow2div, 1)));
4541 return period;
4544 /* Returns the comparison operator used when eliminating the iv USE. */
4546 static enum tree_code
4547 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4549 struct loop *loop = data->current_loop;
4550 basic_block ex_bb;
4551 edge exit;
4553 ex_bb = gimple_bb (use->stmt);
4554 exit = EDGE_SUCC (ex_bb, 0);
4555 if (flow_bb_inside_loop_p (loop, exit->dest))
4556 exit = EDGE_SUCC (ex_bb, 1);
4558 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
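/* Strips conversions from EXP that do not change whether the type may
wrap, so that expressions can be compared structurally across such
casts. */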
4561 static tree
4562 strip_wrap_conserving_type_conversions (tree exp)
4564 while (tree_ssa_useless_type_conversion (exp)
4565 && (nowrap_type_p (TREE_TYPE (exp))
4566 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4567 exp = TREE_OPERAND (exp, 0);
4568 return exp;
4571 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4572 check for an exact match. */
4574 static bool
4575 expr_equal_p (tree e, tree what)
4577 gimple stmt;
4578 enum tree_code code;
4580 e = strip_wrap_conserving_type_conversions (e);
4581 what = strip_wrap_conserving_type_conversions (what);
4583 code = TREE_CODE (what);
4584 if (TREE_TYPE (e) != TREE_TYPE (what))
4585 return false;
4587 if (operand_equal_p (e, what, 0))
4588 return true;
4590 if (TREE_CODE (e) != SSA_NAME)
4591 return false;
4593 stmt = SSA_NAME_DEF_STMT (e);
4594 if (gimple_code (stmt) != GIMPLE_ASSIGN
4595 || gimple_assign_rhs_code (stmt) != code)
4596 return false;
4598 switch (get_gimple_rhs_class (code))
4600 case GIMPLE_BINARY_RHS:
4601 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4602 return false;
4603 /* Fallthru. */
4605 case GIMPLE_UNARY_RHS:
4606 case GIMPLE_SINGLE_RHS:
4607 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4608 default:
4609 return false;
4613 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4614 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4615 calculation is performed in a non-wrapping type.
4617 TODO: More generally, we could test for the situation that
4618 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4619 This would require knowing the sign of OFFSET.
4621 Also, we only look for the first addition in the computation of BASE.
4622 More complex analysis would be better, but introducing it just for
4623 this optimization seems like overkill. */
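/* For example, if BASE is an SSA name defined by "base_1 = p_2 + n_3"
in a non-wrapping type and OFFSET is n_3, then BASE - OFFSET == p_2,
which provably does not overflow. */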
4625 static bool
4626 difference_cannot_overflow_p (tree base, tree offset)
4628 enum tree_code code;
4629 tree e1, e2;
4631 if (!nowrap_type_p (TREE_TYPE (base)))
4632 return false;
4634 base = expand_simple_operations (base);
4636 if (TREE_CODE (base) == SSA_NAME)
4638 gimple stmt = SSA_NAME_DEF_STMT (base);
4640 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4641 return false;
4643 code = gimple_assign_rhs_code (stmt);
4644 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4645 return false;
4647 e1 = gimple_assign_rhs1 (stmt);
4648 e2 = gimple_assign_rhs2 (stmt);
4650 else
4652 code = TREE_CODE (base);
4653 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4654 return false;
4655 e1 = TREE_OPERAND (base, 0);
4656 e2 = TREE_OPERAND (base, 1);
4659 /* TODO: deeper inspection may be necessary to prove the equality. */
4660 switch (code)
4662 case PLUS_EXPR:
4663 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4664 case POINTER_PLUS_EXPR:
4665 return expr_equal_p (e2, offset);
4667 default:
4668 return false;
4672 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4673 comparison with CAND. NITER describes the number of iterations of
4674 the loop. If successful, the comparison in COMP_P is altered accordingly.
4676 We aim to handle the following situation:
4678 sometype *base, *p;
4679 int a, b, i;
4681 i = a;
4682 p = p_0 = base + a;
4684 do
4685 {
4686 bla (*p);
4687 p++;
4688 i++;
4689 }
4690 while (i < b);
4692 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4693 We aim to optimize this to
4695 p = p_0 = base + a;
4696 do
4697 {
4698 bla (*p);
4699 p++;
4700 }
4701 while (p < p_0 - a + b);
4703 This preserves correctness, since the pointer arithmetic does not
4704 overflow. More precisely:
4706 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4707 overflow in computing it or the values of p.
4708 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4709 overflow. To prove this, we use the fact that p_0 = base + a. */
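/* For instance, with a == 2 and b == 6, the body runs for p == p_0,
p_0 + 1, p_0 + 2 and p_0 + 3 (niter == b - a - 1 == 3 latch iterations),
and the rewritten test p < p_0 + 4 fails exactly when p reaches its
final value p_0 - a + b. */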
4711 static bool
4712 iv_elimination_compare_lt (struct ivopts_data *data,
4713 struct iv_cand *cand, enum tree_code *comp_p,
4714 struct tree_niter_desc *niter)
4716 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4717 struct affine_tree_combination nit, tmpa, tmpb;
4718 enum tree_code comp;
4719 HOST_WIDE_INT step;
4721 /* We need to know that the candidate induction variable does not overflow.
4722 While more complex analysis may be used to prove this, for now just
4723 check that the variable appears in the original program and that it
4724 is computed in a type that guarantees no overflows. */
4725 cand_type = TREE_TYPE (cand->iv->base);
4726 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4727 return false;
4729 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4730 the calculation of the BOUND could overflow, making the comparison
4731 invalid. */
4732 if (!data->loop_single_exit_p)
4733 return false;
4735 /* We need to be able to decide whether candidate is increasing or decreasing
4736 in order to choose the right comparison operator. */
4737 if (!cst_and_fits_in_hwi (cand->iv->step))
4738 return false;
4739 step = int_cst_value (cand->iv->step);
4741 /* Check that the number of iterations matches the expected pattern:
4742 a + 1 > b ? 0 : b - a - 1. */
4743 mbz = niter->may_be_zero;
4744 if (TREE_CODE (mbz) == GT_EXPR)
4746 /* Handle a + 1 > b. */
4747 tree op0 = TREE_OPERAND (mbz, 0);
4748 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4750 a = TREE_OPERAND (op0, 0);
4751 b = TREE_OPERAND (mbz, 1);
4753 else
4754 return false;
4756 else if (TREE_CODE (mbz) == LT_EXPR)
4758 tree op1 = TREE_OPERAND (mbz, 1);
4760 /* Handle b < a + 1. */
4761 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4763 a = TREE_OPERAND (op1, 0);
4764 b = TREE_OPERAND (mbz, 0);
4766 else
4767 return false;
4769 else
4770 return false;
4772 /* Expected number of iterations is B - A - 1. Check that it matches
4773 the actual number, i.e., that B - A - NITER = 1. */
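/* The check is carried out on affine combinations: TMPB below ends up
holding B - A - NITER, which must reduce to the constant 1 with no
symbolic part remaining. */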
4774 tree_to_aff_combination (niter->niter, nit_type, &nit);
4775 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4776 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4777 aff_combination_scale (&nit, double_int_minus_one);
4778 aff_combination_scale (&tmpa, double_int_minus_one);
4779 aff_combination_add (&tmpb, &tmpa);
4780 aff_combination_add (&tmpb, &nit);
4781 if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4782 return false;
4784 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4785 overflow. */
4786 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4787 cand->iv->step,
4788 fold_convert (TREE_TYPE (cand->iv->step), a));
4789 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4790 return false;
4792 /* Determine the new comparison operator. */
4793 comp = step < 0 ? GT_EXPR : LT_EXPR;
4794 if (*comp_p == NE_EXPR)
4795 *comp_p = comp;
4796 else if (*comp_p == EQ_EXPR)
4797 *comp_p = invert_tree_comparison (comp, false);
4798 else
4799 gcc_unreachable ();
4801 return true;
4804 /* Check whether it is possible to express the condition in USE by comparison
4805 of candidate CAND. If so, store the value compared against in BOUND, and
4806 the comparison operator in COMP. */
4808 static bool
4809 may_eliminate_iv (struct ivopts_data *data,
4810 struct iv_use *use, struct iv_cand *cand, tree *bound,
4811 enum tree_code *comp)
4813 basic_block ex_bb;
4814 edge exit;
4815 tree period;
4816 struct loop *loop = data->current_loop;
4817 aff_tree bnd;
4818 struct tree_niter_desc *desc = NULL;
4820 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4821 return false;
4823 /* For now this works only for exits that dominate the loop latch.
4824 TODO: extend to other conditions inside loop body. */
4825 ex_bb = gimple_bb (use->stmt);
4826 if (use->stmt != last_stmt (ex_bb)
4827 || gimple_code (use->stmt) != GIMPLE_COND
4828 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4829 return false;
4831 exit = EDGE_SUCC (ex_bb, 0);
4832 if (flow_bb_inside_loop_p (loop, exit->dest))
4833 exit = EDGE_SUCC (ex_bb, 1);
4834 if (flow_bb_inside_loop_p (loop, exit->dest))
4835 return false;
4837 desc = niter_for_exit (data, exit);
4838 if (!desc)
4839 return false;
4841 /* Determine whether we can use the variable to test the exit condition.
4842 This is the case iff the period of the induction variable is greater
4843 than the number of iterations for which the exit condition is true. */
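/* E.g., an 8-bit unsigned iv with step 1 has period 255, so it can
express the exit test only if the loop is known to run at most 255
iterations; in a longer-running loop the candidate would wrap before
the bound is reached. */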
4844 period = iv_period (cand->iv);
4846 /* If the number of iterations is constant, compare against it directly. */
4847 if (TREE_CODE (desc->niter) == INTEGER_CST)
4849 /* See cand_value_at. */
4850 if (stmt_after_increment (loop, cand, use->stmt))
4852 if (!tree_int_cst_lt (desc->niter, period))
4853 return false;
4855 else
4857 if (tree_int_cst_lt (period, desc->niter))
4858 return false;
4862 /* If not, and if this is the only possible exit of the loop, see whether
4863 we can get a conservative estimate on the number of iterations of the
4864 entire loop and compare against that instead. */
4865 else
4867 double_int period_value, max_niter;
4869 max_niter = desc->max;
4870 if (stmt_after_increment (loop, cand, use->stmt))
4871 max_niter = double_int_add (max_niter, double_int_one);
4872 period_value = tree_to_double_int (period);
4873 if (double_int_ucmp (max_niter, period_value) > 0)
4875 /* See if we can take advantage of inferred loop bound information. */
4876 if (data->loop_single_exit_p)
4878 if (!max_loop_iterations (loop, &max_niter))
4879 return false;
4880 /* The loop bound is already adjusted by adding 1. */
4881 if (double_int_ucmp (max_niter, period_value) > 0)
4882 return false;
4884 else
4885 return false;
4889 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4891 *bound = aff_combination_to_tree (&bnd);
4892 *comp = iv_elimination_compare (data, use);
4894 /* It is unlikely that computing the number of iterations using division
4895 would be more profitable than keeping the original induction variable. */
4896 if (expression_expensive_p (*bound))
4897 return false;
4899 /* Sometimes it is possible to handle the situation that the number of
4900 iterations may be zero unless additional assumptions hold, by using <
4901 instead of != in the exit condition.
4903 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4904 base the exit condition on it. However, that is often too
4905 expensive. */
4906 if (!integer_zerop (desc->may_be_zero))
4907 return iv_elimination_compare_lt (data, cand, comp, desc);
4909 return true;
4912 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4913 be copied if it is used in the loop body and DATA->body_includes_call. */
4915 static int
4916 parm_decl_cost (struct ivopts_data *data, tree bound)
4918 tree sbound = bound;
4919 STRIP_NOPS (sbound);
4921 if (TREE_CODE (sbound) == SSA_NAME
4922 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4923 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4924 && data->body_includes_call)
4925 return COSTS_N_INSNS (1);
4927 return 0;
4930 /* Determines cost of basing replacement of USE on CAND in a condition. */
4932 static bool
4933 determine_use_iv_cost_condition (struct ivopts_data *data,
4934 struct iv_use *use, struct iv_cand *cand)
4936 tree bound = NULL_TREE;
4937 struct iv *cmp_iv;
4938 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4939 comp_cost elim_cost, express_cost, cost, bound_cost;
4940 bool ok;
4941 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4942 tree *control_var, *bound_cst;
4943 enum tree_code comp = ERROR_MARK;
4945 /* Only consider real candidates. */
4946 if (!cand->iv)
4948 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4949 ERROR_MARK, -1);
4950 return false;
4953 /* Try iv elimination. */
4954 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4956 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4957 if (elim_cost.cost == 0)
4958 elim_cost.cost = parm_decl_cost (data, bound);
4959 else if (TREE_CODE (bound) == INTEGER_CST)
4960 elim_cost.cost = 0;
4961 /* If we replace a loop condition 'i < n' with 'p < base + n',
4962 depends_on_elim will have 'base' and 'n' set, which implies
4963 that both 'base' and 'n' will be live during the loop. More likely,
4964 'base + n' will be loop invariant, resulting in only one live value
4965 during the loop. So in that case we clear depends_on_elim and set
4966 elim_inv_expr_id instead. */
4967 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4969 elim_inv_expr_id = get_expr_id (data, bound);
4970 bitmap_clear (depends_on_elim);
4972 /* The bound is a loop invariant, so it will be only computed
4973 once. */
4974 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4976 else
4977 elim_cost = infinite_cost;
4979 /* Try expressing the original giv. If it is compared with an invariant,
4980 note that we cannot get rid of it. */
4981 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4982 NULL, &cmp_iv);
4983 gcc_assert (ok);
4985 /* When the condition is a comparison of the candidate IV against
4986 zero, prefer this IV.
4988 TODO: The constant that we're subtracting from the cost should
4989 be target-dependent. This information should be added to the
4990 target costs for each backend. */
4991 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4992 && integer_zerop (*bound_cst)
4993 && (operand_equal_p (*control_var, cand->var_after, 0)
4994 || operand_equal_p (*control_var, cand->var_before, 0)))
4995 elim_cost.cost -= 1;
4997 express_cost = get_computation_cost (data, use, cand, false,
4998 &depends_on_express, NULL,
4999 &express_inv_expr_id);
5000 fd_ivopts_data = data;
5001 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5003 /* Count the cost of the original bound as well. */
5004 bound_cost = force_var_cost (data, *bound_cst, NULL);
5005 if (bound_cost.cost == 0)
5006 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5007 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5008 bound_cost.cost = 0;
5009 express_cost.cost += bound_cost.cost;
5011 /* Choose the better approach, preferring the eliminated IV. */
5012 if (compare_costs (elim_cost, express_cost) <= 0)
5014 cost = elim_cost;
5015 depends_on = depends_on_elim;
5016 depends_on_elim = NULL;
5017 inv_expr_id = elim_inv_expr_id;
5019 else
5021 cost = express_cost;
5022 depends_on = depends_on_express;
5023 depends_on_express = NULL;
5024 bound = NULL_TREE;
5025 comp = ERROR_MARK;
5026 inv_expr_id = express_inv_expr_id;
5029 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5031 if (depends_on_elim)
5032 BITMAP_FREE (depends_on_elim);
5033 if (depends_on_express)
5034 BITMAP_FREE (depends_on_express);
5036 return !infinite_cost_p (cost);
5039 /* Determines cost of basing replacement of USE on CAND. Returns false
5040 if USE cannot be based on CAND. */
5042 static bool
5043 determine_use_iv_cost (struct ivopts_data *data,
5044 struct iv_use *use, struct iv_cand *cand)
5046 switch (use->type)
5048 case USE_NONLINEAR_EXPR:
5049 return determine_use_iv_cost_generic (data, use, cand);
5051 case USE_ADDRESS:
5052 return determine_use_iv_cost_address (data, use, cand);
5054 case USE_COMPARE:
5055 return determine_use_iv_cost_condition (data, use, cand);
5057 default:
5058 gcc_unreachable ();
5062 /* Return true if get_computation_cost indicates that autoincrement is
5063 a possibility for the pair of USE and CAND, false otherwise. */
5065 static bool
5066 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5067 struct iv_cand *cand)
5069 bitmap depends_on;
5070 bool can_autoinc;
5071 comp_cost cost;
5073 if (use->type != USE_ADDRESS)
5074 return false;
5076 cost = get_computation_cost (data, use, cand, true, &depends_on,
5077 &can_autoinc, NULL);
5079 BITMAP_FREE (depends_on);
5081 return !infinite_cost_p (cost) && can_autoinc;
5084 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5085 use that allows autoincrement, and set their AINC_USE if possible. */
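/* For each such candidate, the closest use that does not follow the
increment within the same basic block is chosen; gimple uids provide
the ordering of statements inside the block. */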
5087 static void
5088 set_autoinc_for_original_candidates (struct ivopts_data *data)
5090 unsigned i, j;
5092 for (i = 0; i < n_iv_cands (data); i++)
5094 struct iv_cand *cand = iv_cand (data, i);
5095 struct iv_use *closest = NULL;
5096 if (cand->pos != IP_ORIGINAL)
5097 continue;
5098 for (j = 0; j < n_iv_uses (data); j++)
5100 struct iv_use *use = iv_use (data, j);
5101 unsigned uid = gimple_uid (use->stmt);
5102 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
5103 || uid > gimple_uid (cand->incremented_at))
5104 continue;
5105 if (closest == NULL || uid > gimple_uid (closest->stmt))
5106 closest = use;
5108 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
5109 continue;
5110 cand->ainc_use = closest;
5114 /* Finds the candidates for the induction variables. */
5116 static void
5117 find_iv_candidates (struct ivopts_data *data)
5119 /* Add commonly used ivs. */
5120 add_standard_iv_candidates (data);
5122 /* Add old induction variables. */
5123 add_old_ivs_candidates (data);
5125 /* Add induction variables derived from uses. */
5126 add_derived_ivs_candidates (data);
5128 set_autoinc_for_original_candidates (data);
5130 /* Record the important candidates. */
5131 record_important_candidates (data);
5134 /* Determines costs of basing the use of the iv on an iv candidate. */
5136 static void
5137 determine_use_iv_costs (struct ivopts_data *data)
5139 unsigned i, j;
5140 struct iv_use *use;
5141 struct iv_cand *cand;
5142 bitmap to_clear = BITMAP_ALLOC (NULL);
5144 alloc_use_cost_map (data);
5146 for (i = 0; i < n_iv_uses (data); i++)
5148 use = iv_use (data, i);
5150 if (data->consider_all_candidates)
5152 for (j = 0; j < n_iv_cands (data); j++)
5154 cand = iv_cand (data, j);
5155 determine_use_iv_cost (data, use, cand);
5158 else
5160 bitmap_iterator bi;
5162 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5164 cand = iv_cand (data, j);
5165 if (!determine_use_iv_cost (data, use, cand))
5166 bitmap_set_bit (to_clear, j);
5169 /* Remove the candidates for which the cost is infinite from
5170 the list of related candidates. */
5171 bitmap_and_compl_into (use->related_cands, to_clear);
5172 bitmap_clear (to_clear);
5176 BITMAP_FREE (to_clear);
5178 if (dump_file && (dump_flags & TDF_DETAILS))
5180 fprintf (dump_file, "Use-candidate costs:\n");
5182 for (i = 0; i < n_iv_uses (data); i++)
5184 use = iv_use (data, i);
5186 fprintf (dump_file, "Use %d:\n", i);
5187 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5188 for (j = 0; j < use->n_map_members; j++)
5190 if (!use->cost_map[j].cand
5191 || infinite_cost_p (use->cost_map[j].cost))
5192 continue;
5194 fprintf (dump_file, " %d\t%d\t%d\t",
5195 use->cost_map[j].cand->id,
5196 use->cost_map[j].cost.cost,
5197 use->cost_map[j].cost.complexity);
5198 if (use->cost_map[j].depends_on)
5199 bitmap_print (dump_file,
5200 use->cost_map[j].depends_on, "","");
5201 if (use->cost_map[j].inv_expr_id != -1)
5202 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5203 fprintf (dump_file, "\n");
5206 fprintf (dump_file, "\n");
5208 fprintf (dump_file, "\n");
5212 /* Determines cost of the candidate CAND. */
5214 static void
5215 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5217 comp_cost cost_base;
5218 unsigned cost, cost_step;
5219 tree base;
5221 if (!cand->iv)
5223 cand->cost = 0;
5224 return;
5227 /* There are two costs associated with the candidate -- its increment
5228 and its initialization. The second is almost negligible for any loop
5229 that rolls enough, so we weight it only very lightly. */
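/* E.g., a setup cost of 4 in a loop expected to roll about 10 times
contributes well under one unit per iteration once adjust_setup_cost
amortizes it over the expected number of iterations, so the
per-iteration step cost usually dominates. */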
5231 base = cand->iv->base;
5232 cost_base = force_var_cost (data, base, NULL);
5233 /* It will be rare for the iv register to happen to be initialized with
5234 the proper value at no cost. In general, there will at least be a regcopy
5235 or a const set. */
5236 if (cost_base.cost == 0)
5237 cost_base.cost = COSTS_N_INSNS (1);
5238 cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
5240 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5242 /* Prefer the original ivs unless we may gain something by replacing them.
5243 The reason is to make debugging simpler; this is therefore not relevant for
5244 artificial ivs created by other optimization passes. */
5245 if (cand->pos != IP_ORIGINAL
5246 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5247 cost++;
5249 /* Prefer not to insert statements into latch unless there are some
5250 already (so that we do not create unnecessary jumps). */
5251 if (cand->pos == IP_END
5252 && empty_block_p (ip_end_pos (data->current_loop)))
5253 cost++;
5255 cand->cost = cost;
5256 cand->cost_step = cost_step;
5259 /* Determines costs of computation of the candidates. */
5261 static void
5262 determine_iv_costs (struct ivopts_data *data)
5264 unsigned i;
5266 if (dump_file && (dump_flags & TDF_DETAILS))
5268 fprintf (dump_file, "Candidate costs:\n");
5269 fprintf (dump_file, " cand\tcost\n");
5272 for (i = 0; i < n_iv_cands (data); i++)
5274 struct iv_cand *cand = iv_cand (data, i);
5276 determine_iv_cost (data, cand);
5278 if (dump_file && (dump_flags & TDF_DETAILS))
5279 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5282 if (dump_file && (dump_flags & TDF_DETAILS))
5283 fprintf (dump_file, "\n");
5286 /* Calculates cost for having SIZE induction variables. */
5288 static unsigned
5289 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5291 /* We add size to the cost, so that we prefer eliminating ivs
5292 if possible. */
5293 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5294 data->body_includes_call);
5297 /* For each size of the induction variable set determine the penalty. */
5299 static void
5300 determine_set_costs (struct ivopts_data *data)
5302 unsigned j, n;
5303 gimple phi;
5304 gimple_stmt_iterator psi;
5305 tree op;
5306 struct loop *loop = data->current_loop;
5307 bitmap_iterator bi;
5309 if (dump_file && (dump_flags & TDF_DETAILS))
5311 fprintf (dump_file, "Global costs:\n");
5312 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5313 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5314 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5315 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5318 n = 0;
5319 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5321 phi = gsi_stmt (psi);
5322 op = PHI_RESULT (phi);
5324 if (!is_gimple_reg (op))
5325 continue;
5327 if (get_iv (data, op))
5328 continue;
5330 n++;
5333 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5335 struct version_info *info = ver_info (data, j);
5337 if (info->inv_id && info->has_nonlin_use)
5338 n++;
5341 data->regs_used = n;
5342 if (dump_file && (dump_flags & TDF_DETAILS))
5343 fprintf (dump_file, " regs_used %d\n", n);
5345 if (dump_file && (dump_flags & TDF_DETAILS))
5347 fprintf (dump_file, " cost for size:\n");
5348 fprintf (dump_file, " ivs\tcost\n");
5349 for (j = 0; j <= 2 * target_avail_regs; j++)
5350 fprintf (dump_file, " %d\t%d\n", j,
5351 ivopts_global_cost_for_size (data, j));
5352 fprintf (dump_file, "\n");
5356 /* Returns true if A is a cheaper cost pair than B. */
5358 static bool
5359 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5361 int cmp;
5363 if (!a)
5364 return false;
5366 if (!b)
5367 return true;
5369 cmp = compare_costs (a->cost, b->cost);
5370 if (cmp < 0)
5371 return true;
5373 if (cmp > 0)
5374 return false;
5376 /* In case the costs are the same, prefer the cheaper candidate. */
5377 if (a->cand->cost < b->cand->cost)
5378 return true;
5380 return false;
5384 /* Returns the candidate by which USE is expressed in IVS. */
5386 static struct cost_pair *
5387 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5389 return ivs->cand_for_use[use->id];
5392 /* Computes the cost field of the IVS structure. */
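/* The total is the sum of the per-use costs, the costs of the candidates
themselves, and a global register-pressure penalty based on the number
of candidate and invariant-expression registers live in the loop. */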
5394 static void
5395 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5397 comp_cost cost = ivs->cand_use_cost;
5399 cost.cost += ivs->cand_cost;
5401 cost.cost += ivopts_global_cost_for_size (data,
5402 ivs->n_regs + ivs->num_used_inv_expr);
5404 ivs->cost = cost;
5407 /* Remove invariants in set INVS from set IVS. */
5409 static void
5410 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5412 bitmap_iterator bi;
5413 unsigned iid;
5415 if (!invs)
5416 return;
5418 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5420 ivs->n_invariant_uses[iid]--;
5421 if (ivs->n_invariant_uses[iid] == 0)
5422 ivs->n_regs--;
5426 /* Set USE not to be expressed by any candidate in IVS. */
5428 static void
5429 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5430 struct iv_use *use)
5432 unsigned uid = use->id, cid;
5433 struct cost_pair *cp;
5435 cp = ivs->cand_for_use[uid];
5436 if (!cp)
5437 return;
5438 cid = cp->cand->id;
5440 ivs->bad_uses++;
5441 ivs->cand_for_use[uid] = NULL;
5442 ivs->n_cand_uses[cid]--;
5444 if (ivs->n_cand_uses[cid] == 0)
5446 bitmap_clear_bit (ivs->cands, cid);
5447 /* Do not count the pseudocandidates. */
5448 if (cp->cand->iv)
5449 ivs->n_regs--;
5450 ivs->n_cands--;
5451 ivs->cand_cost -= cp->cand->cost;
5453 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5456 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5458 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5460 if (cp->inv_expr_id != -1)
5462 ivs->used_inv_expr[cp->inv_expr_id]--;
5463 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5464 ivs->num_used_inv_expr--;
5466 iv_ca_recount_cost (data, ivs);
5469 /* Add invariants in set INVS to set IVS. */
5471 static void
5472 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5474 bitmap_iterator bi;
5475 unsigned iid;
5477 if (!invs)
5478 return;
5480 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5482 ivs->n_invariant_uses[iid]++;
5483 if (ivs->n_invariant_uses[iid] == 1)
5484 ivs->n_regs++;
5488 /* Set cost pair for USE in set IVS to CP. */
5490 static void
5491 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5492 struct iv_use *use, struct cost_pair *cp)
5494 unsigned uid = use->id, cid;
5496 if (ivs->cand_for_use[uid] == cp)
5497 return;
5499 if (ivs->cand_for_use[uid])
5500 iv_ca_set_no_cp (data, ivs, use);
5502 if (cp)
5504 cid = cp->cand->id;
5506 ivs->bad_uses--;
5507 ivs->cand_for_use[uid] = cp;
5508 ivs->n_cand_uses[cid]++;
5509 if (ivs->n_cand_uses[cid] == 1)
5511 bitmap_set_bit (ivs->cands, cid);
5512 /* Do not count the pseudocandidates. */
5513 if (cp->cand->iv)
5514 ivs->n_regs++;
5515 ivs->n_cands++;
5516 ivs->cand_cost += cp->cand->cost;
5518 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5521 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5522 iv_ca_set_add_invariants (ivs, cp->depends_on);
5524 if (cp->inv_expr_id != -1)
5526 ivs->used_inv_expr[cp->inv_expr_id]++;
5527 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5528 ivs->num_used_inv_expr++;
5530 iv_ca_recount_cost (data, ivs);
5534 /* Extend set IVS by expressing USE by some of the candidates in it
5535 if possible. All important candidates will be considered
5536 if IMPORTANT_CANDIDATES is true. */
5538 static void
5539 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5540 struct iv_use *use, bool important_candidates)
5542 struct cost_pair *best_cp = NULL, *cp;
5543 bitmap_iterator bi;
5544 bitmap cands;
5545 unsigned i;
5547 gcc_assert (ivs->upto >= use->id);
5549 if (ivs->upto == use->id)
5551 ivs->upto++;
5552 ivs->bad_uses++;
5555 cands = (important_candidates ? data->important_candidates : ivs->cands);
5556 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5558 struct iv_cand *cand = iv_cand (data, i);
5560 cp = get_use_iv_cost (data, use, cand);
5562 if (cheaper_cost_pair (cp, best_cp))
5563 best_cp = cp;
5566 iv_ca_set_cp (data, ivs, use, best_cp);
5569 /* Get cost for assignment IVS. */
5571 static comp_cost
5572 iv_ca_cost (struct iv_ca *ivs)
5574 /* This was a conditional expression but it triggered a bug in
5575 Sun C 5.5. */
5576 if (ivs->bad_uses)
5577 return infinite_cost;
5578 else
5579 return ivs->cost;
5582 /* Returns true if all dependences of CP are among invariants in IVS. */
5584 static bool
5585 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5587 unsigned i;
5588 bitmap_iterator bi;
5590 if (!cp->depends_on)
5591 return true;
5593 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5595 if (ivs->n_invariant_uses[i] == 0)
5596 return false;
5599 return true;
5602 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP and chains
5603 it before NEXT_CHANGE. */
5605 static struct iv_ca_delta *
5606 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5607 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5609 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5611 change->use = use;
5612 change->old_cp = old_cp;
5613 change->new_cp = new_cp;
5614 change->next_change = next_change;
5616 return change;
5619 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5620 are rewritten. */
5622 static struct iv_ca_delta *
5623 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5625 struct iv_ca_delta *last;
5627 if (!l2)
5628 return l1;
5630 if (!l1)
5631 return l2;
5633 for (last = l1; last->next_change; last = last->next_change)
5634 continue;
5635 last->next_change = l2;
5637 return l1;
5642 /* Reverse the list of changes DELTA, forming the inverse to it; OLD_CP and NEW_CP are also swapped in each entry, so committing the result undoes the original delta. */
5642 static struct iv_ca_delta *
5643 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5645 struct iv_ca_delta *act, *next, *prev = NULL;
5646 struct cost_pair *tmp;
5648 for (act = delta; act; act = next)
5650 next = act->next_change;
5651 act->next_change = prev;
5652 prev = act;
5654 tmp = act->old_cp;
5655 act->old_cp = act->new_cp;
5656 act->new_cp = tmp;
5659 return prev;
5662 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5663 reverted instead. */
5665 static void
5666 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5667 struct iv_ca_delta *delta, bool forward)
5669 struct cost_pair *from, *to;
5670 struct iv_ca_delta *act;
5672 if (!forward)
5673 delta = iv_ca_delta_reverse (delta);
5675 for (act = delta; act; act = act->next_change)
5677 from = act->old_cp;
5678 to = act->new_cp;
5679 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5680 iv_ca_set_cp (data, ivs, act->use, to);
5683 if (!forward)
5684 iv_ca_delta_reverse (delta);
5687 /* Returns true if CAND is used in IVS. */
5689 static bool
5690 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5692 return ivs->n_cand_uses[cand->id] > 0;
5695 /* Returns number of induction variable candidates in the set IVS. */
5697 static unsigned
5698 iv_ca_n_cands (struct iv_ca *ivs)
5700 return ivs->n_cands;
5703 /* Free the list of changes DELTA. */
5705 static void
5706 iv_ca_delta_free (struct iv_ca_delta **delta)
5708 struct iv_ca_delta *act, *next;
5710 for (act = *delta; act; act = next)
5712 next = act->next_change;
5713 free (act);
5716 *delta = NULL;
5719 /* Allocates a new iv candidate assignment. */
5721 static struct iv_ca *
5722 iv_ca_new (struct ivopts_data *data)
5724 struct iv_ca *nw = XNEW (struct iv_ca);
5726 nw->upto = 0;
5727 nw->bad_uses = 0;
5728 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5729 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5730 nw->cands = BITMAP_ALLOC (NULL);
5731 nw->n_cands = 0;
5732 nw->n_regs = 0;
5733 nw->cand_use_cost = no_cost;
5734 nw->cand_cost = 0;
5735 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5736 nw->cost = no_cost;
5737 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5738 nw->num_used_inv_expr = 0;
5740 return nw;
5743 /* Free memory occupied by the set IVS. */
5745 static void
5746 iv_ca_free (struct iv_ca **ivs)
5748 free ((*ivs)->cand_for_use);
5749 free ((*ivs)->n_cand_uses);
5750 BITMAP_FREE ((*ivs)->cands);
5751 free ((*ivs)->n_invariant_uses);
5752 free ((*ivs)->used_inv_expr);
5753 free (*ivs);
5754 *ivs = NULL;
5757 /* Dumps IVS to FILE. */
5759 static void
5760 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5762 const char *pref = " invariants ";
5763 unsigned i;
5764 comp_cost cost = iv_ca_cost (ivs);
5766 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5767 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5768 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5769 bitmap_print (file, ivs->cands, " candidates: ","\n");
5771 for (i = 0; i < ivs->upto; i++)
5773 struct iv_use *use = iv_use (data, i);
5774 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5775 if (cp)
5776 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5777 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5778 else
5779 fprintf (file, " use:%d --> ??\n", use->id);
5782 for (i = 1; i <= data->max_inv_id; i++)
5783 if (ivs->n_invariant_uses[i])
5785 fprintf (file, "%s%d", pref, i);
5786 pref = ", ";
5788 fprintf (file, "\n\n");
5791 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
5792 the new set, and store the differences in DELTA. The number of induction
5793 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
5794 the function tries to find a solution with a minimal number of iv candidates. */
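/* Note that the changes are committed only transiently here, to measure
the resulting cost, and are rolled back before returning; the caller
re-applies *DELTA via iv_ca_delta_commit if the new cost turns out to
be an improvement. */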
5796 static comp_cost
5797 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5798 struct iv_cand *cand, struct iv_ca_delta **delta,
5799 unsigned *n_ivs, bool min_ncand)
5801 unsigned i;
5802 comp_cost cost;
5803 struct iv_use *use;
5804 struct cost_pair *old_cp, *new_cp;
5806 *delta = NULL;
5807 for (i = 0; i < ivs->upto; i++)
5809 use = iv_use (data, i);
5810 old_cp = iv_ca_cand_for_use (ivs, use);
5812 if (old_cp
5813 && old_cp->cand == cand)
5814 continue;
5816 new_cp = get_use_iv_cost (data, use, cand);
5817 if (!new_cp)
5818 continue;
5820 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5821 continue;
5823 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5824 continue;
5826 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5829 iv_ca_delta_commit (data, ivs, *delta, true);
5830 cost = iv_ca_cost (ivs);
5831 if (n_ivs)
5832 *n_ivs = iv_ca_n_cands (ivs);
5833 iv_ca_delta_commit (data, ivs, *delta, false);
5835 return cost;
5838 /* Try narrowing set IVS by removing CAND. Return the cost of
5839 the new set and store the differences in DELTA. */
5841 static comp_cost
5842 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5843 struct iv_cand *cand, struct iv_ca_delta **delta)
5845 unsigned i, ci;
5846 struct iv_use *use;
5847 struct cost_pair *old_cp, *new_cp, *cp;
5848 bitmap_iterator bi;
5849 struct iv_cand *cnd;
5850 comp_cost cost;
5852 *delta = NULL;
5853 for (i = 0; i < n_iv_uses (data); i++)
5855 use = iv_use (data, i);
5857 old_cp = iv_ca_cand_for_use (ivs, use);
5858 if (old_cp->cand != cand)
5859 continue;
5861 new_cp = NULL;
5863 if (data->consider_all_candidates)
5865 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5867 if (ci == cand->id)
5868 continue;
5870 cnd = iv_cand (data, ci);
5872 cp = get_use_iv_cost (data, use, cnd);
5873 if (!cp)
5874 continue;
5876 if (!iv_ca_has_deps (ivs, cp))
5877 continue;
5879 if (!cheaper_cost_pair (cp, new_cp))
5880 continue;
5882 new_cp = cp;
5885 else
5887 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5889 if (ci == cand->id)
5890 continue;
5892 cnd = iv_cand (data, ci);
5894 cp = get_use_iv_cost (data, use, cnd);
5895 if (!cp)
5896 continue;
5897 if (!iv_ca_has_deps (ivs, cp))
5898 continue;
5900 if (!cheaper_cost_pair (cp, new_cp))
5901 continue;
5903 new_cp = cp;
5907 if (!new_cp)
5909 iv_ca_delta_free (delta);
5910 return infinite_cost;
5913 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5916 iv_ca_delta_commit (data, ivs, *delta, true);
5917 cost = iv_ca_cost (ivs);
5918 iv_ca_delta_commit (data, ivs, *delta, false);
5920 return cost;
5923 /* Try optimizing the set of candidates IVS by removing candidates other
5924 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5925 differences in DELTA. */
5927 static comp_cost
5928 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5929 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5931 bitmap_iterator bi;
5932 struct iv_ca_delta *act_delta, *best_delta;
5933 unsigned i;
5934 comp_cost best_cost, acost;
5935 struct iv_cand *cand;
5937 best_delta = NULL;
5938 best_cost = iv_ca_cost (ivs);
5940 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5942 cand = iv_cand (data, i);
5944 if (cand == except_cand)
5945 continue;
5947 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5949 if (compare_costs (acost, best_cost) < 0)
5951 best_cost = acost;
5952 iv_ca_delta_free (&best_delta);
5953 best_delta = act_delta;
5955 else
5956 iv_ca_delta_free (&act_delta);
5959 if (!best_delta)
5961 *delta = NULL;
5962 return best_cost;
5965 /* Recurse to possibly remove other unnecessary ivs. */
5966 iv_ca_delta_commit (data, ivs, best_delta, true);
5967 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5968 iv_ca_delta_commit (data, ivs, best_delta, false);
5969 *delta = iv_ca_delta_join (best_delta, *delta);
5970 return best_cost;
5973 /* Tries to extend the set IVS in the best possible way in order
5974 to express USE. If ORIGINALP is true, prefer candidates from
5975 the original set of IVs, otherwise favor important candidates not
5976 based on any memory object. */
5978 static bool
5979 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5980 struct iv_use *use, bool originalp)
5982 comp_cost best_cost, act_cost;
5983 unsigned i;
5984 bitmap_iterator bi;
5985 struct iv_cand *cand;
5986 struct iv_ca_delta *best_delta = NULL, *act_delta;
5987 struct cost_pair *cp;
5989 iv_ca_add_use (data, ivs, use, false);
5990 best_cost = iv_ca_cost (ivs);
5992 cp = iv_ca_cand_for_use (ivs, use);
5993 if (!cp)
5995 ivs->upto--;
5996 ivs->bad_uses--;
5997 iv_ca_add_use (data, ivs, use, true);
5998 best_cost = iv_ca_cost (ivs);
5999 cp = iv_ca_cand_for_use (ivs, use);
6001 if (cp)
6003 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6004 iv_ca_set_no_cp (data, ivs, use);
6007 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6008 first try important candidates not based on any memory object. Only if
6009 this fails, try the specific ones. Rationale -- in loops with many
6010 variables the best choice often is to use just one generic biv. If we
6011 added here many ivs specific to the uses, the optimization algorithm later
6012 would be likely to get stuck in a local minimum, thus causing us to create
6013 too many ivs. The approach from few ivs to more seems more likely to be
6014 successful -- starting from few ivs, replacing an expensive use by a
6015 specific iv should always be a win. */
6016 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6018 cand = iv_cand (data, i);
6020 if (originalp && cand->pos != IP_ORIGINAL)
6021 continue;
6023 if (!originalp && cand->iv->base_object != NULL_TREE)
6024 continue;
6026 if (iv_ca_cand_used_p (ivs, cand))
6027 continue;
6029 cp = get_use_iv_cost (data, use, cand);
6030 if (!cp)
6031 continue;
6033 iv_ca_set_cp (data, ivs, use, cp);
6034 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6035 true);
6036 iv_ca_set_no_cp (data, ivs, use);
6037 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6039 if (compare_costs (act_cost, best_cost) < 0)
6041 best_cost = act_cost;
6043 iv_ca_delta_free (&best_delta);
6044 best_delta = act_delta;
6046 else
6047 iv_ca_delta_free (&act_delta);
6050 if (infinite_cost_p (best_cost))
6052 for (i = 0; i < use->n_map_members; i++)
6054 cp = use->cost_map + i;
6055 cand = cp->cand;
6056 if (!cand)
6057 continue;
6059 /* Already tried this. */
6060 if (cand->important)
6062 if (originalp && cand->pos == IP_ORIGINAL)
6063 continue;
6064 if (!originalp && cand->iv->base_object == NULL_TREE)
6065 continue;
6068 if (iv_ca_cand_used_p (ivs, cand))
6069 continue;
6071 act_delta = NULL;
6072 iv_ca_set_cp (data, ivs, use, cp);
6073 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6074 iv_ca_set_no_cp (data, ivs, use);
6075 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6076 cp, act_delta);
6078 if (compare_costs (act_cost, best_cost) < 0)
6080 best_cost = act_cost;
6082 if (best_delta)
6083 iv_ca_delta_free (&best_delta);
6084 best_delta = act_delta;
6086 else
6087 iv_ca_delta_free (&act_delta);
6091 iv_ca_delta_commit (data, ivs, best_delta, true);
6092 iv_ca_delta_free (&best_delta);
6094 return !infinite_cost_p (best_cost);
6097 /* Finds an initial assignment of candidates to uses. */
6099 static struct iv_ca *
6100 get_initial_solution (struct ivopts_data *data, bool originalp)
6102 struct iv_ca *ivs = iv_ca_new (data);
6103 unsigned i;
6105 for (i = 0; i < n_iv_uses (data); i++)
6106 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6108 iv_ca_free (&ivs);
6109 return NULL;
6112 return ivs;
6115 /* Tries to improve set of induction variables IVS. */
6117 static bool
6118 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6120 unsigned i, n_ivs;
6121 comp_cost acost, best_cost = iv_ca_cost (ivs);
6122 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6123 struct iv_cand *cand;
6125 /* Try extending the set of induction variables by one. */
6126 for (i = 0; i < n_iv_cands (data); i++)
6128 cand = iv_cand (data, i);
6130 if (iv_ca_cand_used_p (ivs, cand))
6131 continue;
6133 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6134 if (!act_delta)
6135 continue;
6137 /* If we successfully added the candidate and the set is small enough,
6138 try optimizing it by removing other candidates. */
6139 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6141 iv_ca_delta_commit (data, ivs, act_delta, true);
6142 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6143 iv_ca_delta_commit (data, ivs, act_delta, false);
6144 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6147 if (compare_costs (acost, best_cost) < 0)
6149 best_cost = acost;
6150 iv_ca_delta_free (&best_delta);
6151 best_delta = act_delta;
6153 else
6154 iv_ca_delta_free (&act_delta);
6157 if (!best_delta)
6159 /* Try removing the candidates from the set instead. */
6160 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6162 /* Nothing more we can do. */
6163 if (!best_delta)
6164 return false;
6167 iv_ca_delta_commit (data, ivs, best_delta, true);
6168 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6169 iv_ca_delta_free (&best_delta);
6170 return true;
6173 /* Attempts to find the optimal set of induction variables. We use a simple
6174 greedy heuristic -- we try to replace at most one candidate in the selected
6175 solution and remove the unused ivs while this improves the cost. */
6177 static struct iv_ca *
6178 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6180 struct iv_ca *set;
6182 /* Get the initial solution. */
6183 set = get_initial_solution (data, originalp);
6184 if (!set)
6186 if (dump_file && (dump_flags & TDF_DETAILS))
6187 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6188 return NULL;
6191 if (dump_file && (dump_flags & TDF_DETAILS))
6193 fprintf (dump_file, "Initial set of candidates:\n");
6194 iv_ca_dump (data, dump_file, set);
6197 while (try_improve_iv_set (data, set))
6199 if (dump_file && (dump_flags & TDF_DETAILS))
6201 fprintf (dump_file, "Improved to:\n");
6202 iv_ca_dump (data, dump_file, set);
6206 return set;
6209 static struct iv_ca *
6210 find_optimal_iv_set (struct ivopts_data *data)
6212 unsigned i;
6213 struct iv_ca *set, *origset;
6214 struct iv_use *use;
6215 comp_cost cost, origcost;
6217 /* Determine the cost based on a strategy that starts with original IVs,
6218 and try again using a strategy that prefers candidates not based
6219 on any memory object. */
6220 origset = find_optimal_iv_set_1 (data, true);
6221 set = find_optimal_iv_set_1 (data, false);
6223 if (!origset && !set)
6224 return NULL;
6226 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6227 cost = set ? iv_ca_cost (set) : infinite_cost;
6229 if (dump_file && (dump_flags & TDF_DETAILS))
6231 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6232 origcost.cost, origcost.complexity);
6233 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6234 cost.cost, cost.complexity);
6237 /* Choose the one with the best cost. */
6238 if (compare_costs (origcost, cost) <= 0)
6240 if (set)
6241 iv_ca_free (&set);
6242 set = origset;
6244 else if (origset)
6245 iv_ca_free (&origset);
6247 for (i = 0; i < n_iv_uses (data); i++)
6249 use = iv_use (data, i);
6250 use->selected = iv_ca_cand_for_use (set, use)->cand;
6253 return set;
6256 /* Creates a new induction variable corresponding to CAND. */
6258 static void
6259 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6260 {
6261 gimple_stmt_iterator incr_pos;
6262 tree base;
6263 bool after = false;
6265 if (!cand->iv)
6266 return;
6268 switch (cand->pos)
6269 {
6270 case IP_NORMAL:
6271 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6272 break;
6274 case IP_END:
6275 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6276 after = true;
6277 break;
6279 case IP_AFTER_USE:
6280 after = true;
6281 /* fall through */
6282 case IP_BEFORE_USE:
6283 incr_pos = gsi_for_stmt (cand->incremented_at);
6284 break;
6286 case IP_ORIGINAL:
6287 /* Mark that the iv is preserved. */
6288 name_info (data, cand->var_before)->preserve_biv = true;
6289 name_info (data, cand->var_after)->preserve_biv = true;
6291 /* Rewrite the increment so that it uses var_before directly. */
6292 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6293 return;
6294 }
6296 gimple_add_tmp_var (cand->var_before);
6297 add_referenced_var (cand->var_before);
6299 base = unshare_expr (cand->iv->base);
6301 create_iv (base, unshare_expr (cand->iv->step),
6302 cand->var_before, data->current_loop,
6303 &incr_pos, after, &cand->var_before, &cand->var_after);
6304 }
6306 /* Creates new induction variables described in SET. */
6308 static void
6309 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6310 {
6311 unsigned i;
6312 struct iv_cand *cand;
6313 bitmap_iterator bi;
6315 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6316 {
6317 cand = iv_cand (data, i);
6318 create_new_iv (data, cand);
6319 }
6321 if (dump_file && (dump_flags & TDF_DETAILS))
6322 {
6323 fprintf (dump_file, "\nSelected IV set: \n");
6324 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6325 {
6326 cand = iv_cand (data, i);
6327 dump_cand (dump_file, cand);
6328 }
6329 fprintf (dump_file, "\n");
6330 }
6331 }
6333 /* Rewrites USE (definition of iv used in a nonlinear expression)
6334 using candidate CAND. */
6336 static void
6337 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6338 struct iv_use *use, struct iv_cand *cand)
6339 {
6340 tree comp;
6341 tree op, tgt;
6342 gimple ass;
6343 gimple_stmt_iterator bsi;
6345 /* An important special case -- if we are asked to express the value of
6346 the original iv by itself, just exit; there is no need to
6347 introduce a new computation (that might also need casting the
6348 variable to unsigned and back). */
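/* E.g. (invented SSA names): if the original biv increment is

   i_14 = i_4 + 1;

   and we are asked to express i_14 by the IP_ORIGINAL candidate that is
   incremented at this very statement, the assignment can stay exactly as
   it is.  */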
6349 if (cand->pos == IP_ORIGINAL
6350 && cand->incremented_at == use->stmt)
6351 {
6352 tree step, ctype, utype;
6353 enum tree_code incr_code = PLUS_EXPR, old_code;
6355 gcc_assert (is_gimple_assign (use->stmt));
6356 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6358 step = cand->iv->step;
6359 ctype = TREE_TYPE (step);
6360 utype = TREE_TYPE (cand->var_after);
6361 if (TREE_CODE (step) == NEGATE_EXPR)
6362 {
6363 incr_code = MINUS_EXPR;
6364 step = TREE_OPERAND (step, 0);
6365 }
6367 /* Check whether we may leave the computation unchanged.
6368 This is the case only if it does not rely on other
6369 computations in the loop -- otherwise, the computation
6370 we rely upon may be removed in remove_unused_ivs,
6371 thus leading to an ICE. */
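/* E.g. (invented names): an increment like i_7 = i_3 + 4, whose other
   operand is a constant (or equal to the step), may stay as it is,
   whereas i_7 = i_3 + n_5 must be rewritten, since the statement
   defining n_5 might later be removed by remove_unused_ivs.  */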
6372 old_code = gimple_assign_rhs_code (use->stmt);
6373 if (old_code == PLUS_EXPR
6374 || old_code == MINUS_EXPR
6375 || old_code == POINTER_PLUS_EXPR)
6376 {
6377 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6378 op = gimple_assign_rhs2 (use->stmt);
6379 else if (old_code != MINUS_EXPR
6380 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6381 op = gimple_assign_rhs1 (use->stmt);
6382 else
6383 op = NULL_TREE;
6384 }
6385 else
6386 op = NULL_TREE;
6388 if (op
6389 && (TREE_CODE (op) == INTEGER_CST
6390 || operand_equal_p (op, step, 0)))
6391 return;
6393 /* Otherwise, add the necessary computations to express
6394 the iv. */
6395 op = fold_convert (ctype, cand->var_before);
6396 comp = fold_convert (utype,
6397 build2 (incr_code, ctype, op,
6398 unshare_expr (step)));
6399 }
6400 else
6401 {
6402 comp = get_computation (data->current_loop, use, cand);
6403 gcc_assert (comp != NULL_TREE);
6404 }
6406 switch (gimple_code (use->stmt))
6407 {
6408 case GIMPLE_PHI:
6409 tgt = PHI_RESULT (use->stmt);
6411 /* If we should keep the biv, do not replace it. */
6412 if (name_info (data, tgt)->preserve_biv)
6413 return;
6415 bsi = gsi_after_labels (gimple_bb (use->stmt));
6416 break;
6418 case GIMPLE_ASSIGN:
6419 tgt = gimple_assign_lhs (use->stmt);
6420 bsi = gsi_for_stmt (use->stmt);
6421 break;
6423 default:
6424 gcc_unreachable ();
6425 }
6427 if (!valid_gimple_rhs_p (comp)
6428 || (gimple_code (use->stmt) != GIMPLE_PHI
6429 /* We can't allow re-allocating the stmt as it might be pointed
6430 to still. */
6431 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6432 >= gimple_num_ops (gsi_stmt (bsi)))))
6433 {
6434 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6435 true, GSI_SAME_STMT);
6436 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6437 {
6438 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6439 /* As this isn't a plain copy we have to reset alignment
6440 information. */
6441 if (SSA_NAME_PTR_INFO (comp))
6442 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6443 }
6444 }
6446 if (gimple_code (use->stmt) == GIMPLE_PHI)
6447 {
6448 ass = gimple_build_assign (tgt, comp);
6449 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6451 bsi = gsi_for_stmt (use->stmt);
6452 remove_phi_node (&bsi, false);
6453 }
6454 else
6455 {
6456 gimple_assign_set_rhs_from_tree (&bsi, comp);
6457 use->stmt = gsi_stmt (bsi);
6458 }
6459 }
6461 /* Performs a peephole optimization to reorder the iv update statement with
6462 a mem ref to enable instruction combining in later phases. The mem ref uses
6463 the iv value before the update, so the reordering transformation requires
6464 adjustment of the offset. CAND is the selected IV_CAND.
6466 Example:
6468 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6469 iv2 = iv1 + 1;
6471 if (t < val) (1)
6472 goto L;
6473 goto Head;
6476 Directly propagating t over to (1) will introduce an overlapping live range,
6477 thus increasing register pressure. This peephole transforms it into:
6480 iv2 = iv1 + 1;
6481 t = MEM_REF (base, iv2, 8, 8);
6482 if (t < val)
6483 goto L;
6484 goto Head;
6485 */
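/* A note on the example above: the offset drops by one stride
   (16 - 8 = 8) because iv2 is one step ahead of iv1.  Roughly, the
   reordering only fires when the end of the block has exactly the shape

   t = MEM_REF (...);   <- use->stmt
   iv2 = iv1 + 1;       <- the update defining cand->var_after
   if (t < val) ...     <- a GIMPLE_COND

   modulo debug statements; the checks below bail out otherwise.  */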
6487 static void
6488 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6489 {
6490 tree var_after;
6491 gimple iv_update, stmt;
6492 basic_block bb;
6493 gimple_stmt_iterator gsi, gsi_iv;
6495 if (cand->pos != IP_NORMAL)
6496 return;
6498 var_after = cand->var_after;
6499 iv_update = SSA_NAME_DEF_STMT (var_after);
6501 bb = gimple_bb (iv_update);
6502 gsi = gsi_last_nondebug_bb (bb);
6503 stmt = gsi_stmt (gsi);
6505 /* Only handle conditional statements for now. */
6506 if (gimple_code (stmt) != GIMPLE_COND)
6507 return;
6509 gsi_prev_nondebug (&gsi);
6510 stmt = gsi_stmt (gsi);
6511 if (stmt != iv_update)
6512 return;
6514 gsi_prev_nondebug (&gsi);
6515 if (gsi_end_p (gsi))
6516 return;
6518 stmt = gsi_stmt (gsi);
6519 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6520 return;
6522 if (stmt != use->stmt)
6523 return;
6525 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6526 return;
6528 if (dump_file && (dump_flags & TDF_DETAILS))
6529 {
6530 fprintf (dump_file, "Reordering \n");
6531 print_gimple_stmt (dump_file, iv_update, 0, 0);
6532 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6533 fprintf (dump_file, "\n");
6534 }
6536 gsi = gsi_for_stmt (use->stmt);
6537 gsi_iv = gsi_for_stmt (iv_update);
6538 gsi_move_before (&gsi_iv, &gsi);
6540 cand->pos = IP_BEFORE_USE;
6541 cand->incremented_at = use->stmt;
6542 }
6544 /* Rewrites USE (address that is an iv) using candidate CAND. */
6546 static void
6547 rewrite_use_address (struct ivopts_data *data,
6548 struct iv_use *use, struct iv_cand *cand)
6549 {
6550 aff_tree aff;
6551 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6552 tree base_hint = NULL_TREE;
6553 tree ref, iv;
6554 bool ok;
6556 adjust_iv_update_pos (cand, use);
6557 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6558 gcc_assert (ok);
6559 unshare_aff_combination (&aff);
6561 /* To avoid undefined overflow problems, all IV candidates use unsigned
6562 integer types. The drawback is that this makes it impossible for
6563 create_mem_ref to distinguish an IV that is based on a memory object
6564 from one that represents simply an offset.
6566 To work around this problem, we pass a hint to create_mem_ref that
6567 indicates which variable (if any) in aff is an IV based on a memory
6568 object. Note that we only consider the candidate. If this is not
6569 based on an object, the base of the reference is in some subexpression
6570 of the use -- but these will use pointer types, so they are recognized
6571 by the create_mem_ref heuristics anyway. */
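/* A hypothetical illustration: for a candidate ivtmp_17 based on the
   array `a', the affine combination may mention ivtmp_17 only as an
   unsigned value, e.g. (sizetype) ivtmp_17 + 16, so nothing in it looks
   like a pointer anymore.  The hint tells create_mem_ref that ivtmp_17
   carries the address of the underlying object and is therefore a good
   base for the resulting memory reference.  */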
6572 if (cand->iv->base_object)
6573 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6575 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6576 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6577 reference_alias_ptr_type (*use->op_p),
6578 iv, base_hint, data->speed);
6579 copy_ref_info (ref, *use->op_p);
6580 *use->op_p = ref;
6581 }
6583 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6584 candidate CAND. */
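/* An illustrative rewrite (invented SSA names): when a bound is available
   for the candidate, an exit test such as

   if (i_4 < 100)

   can be replaced by

   if (ivtmp_9 != bound_11)

   with bound_11 computed once on the loop preheader edge; otherwise the
   original iv is simply expressed in terms of the candidate and
   substituted into the comparison.  */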
6586 static void
6587 rewrite_use_compare (struct ivopts_data *data,
6588 struct iv_use *use, struct iv_cand *cand)
6589 {
6590 tree comp, *var_p, op, bound;
6591 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6592 enum tree_code compare;
6593 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6594 bool ok;
6596 bound = cp->value;
6597 if (bound)
6598 {
6599 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6600 tree var_type = TREE_TYPE (var);
6601 gimple_seq stmts;
6603 if (dump_file && (dump_flags & TDF_DETAILS))
6604 {
6605 fprintf (dump_file, "Replacing exit test: ");
6606 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6607 }
6608 compare = cp->comp;
6609 bound = unshare_expr (fold_convert (var_type, bound));
6610 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6611 if (stmts)
6612 gsi_insert_seq_on_edge_immediate (
6613 loop_preheader_edge (data->current_loop),
6614 stmts);
6616 gimple_cond_set_lhs (use->stmt, var);
6617 gimple_cond_set_code (use->stmt, compare);
6618 gimple_cond_set_rhs (use->stmt, op);
6619 return;
6620 }
6622 /* The induction variable elimination failed; just express the original
6623 giv. */
6624 comp = get_computation (data->current_loop, use, cand);
6625 gcc_assert (comp != NULL_TREE);
6627 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6628 gcc_assert (ok);
6630 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6631 true, GSI_SAME_STMT);
6632 }
6634 /* Rewrites USE using candidate CAND. */
6636 static void
6637 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6638 {
6639 switch (use->type)
6640 {
6641 case USE_NONLINEAR_EXPR:
6642 rewrite_use_nonlinear_expr (data, use, cand);
6643 break;
6645 case USE_ADDRESS:
6646 rewrite_use_address (data, use, cand);
6647 break;
6649 case USE_COMPARE:
6650 rewrite_use_compare (data, use, cand);
6651 break;
6653 default:
6654 gcc_unreachable ();
6655 }
6657 update_stmt (use->stmt);
6658 }
6660 /* Rewrite the uses using the selected induction variables. */
6662 static void
6663 rewrite_uses (struct ivopts_data *data)
6664 {
6665 unsigned i;
6666 struct iv_cand *cand;
6667 struct iv_use *use;
6669 for (i = 0; i < n_iv_uses (data); i++)
6670 {
6671 use = iv_use (data, i);
6672 cand = use->selected;
6673 gcc_assert (cand);
6675 rewrite_use (data, use, cand);
6676 }
6677 }
6679 /* Removes the ivs that are not used after rewriting. */
6681 static void
6682 remove_unused_ivs (struct ivopts_data *data)
6683 {
6684 unsigned j;
6685 bitmap_iterator bi;
6686 bitmap toremove = BITMAP_ALLOC (NULL);
6688 /* Figure out an order in which to release SSA DEFs so that we don't
6689 release something that we'd have to propagate into a debug stmt
6690 afterwards. */
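/* E.g. (invented SSA names): if unused ivs satisfy i_8 = i_5 + 4 and a
   debug statement still refers to i_8, then i_8 has to be released (and
   its value propagated into the debug statement) before i_5 is.
   Collecting everything into one bitmap lets release_defs_bitset choose
   such an order.  */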
6691 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6692 {
6693 struct version_info *info;
6695 info = ver_info (data, j);
6696 if (info->iv
6697 && !integer_zerop (info->iv->step)
6698 && !info->inv_id
6699 && !info->iv->have_use_for
6700 && !info->preserve_biv)
6701 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6702 }
6704 release_defs_bitset (toremove);
6706 BITMAP_FREE (toremove);
6707 }
6709 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6710 for pointer_map_traverse. */
6712 static bool
6713 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6714 void *data ATTRIBUTE_UNUSED)
6715 {
6716 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6718 free (niter);
6719 return true;
6720 }
6722 /* Frees data allocated by the optimization of a single loop. */
6724 static void
6725 free_loop_data (struct ivopts_data *data)
6726 {
6727 unsigned i, j;
6728 bitmap_iterator bi;
6729 tree obj;
6731 if (data->niters)
6732 {
6733 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6734 pointer_map_destroy (data->niters);
6735 data->niters = NULL;
6736 }
6738 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6739 {
6740 struct version_info *info;
6742 info = ver_info (data, i);
6743 free (info->iv);
6744 info->iv = NULL;
6745 info->has_nonlin_use = false;
6746 info->preserve_biv = false;
6747 info->inv_id = 0;
6748 }
6749 bitmap_clear (data->relevant);
6750 bitmap_clear (data->important_candidates);
6752 for (i = 0; i < n_iv_uses (data); i++)
6753 {
6754 struct iv_use *use = iv_use (data, i);
6756 free (use->iv);
6757 BITMAP_FREE (use->related_cands);
6758 for (j = 0; j < use->n_map_members; j++)
6759 if (use->cost_map[j].depends_on)
6760 BITMAP_FREE (use->cost_map[j].depends_on);
6761 free (use->cost_map);
6762 free (use);
6763 }
6764 VEC_truncate (iv_use_p, data->iv_uses, 0);
6766 for (i = 0; i < n_iv_cands (data); i++)
6767 {
6768 struct iv_cand *cand = iv_cand (data, i);
6770 free (cand->iv);
6771 if (cand->depends_on)
6772 BITMAP_FREE (cand->depends_on);
6773 free (cand);
6774 }
6775 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6777 if (data->version_info_size < num_ssa_names)
6778 {
6779 data->version_info_size = 2 * num_ssa_names;
6780 free (data->version_info);
6781 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6782 }
6784 data->max_inv_id = 0;
6786 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6787 SET_DECL_RTL (obj, NULL_RTX);
6789 VEC_truncate (tree, decl_rtl_to_reset, 0);
6791 htab_empty (data->inv_expr_tab);
6792 data->inv_expr_id = 0;
6793 }
6795 /* Finalizes data structures used by the iv optimization pass. */
6798 static void
6799 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6800 {
6801 free_loop_data (data);
6802 free (data->version_info);
6803 BITMAP_FREE (data->relevant);
6804 BITMAP_FREE (data->important_candidates);
6806 VEC_free (tree, heap, decl_rtl_to_reset);
6807 VEC_free (iv_use_p, heap, data->iv_uses);
6808 VEC_free (iv_cand_p, heap, data->iv_candidates);
6809 htab_delete (data->inv_expr_tab);
6811 finalize_costs ();
6812 }
6814 /* Returns true if the loop body BODY includes any function calls. */
6816 static bool
6817 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6818 {
6819 gimple_stmt_iterator gsi;
6820 unsigned i;
6822 for (i = 0; i < num_nodes; i++)
6823 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6824 {
6825 gimple stmt = gsi_stmt (gsi);
6826 if (is_gimple_call (stmt)
6827 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6828 return true;
6829 }
6830 return false;
6831 }
6833 /* Optimizes the LOOP. Returns true if anything changed. */
6835 static bool
6836 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6837 {
6838 bool changed = false;
6839 struct iv_ca *iv_ca;
6840 edge exit = single_dom_exit (loop);
6841 basic_block *body;
6843 gcc_assert (!data->niters);
6844 data->current_loop = loop;
6845 data->speed = optimize_loop_for_speed_p (loop);
6847 if (dump_file && (dump_flags & TDF_DETAILS))
6848 {
6849 fprintf (dump_file, "Processing loop %d\n", loop->num);
6851 if (exit)
6852 {
6853 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6854 exit->src->index, exit->dest->index);
6855 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6856 fprintf (dump_file, "\n");
6857 }
6859 fprintf (dump_file, "\n");
6860 }
6862 body = get_loop_body (loop);
6863 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6864 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6865 free (body);
6867 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6869 /* For each ssa name, determine whether it behaves as an induction variable
6870 in some loop. */
6871 if (!find_induction_variables (data))
6872 goto finish;
6874 /* Finds interesting uses (item 1). */
6875 find_interesting_uses (data);
6876 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6877 goto finish;
6879 /* Finds candidates for the induction variables (item 2). */
6880 find_iv_candidates (data);
6882 /* Calculates the costs (item 3, part 1). */
6883 determine_iv_costs (data);
6884 determine_use_iv_costs (data);
6885 determine_set_costs (data);
6887 /* Find the optimal set of induction variables (item 3, part 2). */
6888 iv_ca = find_optimal_iv_set (data);
6889 if (!iv_ca)
6890 goto finish;
6891 changed = true;
6893 /* Create the new induction variables (item 4, part 1). */
6894 create_new_ivs (data, iv_ca);
6895 iv_ca_free (&iv_ca);
6897 /* Rewrite the uses (item 4, part 2). */
6898 rewrite_uses (data);
6900 /* Remove the ivs that are unused after rewriting. */
6901 remove_unused_ivs (data);
6903 /* We have changed the structure of induction variables; it might happen
6904 that definitions in the scev database refer to some of them that were
6905 eliminated. */
6906 scev_reset ();
6908 finish:
6909 free_loop_data (data);
6911 return changed;
6912 }
6914 /* Main entry point. Optimizes induction variables in loops. */
6916 void
6917 tree_ssa_iv_optimize (void)
6918 {
6919 struct loop *loop;
6920 struct ivopts_data data;
6921 loop_iterator li;
6923 tree_ssa_iv_optimize_init (&data);
6925 /* Optimize the loops starting with the innermost ones. */
6926 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6927 {
6928 if (dump_file && (dump_flags & TDF_DETAILS))
6929 flow_loop_dump (loop, dump_file, NULL, 1);
6931 tree_ssa_iv_optimize_loop (&data, loop);
6932 }
6934 tree_ssa_iv_optimize_finalize (&data);
6935 }