/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
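/* For illustration only (not part of the pass itself), a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   is, after strength reduction, typically rewritten to step a pointer
   instead of recomputing &a[0] + i * sizeof (*a) on each iteration:

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   Here the address use "a[i]" selects the candidate "p", the compare
   "i < n" is rewritten to "p != &a[n]", and the original iv "i" can be
   eliminated entirely.  */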
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
static hashval_t mbc_entry_hash (const void *);
static int mbc_entry_eq (const void *, const void *);

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Cached costs for multiplies by constants, and a flag to indicate
   when they're valid.  */
static htab_t mult_costs[2];
static bool cost_tables_exist = false;
static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}
/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}
/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}
/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT if after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT if after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Allocate data structures for the cost model.  */

void
initialize_costs (void)
{
  mult_costs[0] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  mult_costs[1] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  cost_tables_exist = true;
}
/* Release data structures for the cost model.  */

void
finalize_costs (void)
{
  cost_tables_exist = false;
  htab_delete (mult_costs[0]);
  htab_delete (mult_costs[1]);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs = NULL_TREE;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    true))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
1731 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1733 tree base
= *op_p
, step
= size_zero_node
;
1735 struct ifs_ivopts_data ifs_ivopts_data
;
1737 /* Do not play with volatile memory references. A bit too conservative,
1738 perhaps, but safe. */
1739 if (gimple_has_volatile_ops (stmt
))
1742 /* Ignore bitfields for now. Not really something terribly complicated
1744 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1747 base
= unshare_expr (base
);
1749 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1751 tree type
= build_pointer_type (TREE_TYPE (base
));
1755 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1757 civ
= get_iv (data
, TMR_BASE (base
));
1761 TMR_BASE (base
) = civ
->base
;
1764 if (TMR_INDEX2 (base
)
1765 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1767 civ
= get_iv (data
, TMR_INDEX2 (base
));
1771 TMR_INDEX2 (base
) = civ
->base
;
1774 if (TMR_INDEX (base
)
1775 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1777 civ
= get_iv (data
, TMR_INDEX (base
));
1781 TMR_INDEX (base
) = civ
->base
;
1786 if (TMR_STEP (base
))
1787 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1789 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1793 if (integer_zerop (step
))
1795 base
= tree_mem_ref_addr (type
, base
);
1799 ifs_ivopts_data
.ivopts_data
= data
;
1800 ifs_ivopts_data
.stmt
= stmt
;
1801 ifs_ivopts_data
.step
= size_zero_node
;
1802 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1803 || integer_zerop (ifs_ivopts_data
.step
))
1805 step
= ifs_ivopts_data
.step
;
1807 /* Check that the base expression is addressable. This needs
1808 to be done after substituting bases of IVs into it. */
1809 if (may_be_nonaddressable_p (base
))
1812 /* Moreover, on strict alignment platforms, check that it is
1813 sufficiently aligned. */
1814 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1817 base
= build_fold_addr_expr (base
);
1819 /* Substituting bases of IVs into the base expression might
1820 have caused folding opportunities. */
1821 if (TREE_CODE (base
) == ADDR_EXPR
)
1823 tree
*ref
= &TREE_OPERAND (base
, 0);
1824 while (handled_component_p (*ref
))
1825 ref
= &TREE_OPERAND (*ref
, 0);
1826 if (TREE_CODE (*ref
) == MEM_REF
)
1828 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1829 TREE_OPERAND (*ref
, 0),
1830 TREE_OPERAND (*ref
, 1));
1837 civ
= alloc_iv (base
, step
);
1838 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1842 for_each_index (op_p
, idx_record_use
, data
);
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
1971 find_interesting_uses (struct ivopts_data
*data
)
1974 gimple_stmt_iterator bsi
;
1975 basic_block
*body
= get_loop_body (data
->current_loop
);
1977 struct version_info
*info
;
1980 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1981 fprintf (dump_file
, "Uses:\n\n");
1983 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1988 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1989 if (e
->dest
!= EXIT_BLOCK_PTR
1990 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1991 find_interesting_uses_outside (data
, e
);
1993 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1994 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1995 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1996 if (!is_gimple_debug (gsi_stmt (bsi
)))
1997 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2000 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2004 fprintf (dump_file
, "\n");
2006 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2008 info
= ver_info (data
, i
);
2011 fprintf (dump_file
, " ");
2012 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2013 fprintf (dump_file
, " is invariant (%d)%s\n",
2014 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2018 fprintf (dump_file
, "\n");
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
/* Records important candidates and adds them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that taking the modulus
	     by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
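
/* Illustrative note, not part of the pass logic: since n_map_members is
   rounded up to a power of two, reducing a candidate id modulo the table
   size needs only a mask.  E.g. with s == 5 related candidates we allocate
   size == 8, and

     slot = cand->id & (size - 1);   == cand->id % 8

   so no division is needed on the lookups in set_use_iv_cost and
   get_use_iv_cost below.  */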
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}
/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 == COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
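
/* Illustrative note: costs are compared lexicographically, runtime cost
   first and complexity as a tie-breaker.  For example,

     compare_costs (new_cost (4, 1), new_cost (4, 2)) < 0

   so with equal runtime estimates the less complex (e.g. simpler
   addressing mode) alternative wins.  */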
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value,
		 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
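
/* Illustrative note: the cost map above is a small open-addressing hash
   table with linear probing.  A candidate with id 11 in a table of
   n_map_members == 8 starts probing at slot 11 & 7 == 3 and walks slots
   3, 4, ..., 7, then wraps around to 0, 1, 2; since the table is allocated
   at least as large as the number of related candidates, a free slot is
   always found.  */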
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
/* Returns an estimate of the cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += set_src_cost (SET_SRC (set), speed);
      else
	cost++;
    }

  return cost;
}
/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns the variable containing the value of candidate CAND at statement
   STMT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
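
/* Illustrative example, with hypothetical values: for a use a[i] with
   ubase == &a[0] and ustep == 4, and a candidate iv with cbase == 0 and
   cstep == 1, we get ratio == 4 and the affine combination

     use = &a[0] - 4 * 0 + 4 * var == &a[0] + 4 * var,

   i.e., the array address is rewritten purely in terms of the candidate
   variable.  */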
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
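
/* Illustrative example: with an expected iteration count of 5, a setup
   computation costing 10 that can be hoisted before the loop contributes
   only 10 / 5 == 2 to the per-iteration cost when optimizing for speed,
   but the full 10 when optimizing for size.  */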
/* Returns cost of addition in MODE.  */

static unsigned
add_regs_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;

  if (costs[mode][speed])
    return costs[mode][speed];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode][speed] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
/* Returns cost of multiplication in MODE.  */

static unsigned
multiply_regs_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;

  if (costs[mode][speed])
    return costs[mode][speed];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (MULT, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode][speed] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
/* Returns cost of addition with a constant in MODE.  */

static unsigned
add_const_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;

  if (costs[mode][speed])
    return costs[mode][speed];

  /* Arbitrarily generate insns for x + 2, as the exact constant
     shouldn't matter.  */
  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_int_mode (2, mode)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode][speed] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition to constant in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
/* Returns cost of the extension or truncation from TYPE_FROM to TYPE_TO.  */

static unsigned
extend_or_trunc_reg_cost (tree type_to, tree type_from, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;
  enum machine_mode mode_to = TYPE_MODE (type_to);
  enum machine_mode mode_from = TYPE_MODE (type_from);
  tree size_to = TYPE_SIZE (type_to);
  tree size_from = TYPE_SIZE (type_from);
  enum rtx_code code;

  gcc_assert (TREE_CODE (size_to) == INTEGER_CST
	      && TREE_CODE (size_from) == INTEGER_CST);

  if (costs[mode_to][mode_from][speed])
    return costs[mode_to][mode_from][speed];

  if (tree_int_cst_lt (size_to, size_from))
    code = TRUNCATE;
  else if (TYPE_UNSIGNED (type_to))
    code = ZERO_EXTEND;
  else
    code = SIGN_EXTEND;

  start_sequence ();
  force_operand (gen_rtx_fmt_e (code, mode_to,
				gen_raw_REG (mode_from,
					     LAST_VIRTUAL_REGISTER + 1)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Conversion from %s to %s costs %d\n",
	     GET_MODE_NAME (mode_to), GET_MODE_NAME (mode_from), cost);

  costs[mode_to][mode_from][speed] = cost;
  return cost;
}
/* Returns cost of negation in MODE.  */

static unsigned
negate_reg_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;

  if (costs[mode][speed])
    return costs[mode][speed];

  start_sequence ();
  force_operand (gen_rtx_fmt_e (NEG, mode,
				gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode][speed] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Negation in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};

/* Computes hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
	  && e1->cst == e2->cst);
}
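
/* Illustrative note: 57 and 877 above are just small constants (877 is
   prime) used to mix the mode and the multiplier into one hash value.
   E.g. for a mode whose numeric value happens to be 4 and cst == 1000,
   the hash is 57 * 4 + 1000 % 877 == 351.  Any reasonable mixing function
   would do here, since the table is only a cache.  */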
/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_const_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  gcc_assert (cost_tables_exist);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **)
    htab_find_slot (mult_costs[speed], &act, INSERT);

  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;

DEF_VEC_P (address_cost_data);
DEF_VEC_ALLOC_P (address_cost_data, heap);

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
		  addr_space_t as, bool speed,
		  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static VEC(address_cost_data, heap) *address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
			   data_index + 1);

  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
	width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
	{
	  off = -((HOST_WIDE_INT) 1 << i);
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
	{
	  off = ((HOST_WIDE_INT) 1 << i) - 1;
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      if (i == -1)
	off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->min_offset);
	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode, as))
	  {
	    rat = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
	  has_predec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (USE_LOAD_POST_DECREMENT (mem_mode)
	  || USE_STORE_POST_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_DEC (address_mode, reg0);
	  has_postdec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_INCREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_INC (address_mode, reg0);
	  has_preinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (USE_LOAD_POST_INCREMENT (mem_mode)
	  || USE_STORE_POST_INCREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_INC (address_mode, reg0);
	  has_postinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
				   gen_int_mode (rat, address_mode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, address_mode,
				      gen_rtx_fmt_ee
					(PLUS, address_mode, base,
					 gen_int_mode (off, address_mode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off, address_mode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address_addr_space (mem_mode, addr, as);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, as, speed);

	  if (!acost)
	    acost = 1;
	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load symbol to a register,
	 which makes addresses that contain symbols look much more expensive.
	 However, the symbol will have to be loaded in any case before the
	 loop (and quite likely we have it in register already), so it does not
	 make much sense to penalize them too heavily.  So make some final
	 tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
	 if this is the case, use it.  */
      add_c = add_regs_cost (address_mode, speed);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = data->costs[0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < data->costs[1][var_p][off_p][rat_p])
	    data->costs[1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = data->costs[sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  if (has_predec[mem_mode] || has_postdec[mem_mode]
	      || has_preinc[mem_mode] || has_postinc[mem_mode])
	    fprintf (dump_file, "  May include autoinc/dec\n");
	  fprintf (dump_file, "\n");
	}

      VEC_replace (address_cost_data, address_cost_data_list,
		   data_index, data);
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
	    && msize == cstep)
	   || (has_postdec[mem_mode] && autoinc_offset == 0
	       && msize == -cstep)
	   || (has_preinc[mem_mode] && autoinc_offset == msize
	       && msize == cstep)
	   || (has_predec[mem_mode] && autoinc_offset == -msize
	       && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
	      && data->min_offset <= s_offset
	      && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_const_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_regs_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
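
/* Illustrative note: the loop over i < 16 above enumerates all
   combinations of the four address parts by treating i as a bitmask,
   e.g.

     i == 11 (binary 1011)  ->  sym_p = 1, var_p = 1, off_p = 0, rat_p = 1,

   i.e., the address shape sym + var + rat * index, and it queries the
   backend once per combination; the results are cached per
   (mode, address space) pair.  */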
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
   the EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;

  if (!(m >= 0 && m < maxm))
    return false;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost[speed][mode][m]
	     : (mult == op1
		? shiftsub1_cost[speed][mode][m]
		: shiftsub0_cost[speed][mode][m]));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, mult == op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = no_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
	cost1 = no_cost;
      else
	cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
	cost0 = no_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = no_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_regs_cost (mode, speed), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
				    &sa_cost))
	    return sa_cost;
	}
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (multiply_by_const_cost (int_cst_value (op0),
						 mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (multiply_by_const_cost (int_cst_value (op1),
						 mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_const_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
	return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
	return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
						     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
			    tree cbase, HOST_WIDE_INT ratio,
			    bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
	ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
	cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
	   || (TREE_CODE (ubase) == ADDR_EXPR
	       && is_gimple_min_invariant (ubase)))
	  && (TREE_CODE (cbase) == INTEGER_CST))
	return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
	   || (TREE_CODE (cbase) == ADDR_EXPR
	       && is_gimple_min_invariant (cbase)))
	  && (TREE_CODE (ubase) == INTEGER_CST))
	return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
	return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
	  && TREE_CODE (cbase) == ADDR_EXPR)
	{
	  tree usym, csym;

	  usym = TREE_OPERAND (ubase, 0);
	  csym = TREE_OPERAND (cbase, 0);
	  if (TREE_CODE (usym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (usym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && host_integerp (ind, 0)
		  && TREE_INT_CST_LOW (ind) == 0)
		usym = TREE_OPERAND (usym, 0);
	    }
	  if (TREE_CODE (csym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (csym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && host_integerp (ind, 0)
		  && TREE_INT_CST_LOW (ind) == 0)
		csym = TREE_OPERAND (csym, 0);
	    }
	  if (operand_equal_p (usym, csym, 0))
	    return -1;
	}
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
	return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, gimple at,
			 bool *can_autoinc,
			 int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
			      ubase, build_int_cst (utype, 0),
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
	{
	  aff_tree real_cbase_aff;
	  aff_tree cstep_aff;

	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
				   &real_cbase_aff);
	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

	  aff_combination_add (&real_cbase_aff, &cstep_aff);
	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
	}

      cost = difference_cost (data,
			      ubase, real_cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
	   && !POINTER_TYPE_P (ctype)
	   && multiplier_allowed_in_address_p
		(ratio, TYPE_MODE (TREE_TYPE (utype)),
			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
			      ubase, cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
			difference_cost (data,
					 ubase, build_int_cst (utype, 0),
					 &symbol_present, &var_present,
					 &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_regs_cost (TYPE_MODE (ctype), data->speed);
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
	  get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
	bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
		      get_address_cost (symbol_present, var_present,
					offset, ratio, cstepi,
					TYPE_MODE (TREE_TYPE (utype)),
					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
					speed, stmt_is_after_inc,
					can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost.cost += multiply_by_const_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
				    add_regs_cost (TYPE_MODE (ctype), speed));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_regs_cost (TYPE_MODE (ctype), speed);

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += multiply_by_const_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on,
		      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt,
				  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
			       NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
					 &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
	cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
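
/* Illustrative example: for a candidate with base 16 and step 4, the value
   after NITER iterations is computed as the affine combination
   16 + 4 * niter, or 16 + 4 * (niter + 1) if AT lies after the increment;
   keeping the result in affine form lets later folding simplify the exit
   bound.  */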
/* Returns period of induction variable IV.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == (step >> num_of_ending_zeros_binary (step)),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}
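
/* Illustrative example: for a 32-bit unsigned iv with step 12 we have
   num_ending_zeros (12) == 2, so the period is
   (2^32 >> 2) - 1 == 0x3fffffff: after that many increments the iv is
   guaranteed not to have wrapped back to its initial value.  */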
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Strips type conversions from EXP that do not change whether the type
   wraps on overflow.  */

static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
	 && (nowrap_type_p (TREE_TYPE (exp))
	     == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);

  return exp;
}
/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
	return false;
      /* Fallthru.  */

    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
    default:
      return false;
    }
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.

	 Also, we only look for the first addition in the computation of BASE.
	 More complex analysis would be better, but introducing it just for
	 this optimization seems like an overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loop.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetic does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether the candidate is increasing or
     decreasing in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
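/* A worked instance of the check above (illustrative only): the affine
   computation forms -a + b + (-niter).  With niter = b - a - 1 this is
   b - a - (b - a - 1) = 1, i.e. a combination with no variable part
   (tmpb.n == 0) and constant offset 1, so the pattern matches; any other
   residue makes the function give up.  */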
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter = double_int_add (max_niter, double_int_one);
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) > 0)
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (double_int_ucmp (max_niter, period_value) > 0)
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
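/* For illustration (hypothetical loop, not from the original source):
   a candidate of type unsigned char with step 1 has a period of roughly
   255 iterations, so it can only replace an exit test whose condition
   holds for at most that many iterations; if the loop may run, say, 300
   times, the candidate wraps before the bound is reached and the
   elimination is rejected.  */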
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
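/* For illustration (made-up costs, not from the original source): if
   eliminating the condition i < n in favor of a comparison of the
   selected pointer candidate costs 4, while keeping i alive just to
   express the condition costs 5 (computation plus the original bound),
   the elimination branch wins and the bound and comparison produced by
   may_eliminate_iv are recorded in the cost pair.  */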
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
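/* For illustration (made-up numbers; the exact thresholds are
   target-dependent): with regs_used == 2 and six registers effectively
   available, small sizes incur only the "size" term, while larger sizes
   additionally pay the spill penalty that estimate_reg_pressure_cost
   returns, so sets with fewer ivs are preferred even before any spilling
   would actually start.  */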
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Returns the cost pair for the candidate by which USE is expressed
   in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs
                                            + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
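/* For illustration (made-up numbers): with cand_use_cost = 10,
   cand_cost = 3, n_regs = 2 and num_used_inv_expr = 1, the recounted set
   cost is 10 + 3 + ivopts_global_cost_for_size (data, 3), i.e. the use
   and candidate costs plus the register-pressure penalty for three
   simultaneously live values.  */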
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}
/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
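/* For illustration: reversing the delta list [u1: cp_a -> cp_b,
   u2: cp_c -> cp_d] yields [u2: cp_d -> cp_c, u1: cp_b -> cp_a]; both
   the list order and the old/new roles within each entry are swapped,
   so committing the reversed list exactly undoes the original
   changes.  */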
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  If MIN_NCAND is true, the function
   tries to find a solution with a minimal number of iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
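/* For illustration (made-up costs): starting from {iv0} with cost 20,
   one improvement step may find that adding iv3 and then pruning iv0
   yields {iv3} with cost 16; the joined pair of deltas is committed, and
   the caller keeps asking for further improvements until no single
   extend-and-prune step lowers the cost.  */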
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) will introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
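/* For illustration (hypothetical GIMPLE, not from the original source):
   an exit test

     if (i_1 < n_2)

   whose use was assigned a pointer candidate may be rewritten as

     if (p_5 < _7)

   where _7 computes the converted bound; any statements needed to
   gimplify the bound are inserted on the preheader edge, so the loop
   body keeps only the pointer comparison.  */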
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}