1 /* Induction variable optimizations.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
70 #include "basic-block.h"
71 #include "gimple-pretty-print.h"
73 #include "gimple-ssa.h"
76 #include "tree-phinodes.h"
77 #include "ssa-iterators.h"
78 #include "tree-ssanames.h"
79 #include "tree-ssa-loop-ivopts.h"
80 #include "tree-ssa-loop-manip.h"
81 #include "tree-ssa-loop-niter.h"
82 #include "tree-ssa-loop.h"
86 #include "tree-pass.h"
88 #include "insn-config.h"
89 #include "pointer-set.h"
90 #include "hash-table.h"
91 #include "tree-chrec.h"
92 #include "tree-scalar-evolution.h"
95 #include "langhooks.h"
96 #include "tree-affine.h"
98 #include "tree-inline.h"
99 #include "tree-ssa-propagate.h"
101 #include "tree-ssa-address.h"
103 /* FIXME: Expressions are expanded to RTL in this pass to determine the
104 cost of different addressing modes. This should be moved to a TBD
105 interface between the GIMPLE and RTL worlds. */
109 /* The infinite cost. */
110 #define INFTY 10000000
112 #define AVG_LOOP_NITER(LOOP) 5
114 /* Returns the expected number of loop iterations for LOOP.
115 The average trip count is computed from profile data if it
118 static inline HOST_WIDE_INT
119 avg_loop_niter (struct loop
*loop
)
121 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
123 return AVG_LOOP_NITER (loop
);
128 /* Representation of the induction variable. */
131 tree base
; /* Initial value of the iv. */
132 tree base_object
; /* A memory object to that the induction variable points. */
133 tree step
; /* Step of the iv (constant only). */
134 tree ssa_name
; /* The ssa name with the value. */
135 bool biv_p
; /* Is it a biv? */
136 bool have_use_for
; /* Do we already have a use for it? */
137 unsigned use_id
; /* The identifier in the use if it is the case. */
140 /* Per-ssa version information (induction variable descriptions, etc.). */
143 tree name
; /* The ssa name. */
144 struct iv
*iv
; /* Induction variable description. */
145 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
146 an expression that is not an induction variable. */
147 bool preserve_biv
; /* For the original biv, whether to preserve it. */
148 unsigned inv_id
; /* Id of an invariant. */
154 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
155 USE_ADDRESS
, /* Use in an address. */
156 USE_COMPARE
/* Use is a compare. */
159 /* Cost of a computation. */
162 int cost
; /* The runtime cost. */
163 unsigned complexity
; /* The estimate of the complexity of the code for
164 the computation (in no concrete units --
165 complexity field should be larger for more
166 complex expressions and addressing modes). */
169 static const comp_cost no_cost
= {0, 0};
170 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
172 /* The candidate - cost pair. */
175 struct iv_cand
*cand
; /* The candidate. */
176 comp_cost cost
; /* The cost. */
177 bitmap depends_on
; /* The list of invariants that have to be
179 tree value
; /* For final value elimination, the expression for
180 the final value of the iv. For iv elimination,
181 the new bound to compare with. */
182 enum tree_code comp
; /* For iv elimination, the comparison. */
183 int inv_expr_id
; /* Loop invariant expression id. */
189 unsigned id
; /* The id of the use. */
190 enum use_type type
; /* Type of the use. */
191 struct iv
*iv
; /* The induction variable it is based on. */
192 gimple stmt
; /* Statement in that it occurs. */
193 tree
*op_p
; /* The place where it occurs. */
194 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
197 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
198 struct cost_pair
*cost_map
;
199 /* The costs wrto the iv candidates. */
201 struct iv_cand
*selected
;
202 /* The selected candidate. */
205 /* The position where the iv is computed. */
208 IP_NORMAL
, /* At the end, just before the exit condition. */
209 IP_END
, /* At the end of the latch block. */
210 IP_BEFORE_USE
, /* Immediately before a specific use. */
211 IP_AFTER_USE
, /* Immediately after a specific use. */
212 IP_ORIGINAL
/* The original biv. */
215 /* The induction variable candidate. */
218 unsigned id
; /* The number of the candidate. */
219 bool important
; /* Whether this is an "important" candidate, i.e. such
220 that it should be considered by all uses. */
221 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
222 gimple incremented_at
;/* For original biv, the statement where it is
224 tree var_before
; /* The variable used for it before increment. */
225 tree var_after
; /* The variable used for it after increment. */
226 struct iv
*iv
; /* The value of the candidate. NULL for
227 "pseudocandidate" used to indicate the possibility
228 to replace the final value of an iv by direct
229 computation of the value. */
230 unsigned cost
; /* Cost of the candidate. */
231 unsigned cost_step
; /* Cost of the candidate's increment operation. */
232 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
233 where it is incremented. */
234 bitmap depends_on
; /* The list of invariants that are used in step of the
238 /* Loop invariant expression hashtable entry. */
239 struct iv_inv_expr_ent
246 /* The data used by the induction variable optimizations. */
248 typedef struct iv_use
*iv_use_p
;
250 typedef struct iv_cand
*iv_cand_p
;
252 /* Hashtable helpers. */
254 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
256 typedef iv_inv_expr_ent value_type
;
257 typedef iv_inv_expr_ent compare_type
;
258 static inline hashval_t
hash (const value_type
*);
259 static inline bool equal (const value_type
*, const compare_type
*);
262 /* Hash function for loop invariant expressions. */
265 iv_inv_expr_hasher::hash (const value_type
*expr
)
270 /* Hash table equality function for expressions. */
273 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
275 return expr1
->hash
== expr2
->hash
276 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
281 /* The currently optimized loop. */
282 struct loop
*current_loop
;
284 /* Numbers of iterations for all exits of the current loop. */
285 struct pointer_map_t
*niters
;
287 /* Number of registers used in it. */
290 /* The size of version_info array allocated. */
291 unsigned version_info_size
;
293 /* The array of information for the ssa names. */
294 struct version_info
*version_info
;
296 /* The hashtable of loop invariant expressions created
298 hash_table
<iv_inv_expr_hasher
> inv_expr_tab
;
300 /* Loop invariant expression id. */
303 /* The bitmap of indices in version_info whose value was changed. */
306 /* The uses of induction variables. */
307 vec
<iv_use_p
> iv_uses
;
309 /* The candidates. */
310 vec
<iv_cand_p
> iv_candidates
;
312 /* A bitmap of important candidates. */
313 bitmap important_candidates
;
315 /* The maximum invariant id. */
318 /* Whether to consider just related and important candidates when replacing a
320 bool consider_all_candidates
;
322 /* Are we optimizing for speed? */
325 /* Whether the loop body includes any function calls. */
326 bool body_includes_call
;
328 /* Whether the loop body can only be exited via single exit. */
329 bool loop_single_exit_p
;
332 /* An assignment of iv candidates to uses. */
336 /* The number of uses covered by the assignment. */
339 /* Number of uses that cannot be expressed by the candidates in the set. */
342 /* Candidate assigned to a use, together with the related costs. */
343 struct cost_pair
**cand_for_use
;
345 /* Number of times each candidate is used. */
346 unsigned *n_cand_uses
;
348 /* The candidates used. */
351 /* The number of candidates in the set. */
354 /* Total number of registers needed. */
357 /* Total cost of expressing uses. */
358 comp_cost cand_use_cost
;
360 /* Total cost of candidates. */
363 /* Number of times each invariant is used. */
364 unsigned *n_invariant_uses
;
366 /* The array holding the number of uses of each loop
367 invariant expressions created by ivopt. */
368 unsigned *used_inv_expr
;
370 /* The number of created loop invariants. */
371 unsigned num_used_inv_expr
;
373 /* Total cost of the assignment. */
377 /* Difference of two iv candidate assignments. */
384 /* An old assignment (for rollback purposes). */
385 struct cost_pair
*old_cp
;
387 /* A new assignment. */
388 struct cost_pair
*new_cp
;
390 /* Next change in the list. */
391 struct iv_ca_delta
*next_change
;
394 /* Bound on number of candidates below that all candidates are considered. */
396 #define CONSIDER_ALL_CANDIDATES_BOUND \
397 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
399 /* If there are more iv occurrences, we just give up (it is quite unlikely that
400 optimizing such a loop would help, and it would take ages). */
402 #define MAX_CONSIDERED_USES \
403 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
405 /* If there are at most this number of ivs in the set, try removing unnecessary
406 ivs from the set always. */
408 #define ALWAYS_PRUNE_CAND_SET_BOUND \
409 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
411 /* The list of trees for that the decl_rtl field must be reset is stored
414 static vec
<tree
> decl_rtl_to_reset
;
416 static comp_cost
force_expr_to_var_cost (tree
, bool);
418 /* Number of uses recorded in DATA. */
420 static inline unsigned
421 n_iv_uses (struct ivopts_data
*data
)
423 return data
->iv_uses
.length ();
426 /* Ith use recorded in DATA. */
428 static inline struct iv_use
*
429 iv_use (struct ivopts_data
*data
, unsigned i
)
431 return data
->iv_uses
[i
];
434 /* Number of candidates recorded in DATA. */
436 static inline unsigned
437 n_iv_cands (struct ivopts_data
*data
)
439 return data
->iv_candidates
.length ();
442 /* Ith candidate recorded in DATA. */
444 static inline struct iv_cand
*
445 iv_cand (struct ivopts_data
*data
, unsigned i
)
447 return data
->iv_candidates
[i
];
450 /* The single loop exit if it dominates the latch, NULL otherwise. */
453 single_dom_exit (struct loop
*loop
)
455 edge exit
= single_exit (loop
);
460 if (!just_once_each_iteration_p (loop
, exit
->src
))
466 /* Dumps information about the induction variable IV to FILE. */
469 dump_iv (FILE *file
, struct iv
*iv
)
473 fprintf (file
, "ssa name ");
474 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
475 fprintf (file
, "\n");
478 fprintf (file
, " type ");
479 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
480 fprintf (file
, "\n");
484 fprintf (file
, " base ");
485 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
486 fprintf (file
, "\n");
488 fprintf (file
, " step ");
489 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
490 fprintf (file
, "\n");
494 fprintf (file
, " invariant ");
495 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
496 fprintf (file
, "\n");
501 fprintf (file
, " base object ");
502 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
503 fprintf (file
, "\n");
507 fprintf (file
, " is a biv\n");
510 /* Dumps information about the USE to FILE. */
513 dump_use (FILE *file
, struct iv_use
*use
)
515 fprintf (file
, "use %d\n", use
->id
);
519 case USE_NONLINEAR_EXPR
:
520 fprintf (file
, " generic\n");
524 fprintf (file
, " address\n");
528 fprintf (file
, " compare\n");
535 fprintf (file
, " in statement ");
536 print_gimple_stmt (file
, use
->stmt
, 0, 0);
537 fprintf (file
, "\n");
539 fprintf (file
, " at position ");
541 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
542 fprintf (file
, "\n");
544 dump_iv (file
, use
->iv
);
546 if (use
->related_cands
)
548 fprintf (file
, " related candidates ");
549 dump_bitmap (file
, use
->related_cands
);
553 /* Dumps information about the uses to FILE. */
556 dump_uses (FILE *file
, struct ivopts_data
*data
)
561 for (i
= 0; i
< n_iv_uses (data
); i
++)
563 use
= iv_use (data
, i
);
565 dump_use (file
, use
);
566 fprintf (file
, "\n");
570 /* Dumps information about induction variable candidate CAND to FILE. */
573 dump_cand (FILE *file
, struct iv_cand
*cand
)
575 struct iv
*iv
= cand
->iv
;
577 fprintf (file
, "candidate %d%s\n",
578 cand
->id
, cand
->important
? " (important)" : "");
580 if (cand
->depends_on
)
582 fprintf (file
, " depends on ");
583 dump_bitmap (file
, cand
->depends_on
);
588 fprintf (file
, " final value replacement\n");
592 if (cand
->var_before
)
594 fprintf (file
, " var_before ");
595 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
596 fprintf (file
, "\n");
600 fprintf (file
, " var_after ");
601 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
602 fprintf (file
, "\n");
608 fprintf (file
, " incremented before exit test\n");
612 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
616 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
620 fprintf (file
, " incremented at end\n");
624 fprintf (file
, " original biv\n");
631 /* Returns the info for ssa version VER. */
633 static inline struct version_info
*
634 ver_info (struct ivopts_data
*data
, unsigned ver
)
636 return data
->version_info
+ ver
;
639 /* Returns the info for ssa name NAME. */
641 static inline struct version_info
*
642 name_info (struct ivopts_data
*data
, tree name
)
644 return ver_info (data
, SSA_NAME_VERSION (name
));
647 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
651 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
653 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
657 if (sbb
== loop
->latch
)
663 return stmt
== last_stmt (bb
);
666 /* Returns true if STMT if after the place where the original induction
667 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
668 if the positions are identical. */
671 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
673 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
674 basic_block stmt_bb
= gimple_bb (stmt
);
676 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
679 if (stmt_bb
!= cand_bb
)
683 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
685 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
688 /* Returns true if STMT if after the place where the induction variable
689 CAND is incremented in LOOP. */
692 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
700 return stmt_after_ip_normal_pos (loop
, stmt
);
704 return stmt_after_inc_pos (cand
, stmt
, false);
707 return stmt_after_inc_pos (cand
, stmt
, true);
714 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
717 abnormal_ssa_name_p (tree exp
)
722 if (TREE_CODE (exp
) != SSA_NAME
)
725 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
728 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
729 abnormal phi node. Callback for for_each_index. */
732 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
733 void *data ATTRIBUTE_UNUSED
)
735 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
737 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
739 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
743 return !abnormal_ssa_name_p (*index
);
746 /* Returns true if EXPR contains a ssa name that occurs in an
747 abnormal phi node. */
750 contains_abnormal_ssa_name_p (tree expr
)
753 enum tree_code_class codeclass
;
758 code
= TREE_CODE (expr
);
759 codeclass
= TREE_CODE_CLASS (code
);
761 if (code
== SSA_NAME
)
762 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
764 if (code
== INTEGER_CST
765 || is_gimple_min_invariant (expr
))
768 if (code
== ADDR_EXPR
)
769 return !for_each_index (&TREE_OPERAND (expr
, 0),
770 idx_contains_abnormal_ssa_name_p
,
773 if (code
== COND_EXPR
)
774 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
775 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
776 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
782 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
787 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
799 /* Returns the structure describing number of iterations determined from
800 EXIT of DATA->current_loop, or NULL if something goes wrong. */
802 static struct tree_niter_desc
*
803 niter_for_exit (struct ivopts_data
*data
, edge exit
)
805 struct tree_niter_desc
*desc
;
810 data
->niters
= pointer_map_create ();
814 slot
= pointer_map_contains (data
->niters
, exit
);
818 /* Try to determine number of iterations. We cannot safely work with ssa
819 names that appear in phi nodes on abnormal edges, so that we do not
820 create overlapping life ranges for them (PR 27283). */
821 desc
= XNEW (struct tree_niter_desc
);
822 if (!number_of_iterations_exit (data
->current_loop
,
824 || contains_abnormal_ssa_name_p (desc
->niter
))
829 slot
= pointer_map_insert (data
->niters
, exit
);
833 desc
= (struct tree_niter_desc
*) *slot
;
838 /* Returns the structure describing number of iterations determined from
839 single dominating exit of DATA->current_loop, or NULL if something
842 static struct tree_niter_desc
*
843 niter_for_single_dom_exit (struct ivopts_data
*data
)
845 edge exit
= single_dom_exit (data
->current_loop
);
850 return niter_for_exit (data
, exit
);
853 /* Initializes data structures used by the iv optimization pass, stored
857 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
859 data
->version_info_size
= 2 * num_ssa_names
;
860 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
861 data
->relevant
= BITMAP_ALLOC (NULL
);
862 data
->important_candidates
= BITMAP_ALLOC (NULL
);
863 data
->max_inv_id
= 0;
865 data
->iv_uses
.create (20);
866 data
->iv_candidates
.create (20);
867 data
->inv_expr_tab
.create (10);
868 data
->inv_expr_id
= 0;
869 decl_rtl_to_reset
.create (20);
872 /* Returns a memory object to that EXPR points. In case we are able to
873 determine that it does not point to any such object, NULL is returned. */
876 determine_base_object (tree expr
)
878 enum tree_code code
= TREE_CODE (expr
);
881 /* If this is a pointer casted to any type, we need to determine
882 the base object for the pointer; so handle conversions before
883 throwing away non-pointer expressions. */
884 if (CONVERT_EXPR_P (expr
))
885 return determine_base_object (TREE_OPERAND (expr
, 0));
887 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
896 obj
= TREE_OPERAND (expr
, 0);
897 base
= get_base_address (obj
);
902 if (TREE_CODE (base
) == MEM_REF
)
903 return determine_base_object (TREE_OPERAND (base
, 0));
905 return fold_convert (ptr_type_node
,
906 build_fold_addr_expr (base
));
908 case POINTER_PLUS_EXPR
:
909 return determine_base_object (TREE_OPERAND (expr
, 0));
913 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
917 return fold_convert (ptr_type_node
, expr
);
921 /* Allocates an induction variable with given initial value BASE and step STEP
925 alloc_iv (tree base
, tree step
)
927 tree base_object
= base
;
928 struct iv
*iv
= XCNEW (struct iv
);
929 gcc_assert (step
!= NULL_TREE
);
931 /* Lower all address expressions except ones with DECL_P as operand.
933 1) More accurate cost can be computed for address expressions;
934 2) Duplicate candidates won't be created for bases in different
935 forms, like &a[0] and &a. */
936 STRIP_NOPS (base_object
);
937 if (TREE_CODE (base_object
) == ADDR_EXPR
938 && !DECL_P (TREE_OPERAND (base_object
, 0)))
942 base_object
= get_inner_reference_aff (TREE_OPERAND (base_object
, 0),
944 gcc_assert (base_object
!= NULL_TREE
);
945 base_object
= build_fold_addr_expr (base_object
);
946 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
950 iv
->base_object
= determine_base_object (base_object
);
953 iv
->have_use_for
= false;
955 iv
->ssa_name
= NULL_TREE
;
960 /* Sets STEP and BASE for induction variable IV. */
963 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
965 struct version_info
*info
= name_info (data
, iv
);
967 gcc_assert (!info
->iv
);
969 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
970 info
->iv
= alloc_iv (base
, step
);
971 info
->iv
->ssa_name
= iv
;
974 /* Finds induction variable declaration for VAR. */
977 get_iv (struct ivopts_data
*data
, tree var
)
980 tree type
= TREE_TYPE (var
);
982 if (!POINTER_TYPE_P (type
)
983 && !INTEGRAL_TYPE_P (type
))
986 if (!name_info (data
, var
)->iv
)
988 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
991 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
992 set_iv (data
, var
, var
, build_int_cst (type
, 0));
995 return name_info (data
, var
)->iv
;
998 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
999 not define a simple affine biv with nonzero step. */
1002 determine_biv_step (gimple phi
)
1004 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1005 tree name
= PHI_RESULT (phi
);
1008 if (virtual_operand_p (name
))
1011 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1014 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1017 /* Finds basic ivs. */
1020 find_bivs (struct ivopts_data
*data
)
1023 tree step
, type
, base
;
1025 struct loop
*loop
= data
->current_loop
;
1026 gimple_stmt_iterator psi
;
1028 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1030 phi
= gsi_stmt (psi
);
1032 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1035 step
= determine_biv_step (phi
);
1039 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1040 base
= expand_simple_operations (base
);
1041 if (contains_abnormal_ssa_name_p (base
)
1042 || contains_abnormal_ssa_name_p (step
))
1045 type
= TREE_TYPE (PHI_RESULT (phi
));
1046 base
= fold_convert (type
, base
);
1049 if (POINTER_TYPE_P (type
))
1050 step
= convert_to_ptrofftype (step
);
1052 step
= fold_convert (type
, step
);
1055 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1062 /* Marks basic ivs. */
1065 mark_bivs (struct ivopts_data
*data
)
1069 struct iv
*iv
, *incr_iv
;
1070 struct loop
*loop
= data
->current_loop
;
1071 basic_block incr_bb
;
1072 gimple_stmt_iterator psi
;
1074 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1076 phi
= gsi_stmt (psi
);
1078 iv
= get_iv (data
, PHI_RESULT (phi
));
1082 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1083 incr_iv
= get_iv (data
, var
);
1087 /* If the increment is in the subloop, ignore it. */
1088 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1089 if (incr_bb
->loop_father
!= data
->current_loop
1090 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1094 incr_iv
->biv_p
= true;
1098 /* Checks whether STMT defines a linear induction variable and stores its
1099 parameters to IV. */
1102 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1105 struct loop
*loop
= data
->current_loop
;
1107 iv
->base
= NULL_TREE
;
1108 iv
->step
= NULL_TREE
;
1110 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1113 lhs
= gimple_assign_lhs (stmt
);
1114 if (TREE_CODE (lhs
) != SSA_NAME
)
1117 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1119 iv
->base
= expand_simple_operations (iv
->base
);
1121 if (contains_abnormal_ssa_name_p (iv
->base
)
1122 || contains_abnormal_ssa_name_p (iv
->step
))
1125 /* If STMT could throw, then do not consider STMT as defining a GIV.
1126 While this will suppress optimizations, we can not safely delete this
1127 GIV and associated statements, even if it appears it is not used. */
1128 if (stmt_could_throw_p (stmt
))
1134 /* Finds general ivs in statement STMT. */
1137 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1141 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1144 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1147 /* Finds general ivs in basic block BB. */
1150 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1152 gimple_stmt_iterator bsi
;
1154 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1155 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1158 /* Finds general ivs. */
1161 find_givs (struct ivopts_data
*data
)
1163 struct loop
*loop
= data
->current_loop
;
1164 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1167 for (i
= 0; i
< loop
->num_nodes
; i
++)
1168 find_givs_in_bb (data
, body
[i
]);
1172 /* For each ssa name defined in LOOP determines whether it is an induction
1173 variable and if so, its initial value and step. */
1176 find_induction_variables (struct ivopts_data
*data
)
1181 if (!find_bivs (data
))
1187 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1189 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1193 fprintf (dump_file
, " number of iterations ");
1194 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1195 if (!integer_zerop (niter
->may_be_zero
))
1197 fprintf (dump_file
, "; zero if ");
1198 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1200 fprintf (dump_file
, "\n\n");
1203 fprintf (dump_file
, "Induction variables:\n\n");
1205 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1207 if (ver_info (data
, i
)->iv
)
1208 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1215 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1217 static struct iv_use
*
1218 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1219 gimple stmt
, enum use_type use_type
)
1221 struct iv_use
*use
= XCNEW (struct iv_use
);
1223 use
->id
= n_iv_uses (data
);
1224 use
->type
= use_type
;
1228 use
->related_cands
= BITMAP_ALLOC (NULL
);
1230 /* To avoid showing ssa name in the dumps, if it was not reset by the
1232 iv
->ssa_name
= NULL_TREE
;
1234 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1235 dump_use (dump_file
, use
);
1237 data
->iv_uses
.safe_push (use
);
1242 /* Checks whether OP is a loop-level invariant and if so, records it.
1243 NONLINEAR_USE is true if the invariant is used in a way we do not
1244 handle specially. */
1247 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1250 struct version_info
*info
;
1252 if (TREE_CODE (op
) != SSA_NAME
1253 || virtual_operand_p (op
))
1256 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1258 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1261 info
= name_info (data
, op
);
1263 info
->has_nonlin_use
|= nonlinear_use
;
1265 info
->inv_id
= ++data
->max_inv_id
;
1266 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1269 /* Checks whether the use OP is interesting and if so, records it. */
1271 static struct iv_use
*
1272 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1279 if (TREE_CODE (op
) != SSA_NAME
)
1282 iv
= get_iv (data
, op
);
1286 if (iv
->have_use_for
)
1288 use
= iv_use (data
, iv
->use_id
);
1290 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1294 if (integer_zerop (iv
->step
))
1296 record_invariant (data
, op
, true);
1299 iv
->have_use_for
= true;
1301 civ
= XNEW (struct iv
);
1304 stmt
= SSA_NAME_DEF_STMT (op
);
1305 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1306 || is_gimple_assign (stmt
));
1308 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1309 iv
->use_id
= use
->id
;
1314 /* Given a condition in statement STMT, checks whether it is a compare
1315 of an induction variable and an invariant. If this is the case,
1316 CONTROL_VAR is set to location of the iv, BOUND to the location of
1317 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1318 induction variable descriptions, and true is returned. If this is not
1319 the case, CONTROL_VAR and BOUND are set to the arguments of the
1320 condition and false is returned. */
1323 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1324 tree
**control_var
, tree
**bound
,
1325 struct iv
**iv_var
, struct iv
**iv_bound
)
1327 /* The objects returned when COND has constant operands. */
1328 static struct iv const_iv
;
1330 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1331 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1334 if (gimple_code (stmt
) == GIMPLE_COND
)
1336 op0
= gimple_cond_lhs_ptr (stmt
);
1337 op1
= gimple_cond_rhs_ptr (stmt
);
1341 op0
= gimple_assign_rhs1_ptr (stmt
);
1342 op1
= gimple_assign_rhs2_ptr (stmt
);
1345 zero
= integer_zero_node
;
1346 const_iv
.step
= integer_zero_node
;
1348 if (TREE_CODE (*op0
) == SSA_NAME
)
1349 iv0
= get_iv (data
, *op0
);
1350 if (TREE_CODE (*op1
) == SSA_NAME
)
1351 iv1
= get_iv (data
, *op1
);
1353 /* Exactly one of the compared values must be an iv, and the other one must
1358 if (integer_zerop (iv0
->step
))
1360 /* Control variable may be on the other side. */
1361 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1362 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1364 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1368 *control_var
= op0
;;
1379 /* Checks whether the condition in STMT is interesting and if so,
1383 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1385 tree
*var_p
, *bound_p
;
1386 struct iv
*var_iv
, *civ
;
1388 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1390 find_interesting_uses_op (data
, *var_p
);
1391 find_interesting_uses_op (data
, *bound_p
);
1395 civ
= XNEW (struct iv
);
1397 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1400 /* Returns the outermost loop EXPR is obviously invariant in
1401 relative to the loop LOOP, i.e. if all its operands are defined
1402 outside of the returned loop. Returns NULL if EXPR is not
1403 even obviously invariant in LOOP. */
1406 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1411 if (is_gimple_min_invariant (expr
))
1412 return current_loops
->tree_root
;
1414 if (TREE_CODE (expr
) == SSA_NAME
)
1416 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1419 if (flow_bb_inside_loop_p (loop
, def_bb
))
1421 return superloop_at_depth (loop
,
1422 loop_depth (def_bb
->loop_father
) + 1);
1425 return current_loops
->tree_root
;
1431 unsigned maxdepth
= 0;
1432 len
= TREE_OPERAND_LENGTH (expr
);
1433 for (i
= 0; i
< len
; i
++)
1435 struct loop
*ivloop
;
1436 if (!TREE_OPERAND (expr
, i
))
1439 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1442 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1445 return superloop_at_depth (loop
, maxdepth
);
1448 /* Returns true if expression EXPR is obviously invariant in LOOP,
1449 i.e. if all its operands are defined outside of the LOOP. LOOP
1450 should not be the function body. */
1453 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1458 gcc_assert (loop_depth (loop
) > 0);
1460 if (is_gimple_min_invariant (expr
))
1463 if (TREE_CODE (expr
) == SSA_NAME
)
1465 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1467 && flow_bb_inside_loop_p (loop
, def_bb
))
1476 len
= TREE_OPERAND_LENGTH (expr
);
1477 for (i
= 0; i
< len
; i
++)
1478 if (TREE_OPERAND (expr
, i
)
1479 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1485 /* Cumulates the steps of indices into DATA and replaces their values with the
1486 initial ones. Returns false when the value of the index cannot be determined.
1487 Callback for for_each_index. */
1489 struct ifs_ivopts_data
1491 struct ivopts_data
*ivopts_data
;
1497 idx_find_step (tree base
, tree
*idx
, void *data
)
1499 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1501 tree step
, iv_base
, iv_step
, lbound
, off
;
1502 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1504 /* If base is a component ref, require that the offset of the reference
1506 if (TREE_CODE (base
) == COMPONENT_REF
)
1508 off
= component_ref_field_offset (base
);
1509 return expr_invariant_in_loop_p (loop
, off
);
1512 /* If base is array, first check whether we will be able to move the
1513 reference out of the loop (in order to take its address in strength
1514 reduction). In order for this to work we need both lower bound
1515 and step to be loop invariants. */
1516 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1518 /* Moreover, for a range, the size needs to be invariant as well. */
1519 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1520 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1523 step
= array_ref_element_size (base
);
1524 lbound
= array_ref_low_bound (base
);
1526 if (!expr_invariant_in_loop_p (loop
, step
)
1527 || !expr_invariant_in_loop_p (loop
, lbound
))
1531 if (TREE_CODE (*idx
) != SSA_NAME
)
1534 iv
= get_iv (dta
->ivopts_data
, *idx
);
1538 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1539 *&x[0], which is not folded and does not trigger the
1540 ARRAY_REF path below. */
1543 if (integer_zerop (iv
->step
))
1546 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1548 step
= array_ref_element_size (base
);
1550 /* We only handle addresses whose step is an integer constant. */
1551 if (TREE_CODE (step
) != INTEGER_CST
)
1555 /* The step for pointer arithmetics already is 1 byte. */
1556 step
= size_one_node
;
1560 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1561 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1564 /* The index might wrap. */
1568 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1569 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1574 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1575 object is passed to it in DATA. */
1578 idx_record_use (tree base
, tree
*idx
,
1581 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1582 find_interesting_uses_op (data
, *idx
);
1583 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1585 find_interesting_uses_op (data
, array_ref_element_size (base
));
1586 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1591 /* If we can prove that TOP = cst * BOT for some constant cst,
1592 store cst to MUL and return true. Otherwise return false.
1593 The returned value is always sign-extended, regardless of the
1594 signedness of TOP and BOT. */
1597 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1600 enum tree_code code
;
1601 double_int res
, p0
, p1
;
1602 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1607 if (operand_equal_p (top
, bot
, 0))
1609 *mul
= double_int_one
;
1613 code
= TREE_CODE (top
);
1617 mby
= TREE_OPERAND (top
, 1);
1618 if (TREE_CODE (mby
) != INTEGER_CST
)
1621 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1624 *mul
= (res
* tree_to_double_int (mby
)).sext (precision
);
1629 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1630 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1633 if (code
== MINUS_EXPR
)
1635 *mul
= (p0
+ p1
).sext (precision
);
1639 if (TREE_CODE (bot
) != INTEGER_CST
)
1642 p0
= tree_to_double_int (top
).sext (precision
);
1643 p1
= tree_to_double_int (bot
).sext (precision
);
1646 *mul
= p0
.sdivmod (p1
, FLOOR_DIV_EXPR
, &res
).sext (precision
);
1647 return res
.is_zero ();
1654 /* Returns true if memory reference REF with step STEP may be unaligned. */
1657 may_be_unaligned_p (tree ref
, tree step
)
1661 HOST_WIDE_INT bitsize
;
1662 HOST_WIDE_INT bitpos
;
1664 enum machine_mode mode
;
1665 int unsignedp
, volatilep
;
1666 unsigned base_align
;
1668 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1669 thus they are not misaligned. */
1670 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1673 /* The test below is basically copy of what expr.c:normal_inner_ref
1674 does to check whether the object must be loaded by parts when
1675 STRICT_ALIGNMENT is true. */
1676 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1677 &unsignedp
, &volatilep
, true);
1678 base_type
= TREE_TYPE (base
);
1679 base_align
= get_object_alignment (base
);
1680 base_align
= MAX (base_align
, TYPE_ALIGN (base_type
));
1682 if (mode
!= BLKmode
)
1684 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1686 if (base_align
< mode_align
1687 || (bitpos
% mode_align
) != 0
1688 || (bitpos
% BITS_PER_UNIT
) != 0)
1692 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1695 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1702 /* Return true if EXPR may be non-addressable. */
1705 may_be_nonaddressable_p (tree expr
)
1707 switch (TREE_CODE (expr
))
1709 case TARGET_MEM_REF
:
1710 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1711 target, thus they are always addressable. */
1715 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1716 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1718 case VIEW_CONVERT_EXPR
:
1719 /* This kind of view-conversions may wrap non-addressable objects
1720 and make them look addressable. After some processing the
1721 non-addressability may be uncovered again, causing ADDR_EXPRs
1722 of inappropriate objects to be built. */
1723 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1724 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1727 /* ... fall through ... */
1730 case ARRAY_RANGE_REF
:
1731 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1743 /* Finds addresses in *OP_P inside STMT. */
1746 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1748 tree base
= *op_p
, step
= size_zero_node
;
1750 struct ifs_ivopts_data ifs_ivopts_data
;
1752 /* Do not play with volatile memory references. A bit too conservative,
1753 perhaps, but safe. */
1754 if (gimple_has_volatile_ops (stmt
))
1757 /* Ignore bitfields for now. Not really something terribly complicated
1759 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1762 base
= unshare_expr (base
);
1764 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1766 tree type
= build_pointer_type (TREE_TYPE (base
));
1770 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1772 civ
= get_iv (data
, TMR_BASE (base
));
1776 TMR_BASE (base
) = civ
->base
;
1779 if (TMR_INDEX2 (base
)
1780 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1782 civ
= get_iv (data
, TMR_INDEX2 (base
));
1786 TMR_INDEX2 (base
) = civ
->base
;
1789 if (TMR_INDEX (base
)
1790 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1792 civ
= get_iv (data
, TMR_INDEX (base
));
1796 TMR_INDEX (base
) = civ
->base
;
1801 if (TMR_STEP (base
))
1802 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1804 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1808 if (integer_zerop (step
))
1810 base
= tree_mem_ref_addr (type
, base
);
1814 ifs_ivopts_data
.ivopts_data
= data
;
1815 ifs_ivopts_data
.stmt
= stmt
;
1816 ifs_ivopts_data
.step
= size_zero_node
;
1817 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1818 || integer_zerop (ifs_ivopts_data
.step
))
1820 step
= ifs_ivopts_data
.step
;
1822 /* Check that the base expression is addressable. This needs
1823 to be done after substituting bases of IVs into it. */
1824 if (may_be_nonaddressable_p (base
))
1827 /* Moreover, on strict alignment platforms, check that it is
1828 sufficiently aligned. */
1829 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1832 base
= build_fold_addr_expr (base
);
1834 /* Substituting bases of IVs into the base expression might
1835 have caused folding opportunities. */
1836 if (TREE_CODE (base
) == ADDR_EXPR
)
1838 tree
*ref
= &TREE_OPERAND (base
, 0);
1839 while (handled_component_p (*ref
))
1840 ref
= &TREE_OPERAND (*ref
, 0);
1841 if (TREE_CODE (*ref
) == MEM_REF
)
1843 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1844 TREE_OPERAND (*ref
, 0),
1845 TREE_OPERAND (*ref
, 1));
1852 civ
= alloc_iv (base
, step
);
1853 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1857 for_each_index (op_p
, idx_record_use
, data
);
1860 /* Finds and records invariants used in STMT. */
1863 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1866 use_operand_p use_p
;
1869 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1871 op
= USE_FROM_PTR (use_p
);
1872 record_invariant (data
, op
, false);
1876 /* Finds interesting uses of induction variables in the statement STMT. */
1879 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1882 tree op
, *lhs
, *rhs
;
1884 use_operand_p use_p
;
1885 enum tree_code code
;
1887 find_invariants_stmt (data
, stmt
);
1889 if (gimple_code (stmt
) == GIMPLE_COND
)
1891 find_interesting_uses_cond (data
, stmt
);
1895 if (is_gimple_assign (stmt
))
1897 lhs
= gimple_assign_lhs_ptr (stmt
);
1898 rhs
= gimple_assign_rhs1_ptr (stmt
);
1900 if (TREE_CODE (*lhs
) == SSA_NAME
)
1902 /* If the statement defines an induction variable, the uses are not
1903 interesting by themselves. */
1905 iv
= get_iv (data
, *lhs
);
1907 if (iv
&& !integer_zerop (iv
->step
))
1911 code
= gimple_assign_rhs_code (stmt
);
1912 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1913 && (REFERENCE_CLASS_P (*rhs
)
1914 || is_gimple_val (*rhs
)))
1916 if (REFERENCE_CLASS_P (*rhs
))
1917 find_interesting_uses_address (data
, stmt
, rhs
);
1919 find_interesting_uses_op (data
, *rhs
);
1921 if (REFERENCE_CLASS_P (*lhs
))
1922 find_interesting_uses_address (data
, stmt
, lhs
);
1925 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1927 find_interesting_uses_cond (data
, stmt
);
1931 /* TODO -- we should also handle address uses of type
1933 memory = call (whatever);
1940 if (gimple_code (stmt
) == GIMPLE_PHI
1941 && gimple_bb (stmt
) == data
->current_loop
->header
)
1943 iv
= get_iv (data
, PHI_RESULT (stmt
));
1945 if (iv
&& !integer_zerop (iv
->step
))
1949 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1951 op
= USE_FROM_PTR (use_p
);
1953 if (TREE_CODE (op
) != SSA_NAME
)
1956 iv
= get_iv (data
, op
);
1960 find_interesting_uses_op (data
, op
);
1964 /* Finds interesting uses of induction variables outside of loops
1965 on loop exit edge EXIT. */
1968 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1971 gimple_stmt_iterator psi
;
1974 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1976 phi
= gsi_stmt (psi
);
1977 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1978 if (!virtual_operand_p (def
))
1979 find_interesting_uses_op (data
, def
);
1983 /* Finds uses of the induction variables that are interesting. */
1986 find_interesting_uses (struct ivopts_data
*data
)
1989 gimple_stmt_iterator bsi
;
1990 basic_block
*body
= get_loop_body (data
->current_loop
);
1992 struct version_info
*info
;
1995 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1996 fprintf (dump_file
, "Uses:\n\n");
1998 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2003 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2004 if (e
->dest
!= EXIT_BLOCK_PTR
2005 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2006 find_interesting_uses_outside (data
, e
);
2008 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2009 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2010 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2011 if (!is_gimple_debug (gsi_stmt (bsi
)))
2012 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2015 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2019 fprintf (dump_file
, "\n");
2021 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2023 info
= ver_info (data
, i
);
2026 fprintf (dump_file
, " ");
2027 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2028 fprintf (dump_file
, " is invariant (%d)%s\n",
2029 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2033 fprintf (dump_file
, "\n");
2039 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2040 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2041 we are at the top-level of the processed address. */
2044 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2045 HOST_WIDE_INT
*offset
)
2047 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2048 enum tree_code code
;
2049 tree type
, orig_type
= TREE_TYPE (expr
);
2050 HOST_WIDE_INT off0
, off1
, st
;
2051 tree orig_expr
= expr
;
2055 type
= TREE_TYPE (expr
);
2056 code
= TREE_CODE (expr
);
2062 if (!cst_and_fits_in_hwi (expr
)
2063 || integer_zerop (expr
))
2066 *offset
= int_cst_value (expr
);
2067 return build_int_cst (orig_type
, 0);
2069 case POINTER_PLUS_EXPR
:
2072 op0
= TREE_OPERAND (expr
, 0);
2073 op1
= TREE_OPERAND (expr
, 1);
2075 op0
= strip_offset_1 (op0
, false, false, &off0
);
2076 op1
= strip_offset_1 (op1
, false, false, &off1
);
2078 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2079 if (op0
== TREE_OPERAND (expr
, 0)
2080 && op1
== TREE_OPERAND (expr
, 1))
2083 if (integer_zerop (op1
))
2085 else if (integer_zerop (op0
))
2087 if (code
== MINUS_EXPR
)
2088 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2093 expr
= fold_build2 (code
, type
, op0
, op1
);
2095 return fold_convert (orig_type
, expr
);
2098 op1
= TREE_OPERAND (expr
, 1);
2099 if (!cst_and_fits_in_hwi (op1
))
2102 op0
= TREE_OPERAND (expr
, 0);
2103 op0
= strip_offset_1 (op0
, false, false, &off0
);
2104 if (op0
== TREE_OPERAND (expr
, 0))
2107 *offset
= off0
* int_cst_value (op1
);
2108 if (integer_zerop (op0
))
2111 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2113 return fold_convert (orig_type
, expr
);
2116 case ARRAY_RANGE_REF
:
2120 step
= array_ref_element_size (expr
);
2121 if (!cst_and_fits_in_hwi (step
))
2124 st
= int_cst_value (step
);
2125 op1
= TREE_OPERAND (expr
, 1);
2126 op1
= strip_offset_1 (op1
, false, false, &off1
);
2127 *offset
= off1
* st
;
2130 && integer_zerop (op1
))
2132 /* Strip the component reference completely. */
2133 op0
= TREE_OPERAND (expr
, 0);
2134 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2147 tmp
= component_ref_field_offset (expr
);
2148 field
= TREE_OPERAND (expr
, 1);
2150 && cst_and_fits_in_hwi (tmp
)
2151 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2153 HOST_WIDE_INT boffset
, abs_off
;
2155 /* Strip the component reference completely. */
2156 op0
= TREE_OPERAND (expr
, 0);
2157 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2158 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2159 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2163 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2170 op0
= TREE_OPERAND (expr
, 0);
2171 op0
= strip_offset_1 (op0
, true, true, &off0
);
2174 if (op0
== TREE_OPERAND (expr
, 0))
2177 expr
= build_fold_addr_expr (op0
);
2178 return fold_convert (orig_type
, expr
);
2181 /* ??? Offset operand? */
2182 inside_addr
= false;
2189 /* Default handling of expressions for that we want to recurse into
2190 the first operand. */
2191 op0
= TREE_OPERAND (expr
, 0);
2192 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2195 if (op0
== TREE_OPERAND (expr
, 0)
2196 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2199 expr
= copy_node (expr
);
2200 TREE_OPERAND (expr
, 0) = op0
;
2202 TREE_OPERAND (expr
, 1) = op1
;
2204 /* Inside address, we might strip the top level component references,
2205 thus changing type of the expression. Handling of ADDR_EXPR
2207 expr
= fold_convert (orig_type
, expr
);
2212 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2215 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2218 tree core
= strip_offset_1 (expr
, false, false, &off
);
2223 /* Returns variant of TYPE that can be used as base for different uses.
2224 We return unsigned type with the same precision, which avoids problems
2228 generic_type_for (tree type
)
2230 if (POINTER_TYPE_P (type
))
2231 return unsigned_type_for (type
);
2233 if (TYPE_UNSIGNED (type
))
2236 return unsigned_type_for (type
);
2239 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2240 the bitmap to that we should store it. */
2242 static struct ivopts_data
*fd_ivopts_data
;
2244 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2246 bitmap
*depends_on
= (bitmap
*) data
;
2247 struct version_info
*info
;
2249 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2251 info
= name_info (fd_ivopts_data
, *expr_p
);
2253 if (!info
->inv_id
|| info
->has_nonlin_use
)
2257 *depends_on
= BITMAP_ALLOC (NULL
);
2258 bitmap_set_bit (*depends_on
, info
->inv_id
);
2263 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2264 position to POS. If USE is not NULL, the candidate is set as related to
2265 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2266 replacement of the final value of the iv by a direct computation. */
2268 static struct iv_cand
*
2269 add_candidate_1 (struct ivopts_data
*data
,
2270 tree base
, tree step
, bool important
, enum iv_position pos
,
2271 struct iv_use
*use
, gimple incremented_at
)
2274 struct iv_cand
*cand
= NULL
;
2275 tree type
, orig_type
;
2277 /* For non-original variables, make sure their values are computed in a type
2278 that does not invoke undefined behavior on overflows (since in general,
2279 we cannot prove that these induction variables are non-wrapping). */
2280 if (pos
!= IP_ORIGINAL
)
2282 orig_type
= TREE_TYPE (base
);
2283 type
= generic_type_for (orig_type
);
2284 if (type
!= orig_type
)
2286 base
= fold_convert (type
, base
);
2287 step
= fold_convert (type
, step
);
2291 for (i
= 0; i
< n_iv_cands (data
); i
++)
2293 cand
= iv_cand (data
, i
);
2295 if (cand
->pos
!= pos
)
2298 if (cand
->incremented_at
!= incremented_at
2299 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2300 && cand
->ainc_use
!= use
))
2314 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2315 && operand_equal_p (step
, cand
->iv
->step
, 0)
2316 && (TYPE_PRECISION (TREE_TYPE (base
))
2317 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2321 if (i
== n_iv_cands (data
))
2323 cand
= XCNEW (struct iv_cand
);
2329 cand
->iv
= alloc_iv (base
, step
);
2332 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2334 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2335 cand
->var_after
= cand
->var_before
;
2337 cand
->important
= important
;
2338 cand
->incremented_at
= incremented_at
;
2339 data
->iv_candidates
.safe_push (cand
);
2342 && TREE_CODE (step
) != INTEGER_CST
)
2344 fd_ivopts_data
= data
;
2345 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2348 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2349 cand
->ainc_use
= use
;
2351 cand
->ainc_use
= NULL
;
2353 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2354 dump_cand (dump_file
, cand
);
2357 if (important
&& !cand
->important
)
2359 cand
->important
= true;
2360 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2361 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2366 bitmap_set_bit (use
->related_cands
, i
);
2367 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2368 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2375 /* Returns true if incrementing the induction variable at the end of the LOOP
2378 The purpose is to avoid splitting latch edge with a biv increment, thus
2379 creating a jump, possibly confusing other optimization passes and leaving
2380 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2381 is not available (so we do not have a better alternative), or if the latch
2382 edge is already nonempty. */
2385 allow_ip_end_pos_p (struct loop
*loop
)
2387 if (!ip_normal_pos (loop
))
2390 if (!empty_block_p (ip_end_pos (loop
)))
2396 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2397 Important field is set to IMPORTANT. */
2400 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2401 bool important
, struct iv_use
*use
)
2403 basic_block use_bb
= gimple_bb (use
->stmt
);
2404 enum machine_mode mem_mode
;
2405 unsigned HOST_WIDE_INT cstepi
;
2407 /* If we insert the increment in any position other than the standard
2408 ones, we must ensure that it is incremented once per iteration.
2409 It must not be in an inner nested loop, or one side of an if
2411 if (use_bb
->loop_father
!= data
->current_loop
2412 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2413 || stmt_could_throw_p (use
->stmt
)
2414 || !cst_and_fits_in_hwi (step
))
2417 cstepi
= int_cst_value (step
);
2419 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2420 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2421 || USE_STORE_PRE_INCREMENT (mem_mode
))
2422 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2423 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2424 || USE_STORE_PRE_DECREMENT (mem_mode
))
2425 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2427 enum tree_code code
= MINUS_EXPR
;
2429 tree new_step
= step
;
2431 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2433 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2434 code
= POINTER_PLUS_EXPR
;
2437 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2438 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2439 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2442 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2443 || USE_STORE_POST_INCREMENT (mem_mode
))
2444 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2445 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2446 || USE_STORE_POST_DECREMENT (mem_mode
))
2447 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2449 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2454 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2455 position to POS. If USE is not NULL, the candidate is set as related to
2456 it. The candidate computation is scheduled on all available positions. */
2459 add_candidate (struct ivopts_data
*data
,
2460 tree base
, tree step
, bool important
, struct iv_use
*use
)
2462 if (ip_normal_pos (data
->current_loop
))
2463 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2464 if (ip_end_pos (data
->current_loop
)
2465 && allow_ip_end_pos_p (data
->current_loop
))
2466 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2468 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2469 add_autoinc_candidates (data
, base
, step
, important
, use
);
2472 /* Adds standard iv candidates. */
2475 add_standard_iv_candidates (struct ivopts_data
*data
)
2477 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2479 /* The same for a double-integer type if it is still fast enough. */
2481 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2482 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2483 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2484 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2486 /* The same for a double-integer type if it is still fast enough. */
2488 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2489 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2490 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2491 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2495 /* Adds candidates bases on the old induction variable IV. */
2498 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2502 struct iv_cand
*cand
;
2504 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2506 /* The same, but with initial value zero. */
2507 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2508 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2510 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2511 iv
->step
, true, NULL
);
2513 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2514 if (gimple_code (phi
) == GIMPLE_PHI
)
2516 /* Additionally record the possibility of leaving the original iv
2518 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2519 cand
= add_candidate_1 (data
,
2520 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2521 SSA_NAME_DEF_STMT (def
));
2522 cand
->var_before
= iv
->ssa_name
;
2523 cand
->var_after
= def
;
2527 /* Adds candidates based on the old induction variables. */
2530 add_old_ivs_candidates (struct ivopts_data
*data
)
2536 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2538 iv
= ver_info (data
, i
)->iv
;
2539 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2540 add_old_iv_candidates (data
, iv
);
2544 /* Adds candidates based on the value of the induction variable IV and USE. */
2547 add_iv_value_candidates (struct ivopts_data
*data
,
2548 struct iv
*iv
, struct iv_use
*use
)
2550 unsigned HOST_WIDE_INT offset
;
2554 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2556 /* The same, but with initial value zero. Make such variable important,
2557 since it is generic enough so that possibly many uses may be based
2559 basetype
= TREE_TYPE (iv
->base
);
2560 if (POINTER_TYPE_P (basetype
))
2561 basetype
= sizetype
;
2562 add_candidate (data
, build_int_cst (basetype
, 0),
2563 iv
->step
, true, use
);
2565 /* Third, try removing the constant offset. Make sure to even
2566 add a candidate for &a[0] vs. (T *)&a. */
2567 base
= strip_offset (iv
->base
, &offset
);
2569 || base
!= iv
->base
)
2570 add_candidate (data
, base
, iv
->step
, false, use
);
2573 /* Adds candidates based on the uses. */
2576 add_derived_ivs_candidates (struct ivopts_data
*data
)
2580 for (i
= 0; i
< n_iv_uses (data
); i
++)
2582 struct iv_use
*use
= iv_use (data
, i
);
2589 case USE_NONLINEAR_EXPR
:
2592 /* Just add the ivs based on the value of the iv used here. */
2593 add_iv_value_candidates (data
, use
->iv
, use
);
2602 /* Record important candidates and add them to related_cands bitmaps
2606 record_important_candidates (struct ivopts_data
*data
)
2611 for (i
= 0; i
< n_iv_cands (data
); i
++)
2613 struct iv_cand
*cand
= iv_cand (data
, i
);
2615 if (cand
->important
)
2616 bitmap_set_bit (data
->important_candidates
, i
);
2619 data
->consider_all_candidates
= (n_iv_cands (data
)
2620 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2622 if (data
->consider_all_candidates
)
2624 /* We will not need "related_cands" bitmaps in this case,
2625 so release them to decrease peak memory consumption. */
2626 for (i
= 0; i
< n_iv_uses (data
); i
++)
2628 use
= iv_use (data
, i
);
2629 BITMAP_FREE (use
->related_cands
);
2634 /* Add important candidates to the related_cands bitmaps. */
2635 for (i
= 0; i
< n_iv_uses (data
); i
++)
2636 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2637 data
->important_candidates
);
2641 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2642 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2643 we allocate a simple list to every use. */
2646 alloc_use_cost_map (struct ivopts_data
*data
)
2648 unsigned i
, size
, s
;
2650 for (i
= 0; i
< n_iv_uses (data
); i
++)
2652 struct iv_use
*use
= iv_use (data
, i
);
2654 if (data
->consider_all_candidates
)
2655 size
= n_iv_cands (data
);
2658 s
= bitmap_count_bits (use
->related_cands
);
2660 /* Round up to the power of two, so that moduling by it is fast. */
2661 size
= s
? (1 << ceil_log2 (s
)) : 1;
2664 use
->n_map_members
= size
;
2665 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2669 /* Returns description of computation cost of expression whose runtime
2670 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2673 new_cost (unsigned runtime
, unsigned complexity
)
2677 cost
.cost
= runtime
;
2678 cost
.complexity
= complexity
;
2683 /* Adds costs COST1 and COST2. */
2686 add_costs (comp_cost cost1
, comp_cost cost2
)
2688 cost1
.cost
+= cost2
.cost
;
2689 cost1
.complexity
+= cost2
.complexity
;
2693 /* Subtracts costs COST1 and COST2. */
2696 sub_costs (comp_cost cost1
, comp_cost cost2
)
2698 cost1
.cost
-= cost2
.cost
;
2699 cost1
.complexity
-= cost2
.complexity
;
2704 /* Returns a negative number if COST1 < COST2, a positive number if
2705 COST1 > COST2, and 0 if COST1 = COST2. */
2708 compare_costs (comp_cost cost1
, comp_cost cost2
)
2710 if (cost1
.cost
== cost2
.cost
)
2711 return cost1
.complexity
- cost2
.complexity
;
2713 return cost1
.cost
- cost2
.cost
;
2716 /* Returns true if COST is infinite. */
2719 infinite_cost_p (comp_cost cost
)
2721 return cost
.cost
== INFTY
;
2724 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2725 on invariants DEPENDS_ON and that the value used in expressing it
2726 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2729 set_use_iv_cost (struct ivopts_data
*data
,
2730 struct iv_use
*use
, struct iv_cand
*cand
,
2731 comp_cost cost
, bitmap depends_on
, tree value
,
2732 enum tree_code comp
, int inv_expr_id
)
2736 if (infinite_cost_p (cost
))
2738 BITMAP_FREE (depends_on
);
2742 if (data
->consider_all_candidates
)
2744 use
->cost_map
[cand
->id
].cand
= cand
;
2745 use
->cost_map
[cand
->id
].cost
= cost
;
2746 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2747 use
->cost_map
[cand
->id
].value
= value
;
2748 use
->cost_map
[cand
->id
].comp
= comp
;
2749 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2753 /* n_map_members is a power of two, so this computes modulo. */
2754 s
= cand
->id
& (use
->n_map_members
- 1);
2755 for (i
= s
; i
< use
->n_map_members
; i
++)
2756 if (!use
->cost_map
[i
].cand
)
2758 for (i
= 0; i
< s
; i
++)
2759 if (!use
->cost_map
[i
].cand
)
2765 use
->cost_map
[i
].cand
= cand
;
2766 use
->cost_map
[i
].cost
= cost
;
2767 use
->cost_map
[i
].depends_on
= depends_on
;
2768 use
->cost_map
[i
].value
= value
;
2769 use
->cost_map
[i
].comp
= comp
;
2770 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2773 /* Gets cost of (USE, CANDIDATE) pair. */
2775 static struct cost_pair
*
2776 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2777 struct iv_cand
*cand
)
2780 struct cost_pair
*ret
;
2785 if (data
->consider_all_candidates
)
2787 ret
= use
->cost_map
+ cand
->id
;
2794 /* n_map_members is a power of two, so this computes modulo. */
2795 s
= cand
->id
& (use
->n_map_members
- 1);
2796 for (i
= s
; i
< use
->n_map_members
; i
++)
2797 if (use
->cost_map
[i
].cand
== cand
)
2798 return use
->cost_map
+ i
;
2799 else if (use
->cost_map
[i
].cand
== NULL
)
2801 for (i
= 0; i
< s
; i
++)
2802 if (use
->cost_map
[i
].cand
== cand
)
2803 return use
->cost_map
+ i
;
2804 else if (use
->cost_map
[i
].cand
== NULL
)
2810 /* Returns estimate on cost of computing SEQ. */
2813 seq_cost (rtx seq
, bool speed
)
2818 for (; seq
; seq
= NEXT_INSN (seq
))
2820 set
= single_set (seq
);
2822 cost
+= set_src_cost (SET_SRC (set
), speed
);
2830 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2832 produce_memory_decl_rtl (tree obj
, int *regno
)
2834 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2835 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2839 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2841 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2842 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2843 SET_SYMBOL_REF_DECL (x
, obj
);
2844 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2845 set_mem_addr_space (x
, as
);
2846 targetm
.encode_section_info (obj
, x
, true);
2850 x
= gen_raw_REG (address_mode
, (*regno
)++);
2851 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2852 set_mem_addr_space (x
, as
);
2858 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2859 walk_tree. DATA contains the actual fake register number. */
2862 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2864 tree obj
= NULL_TREE
;
2866 int *regno
= (int *) data
;
2868 switch (TREE_CODE (*expr_p
))
2871 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2872 handled_component_p (*expr_p
);
2873 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2876 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2877 x
= produce_memory_decl_rtl (obj
, regno
);
2882 obj
= SSA_NAME_VAR (*expr_p
);
2883 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2886 if (!DECL_RTL_SET_P (obj
))
2887 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2896 if (DECL_RTL_SET_P (obj
))
2899 if (DECL_MODE (obj
) == BLKmode
)
2900 x
= produce_memory_decl_rtl (obj
, regno
);
2902 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2912 decl_rtl_to_reset
.safe_push (obj
);
2913 SET_DECL_RTL (obj
, x
);
2919 /* Determines cost of the computation of EXPR. */
2922 computation_cost (tree expr
, bool speed
)
2925 tree type
= TREE_TYPE (expr
);
2927 /* Avoid using hard regs in ways which may be unsupported. */
2928 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2929 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2930 enum node_frequency real_frequency
= node
->frequency
;
2932 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2933 crtl
->maybe_hot_insn_p
= speed
;
2934 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2936 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2939 default_rtl_profile ();
2940 node
->frequency
= real_frequency
;
2942 cost
= seq_cost (seq
, speed
);
2944 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2945 TYPE_ADDR_SPACE (type
), speed
);
2946 else if (!REG_P (rslt
))
2947 cost
+= set_src_cost (rslt
, speed
);
2952 /* Returns variable containing the value of candidate CAND at statement AT. */
2955 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2957 if (stmt_after_increment (loop
, cand
, stmt
))
2958 return cand
->var_after
;
2960 return cand
->var_before
;
2963 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2964 same precision that is at least as wide as the precision of TYPE, stores
2965 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2969 determine_common_wider_type (tree
*a
, tree
*b
)
2971 tree wider_type
= NULL
;
2973 tree atype
= TREE_TYPE (*a
);
2975 if (CONVERT_EXPR_P (*a
))
2977 suba
= TREE_OPERAND (*a
, 0);
2978 wider_type
= TREE_TYPE (suba
);
2979 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2985 if (CONVERT_EXPR_P (*b
))
2987 subb
= TREE_OPERAND (*b
, 0);
2988 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2999 /* Determines the expression by that USE is expressed from induction variable
3000 CAND at statement AT in LOOP. The expression is stored in a decomposed
3001 form into AFF. Returns false if USE cannot be expressed using CAND. */
3004 get_computation_aff (struct loop
*loop
,
3005 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3006 struct affine_tree_combination
*aff
)
3008 tree ubase
= use
->iv
->base
;
3009 tree ustep
= use
->iv
->step
;
3010 tree cbase
= cand
->iv
->base
;
3011 tree cstep
= cand
->iv
->step
, cstep_common
;
3012 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3013 tree common_type
, var
;
3015 aff_tree cbase_aff
, var_aff
;
3018 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3020 /* We do not have a precision to express the values of use. */
3024 var
= var_at_stmt (loop
, cand
, at
);
3025 uutype
= unsigned_type_for (utype
);
3027 /* If the conversion is not noop, perform it. */
3028 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3030 cstep
= fold_convert (uutype
, cstep
);
3031 cbase
= fold_convert (uutype
, cbase
);
3032 var
= fold_convert (uutype
, var
);
3035 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3038 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3039 type, we achieve better folding by computing their difference in this
3040 wider type, and cast the result to UUTYPE. We do not need to worry about
3041 overflows, as all the arithmetics will in the end be performed in UUTYPE
3043 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3045 /* use = ubase - ratio * cbase + ratio * var. */
3046 tree_to_aff_combination (ubase
, common_type
, aff
);
3047 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3048 tree_to_aff_combination (var
, uutype
, &var_aff
);
3050 /* We need to shift the value if we are after the increment. */
3051 if (stmt_after_increment (loop
, cand
, at
))
3055 if (common_type
!= uutype
)
3056 cstep_common
= fold_convert (common_type
, cstep
);
3058 cstep_common
= cstep
;
3060 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3061 aff_combination_add (&cbase_aff
, &cstep_aff
);
3064 aff_combination_scale (&cbase_aff
, -rat
);
3065 aff_combination_add (aff
, &cbase_aff
);
3066 if (common_type
!= uutype
)
3067 aff_combination_convert (aff
, uutype
);
3069 aff_combination_scale (&var_aff
, rat
);
3070 aff_combination_add (aff
, &var_aff
);
3075 /* Return the type of USE. */
3078 get_use_type (struct iv_use
*use
)
3080 tree base_type
= TREE_TYPE (use
->iv
->base
);
3083 if (use
->type
== USE_ADDRESS
)
3085 /* The base_type may be a void pointer. Create a pointer type based on
3086 the mem_ref instead. */
3087 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3088 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3089 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3097 /* Determines the expression by that USE is expressed from induction variable
3098 CAND at statement AT in LOOP. The computation is unshared. */
3101 get_computation_at (struct loop
*loop
,
3102 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3105 tree type
= get_use_type (use
);
3107 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3109 unshare_aff_combination (&aff
);
3110 return fold_convert (type
, aff_combination_to_tree (&aff
));
3113 /* Determines the expression by that USE is expressed from induction variable
3114 CAND in LOOP. The computation is unshared. */
3117 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3119 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3122 /* Adjust the cost COST for being in loop setup rather than loop body.
3123 If we're optimizing for space, the loop setup overhead is constant;
3124 if we're optimizing for speed, amortize it over the per-iteration cost. */
3126 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3130 else if (optimize_loop_for_speed_p (data
->current_loop
))
3131 return cost
/ avg_loop_niter (data
->current_loop
);
3136 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3137 validity for a memory reference accessing memory of mode MODE in
3138 address space AS. */
3142 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3145 #define MAX_RATIO 128
3146 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3147 static vec
<sbitmap
> valid_mult_list
;
3150 if (data_index
>= valid_mult_list
.length ())
3151 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3153 valid_mult
= valid_mult_list
[data_index
];
3156 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3157 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3158 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3162 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3163 bitmap_clear (valid_mult
);
3164 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3165 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3166 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3168 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3169 if (memory_address_addr_space_p (mode
, addr
, as
)
3170 || memory_address_addr_space_p (mode
, scaled
, as
))
3171 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3176 fprintf (dump_file
, " allowed multipliers:");
3177 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3178 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3179 fprintf (dump_file
, " %d", (int) i
);
3180 fprintf (dump_file
, "\n");
3181 fprintf (dump_file
, "\n");
3184 valid_mult_list
[data_index
] = valid_mult
;
3187 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3190 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3193 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3194 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3195 variable is omitted. Compute the cost for a memory reference that accesses
3196 a memory location of mode MEM_MODE in address space AS.
3198 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3199 size of MEM_MODE / RATIO) is available. To make this determination, we
3200 look at the size of the increment to be made, which is given in CSTEP.
3201 CSTEP may be zero if the step is unknown.
3202 STMT_AFTER_INC is true iff the statement we're looking at is after the
3203 increment of the original biv.
3205 TODO -- there must be some better way. This all is quite crude. */
3207 typedef struct address_cost_data_s
3209 HOST_WIDE_INT min_offset
, max_offset
;
3210 unsigned costs
[2][2][2][2];
3211 } *address_cost_data
;
3215 get_address_cost (bool symbol_present
, bool var_present
,
3216 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3217 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3218 addr_space_t as
, bool speed
,
3219 bool stmt_after_inc
, bool *may_autoinc
)
3221 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3222 static vec
<address_cost_data
> address_cost_data_list
;
3223 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3224 address_cost_data data
;
3225 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3226 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3227 unsigned cost
, acost
, complexity
;
3228 bool offset_p
, ratio_p
, autoinc
;
3229 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3230 unsigned HOST_WIDE_INT mask
;
3233 if (data_index
>= address_cost_data_list
.length ())
3234 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3236 data
= address_cost_data_list
[data_index
];
3240 HOST_WIDE_INT rat
, off
= 0;
3241 int old_cse_not_expected
, width
;
3242 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3243 rtx seq
, addr
, base
;
3246 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3248 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3250 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3251 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3252 width
= HOST_BITS_PER_WIDE_INT
- 1;
3253 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3255 for (i
= width
; i
>= 0; i
--)
3257 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3258 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3259 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3262 data
->min_offset
= (i
== -1? 0 : off
);
3264 for (i
= width
; i
>= 0; i
--)
3266 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3267 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3268 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3273 data
->max_offset
= off
;
3275 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3277 fprintf (dump_file
, "get_address_cost:\n");
3278 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3279 GET_MODE_NAME (mem_mode
),
3281 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3282 GET_MODE_NAME (mem_mode
),
3287 for (i
= 2; i
<= MAX_RATIO
; i
++)
3288 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3294 /* Compute the cost of various addressing modes. */
3296 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3297 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3299 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3300 || USE_STORE_PRE_DECREMENT (mem_mode
))
3302 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3303 has_predec
[mem_mode
]
3304 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3306 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3307 || USE_STORE_POST_DECREMENT (mem_mode
))
3309 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3310 has_postdec
[mem_mode
]
3311 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3313 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3314 || USE_STORE_PRE_DECREMENT (mem_mode
))
3316 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3317 has_preinc
[mem_mode
]
3318 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3320 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3321 || USE_STORE_POST_INCREMENT (mem_mode
))
3323 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3324 has_postinc
[mem_mode
]
3325 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3327 for (i
= 0; i
< 16; i
++)
3330 var_p
= (i
>> 1) & 1;
3331 off_p
= (i
>> 2) & 1;
3332 rat_p
= (i
>> 3) & 1;
3336 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3337 gen_int_mode (rat
, address_mode
));
3340 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3344 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3345 /* ??? We can run into trouble with some backends by presenting
3346 it with symbols which haven't been properly passed through
3347 targetm.encode_section_info. By setting the local bit, we
3348 enhance the probability of things working. */
3349 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3352 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3354 (PLUS
, address_mode
, base
,
3355 gen_int_mode (off
, address_mode
)));
3358 base
= gen_int_mode (off
, address_mode
);
3363 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3366 /* To avoid splitting addressing modes, pretend that no cse will
3368 old_cse_not_expected
= cse_not_expected
;
3369 cse_not_expected
= true;
3370 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3371 cse_not_expected
= old_cse_not_expected
;
3375 acost
= seq_cost (seq
, speed
);
3376 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3380 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3383 /* On some targets, it is quite expensive to load symbol to a register,
3384 which makes addresses that contain symbols look much more expensive.
3385 However, the symbol will have to be loaded in any case before the
3386 loop (and quite likely we have it in register already), so it does not
3387 make much sense to penalize them too heavily. So make some final
3388 tweaks for the SYMBOL_PRESENT modes:
3390 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3391 var is cheaper, use this mode with small penalty.
3392 If VAR_PRESENT is true, try whether the mode with
3393 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3394 if this is the case, use it. */
3395 add_c
= add_cost (speed
, address_mode
);
3396 for (i
= 0; i
< 8; i
++)
3399 off_p
= (i
>> 1) & 1;
3400 rat_p
= (i
>> 2) & 1;
3402 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3406 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3407 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3410 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3412 fprintf (dump_file
, "Address costs:\n");
3414 for (i
= 0; i
< 16; i
++)
3417 var_p
= (i
>> 1) & 1;
3418 off_p
= (i
>> 2) & 1;
3419 rat_p
= (i
>> 3) & 1;
3421 fprintf (dump_file
, " ");
3423 fprintf (dump_file
, "sym + ");
3425 fprintf (dump_file
, "var + ");
3427 fprintf (dump_file
, "cst + ");
3429 fprintf (dump_file
, "rat * ");
3431 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3432 fprintf (dump_file
, "index costs %d\n", acost
);
3434 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3435 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3436 fprintf (dump_file
, " May include autoinc/dec\n");
3437 fprintf (dump_file
, "\n");
3440 address_cost_data_list
[data_index
] = data
;
3443 bits
= GET_MODE_BITSIZE (address_mode
);
3444 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3446 if ((offset
>> (bits
- 1) & 1))
3451 msize
= GET_MODE_SIZE (mem_mode
);
3452 autoinc_offset
= offset
;
3454 autoinc_offset
+= ratio
* cstep
;
3455 if (symbol_present
|| var_present
|| ratio
!= 1)
3457 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3459 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3461 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3463 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3464 && msize
== -cstep
))
3468 offset_p
= (s_offset
!= 0
3469 && data
->min_offset
<= s_offset
3470 && s_offset
<= data
->max_offset
);
3471 ratio_p
= (ratio
!= 1
3472 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3474 if (ratio
!= 1 && !ratio_p
)
3475 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3477 if (s_offset
&& !offset_p
&& !symbol_present
)
3478 cost
+= add_cost (speed
, address_mode
);
3481 *may_autoinc
= autoinc
;
3482 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3483 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3484 return new_cost (cost
+ acost
, complexity
);
3487 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3488 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3489 calculating the operands of EXPR. Returns true if successful, and returns
3490 the cost in COST. */
3493 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3494 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3497 tree op1
= TREE_OPERAND (expr
, 1);
3498 tree cst
= TREE_OPERAND (mult
, 1);
3499 tree multop
= TREE_OPERAND (mult
, 0);
3500 int m
= exact_log2 (int_cst_value (cst
));
3501 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3503 bool equal_p
= false;
3505 if (!(m
>= 0 && m
< maxm
))
3508 if (operand_equal_p (op1
, mult
, 0))
3511 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3512 ? shiftadd_cost (speed
, mode
, m
)
3514 ? shiftsub1_cost (speed
, mode
, m
)
3515 : shiftsub0_cost (speed
, mode
, m
)));
3516 res
= new_cost (sa_cost
, 0);
3517 res
= add_costs (res
, equal_p
? cost0
: cost1
);
3519 STRIP_NOPS (multop
);
3520 if (!is_gimple_val (multop
))
3521 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3527 /* Estimates cost of forcing expression EXPR into a variable. */
3530 force_expr_to_var_cost (tree expr
, bool speed
)
3532 static bool costs_initialized
= false;
3533 static unsigned integer_cost
[2];
3534 static unsigned symbol_cost
[2];
3535 static unsigned address_cost
[2];
3537 comp_cost cost0
, cost1
, cost
;
3538 enum machine_mode mode
;
3540 if (!costs_initialized
)
3542 tree type
= build_pointer_type (integer_type_node
);
3547 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3548 TREE_STATIC (var
) = 1;
3549 x
= produce_memory_decl_rtl (var
, NULL
);
3550 SET_DECL_RTL (var
, x
);
3552 addr
= build1 (ADDR_EXPR
, type
, var
);
3555 for (i
= 0; i
< 2; i
++)
3557 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3560 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3563 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3564 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3566 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3567 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3568 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3569 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3570 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3571 fprintf (dump_file
, "\n");
3575 costs_initialized
= true;
3580 if (SSA_VAR_P (expr
))
3583 if (is_gimple_min_invariant (expr
))
3585 if (TREE_CODE (expr
) == INTEGER_CST
)
3586 return new_cost (integer_cost
[speed
], 0);
3588 if (TREE_CODE (expr
) == ADDR_EXPR
)
3590 tree obj
= TREE_OPERAND (expr
, 0);
3592 if (TREE_CODE (obj
) == VAR_DECL
3593 || TREE_CODE (obj
) == PARM_DECL
3594 || TREE_CODE (obj
) == RESULT_DECL
)
3595 return new_cost (symbol_cost
[speed
], 0);
3598 return new_cost (address_cost
[speed
], 0);
3601 switch (TREE_CODE (expr
))
3603 case POINTER_PLUS_EXPR
:
3607 op0
= TREE_OPERAND (expr
, 0);
3608 op1
= TREE_OPERAND (expr
, 1);
3612 if (is_gimple_val (op0
))
3615 cost0
= force_expr_to_var_cost (op0
, speed
);
3617 if (is_gimple_val (op1
))
3620 cost1
= force_expr_to_var_cost (op1
, speed
);
3625 op0
= TREE_OPERAND (expr
, 0);
3629 if (is_gimple_val (op0
))
3632 cost0
= force_expr_to_var_cost (op0
, speed
);
3638 /* Just an arbitrary value, FIXME. */
3639 return new_cost (target_spill_cost
[speed
], 0);
3642 mode
= TYPE_MODE (TREE_TYPE (expr
));
3643 switch (TREE_CODE (expr
))
3645 case POINTER_PLUS_EXPR
:
3649 cost
= new_cost (add_cost (speed
, mode
), 0);
3650 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3652 tree mult
= NULL_TREE
;
3654 if (TREE_CODE (op1
) == MULT_EXPR
)
3656 else if (TREE_CODE (op0
) == MULT_EXPR
)
3659 if (mult
!= NULL_TREE
3660 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3661 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3668 if (cst_and_fits_in_hwi (op0
))
3669 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3671 else if (cst_and_fits_in_hwi (op1
))
3672 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3675 return new_cost (target_spill_cost
[speed
], 0);
3682 cost
= add_costs (cost
, cost0
);
3683 cost
= add_costs (cost
, cost1
);
3685 /* Bound the cost by target_spill_cost. The parts of complicated
3686 computations often are either loop invariant or at least can
3687 be shared between several iv uses, so letting this grow without
3688 limits would not give reasonable results. */
3689 if (cost
.cost
> (int) target_spill_cost
[speed
])
3690 cost
.cost
= target_spill_cost
[speed
];
3695 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3696 invariants the computation depends on. */
3699 force_var_cost (struct ivopts_data
*data
,
3700 tree expr
, bitmap
*depends_on
)
3704 fd_ivopts_data
= data
;
3705 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3708 return force_expr_to_var_cost (expr
, data
->speed
);
3711 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3712 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3713 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3714 invariants the computation depends on. */
3717 split_address_cost (struct ivopts_data
*data
,
3718 tree addr
, bool *symbol_present
, bool *var_present
,
3719 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3722 HOST_WIDE_INT bitsize
;
3723 HOST_WIDE_INT bitpos
;
3725 enum machine_mode mode
;
3726 int unsignedp
, volatilep
;
3728 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3729 &unsignedp
, &volatilep
, false);
3732 || bitpos
% BITS_PER_UNIT
!= 0
3733 || TREE_CODE (core
) != VAR_DECL
)
3735 *symbol_present
= false;
3736 *var_present
= true;
3737 fd_ivopts_data
= data
;
3738 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3739 return new_cost (target_spill_cost
[data
->speed
], 0);
3742 *offset
+= bitpos
/ BITS_PER_UNIT
;
3743 if (TREE_STATIC (core
)
3744 || DECL_EXTERNAL (core
))
3746 *symbol_present
= true;
3747 *var_present
= false;
3751 *symbol_present
= false;
3752 *var_present
= true;
3756 /* Estimates cost of expressing difference of addresses E1 - E2 as
3757 var + symbol + offset. The value of offset is added to OFFSET,
3758 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3759 part is missing. DEPENDS_ON is a set of the invariants the computation
3763 ptr_difference_cost (struct ivopts_data
*data
,
3764 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3765 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3767 HOST_WIDE_INT diff
= 0;
3768 aff_tree aff_e1
, aff_e2
;
3771 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3773 if (ptr_difference_const (e1
, e2
, &diff
))
3776 *symbol_present
= false;
3777 *var_present
= false;
3781 if (integer_zerop (e2
))
3782 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3783 symbol_present
, var_present
, offset
, depends_on
);
3785 *symbol_present
= false;
3786 *var_present
= true;
3788 type
= signed_type_for (TREE_TYPE (e1
));
3789 tree_to_aff_combination (e1
, type
, &aff_e1
);
3790 tree_to_aff_combination (e2
, type
, &aff_e2
);
3791 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3792 aff_combination_add (&aff_e1
, &aff_e2
);
3794 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3797 /* Estimates cost of expressing difference E1 - E2 as
3798 var + symbol + offset. The value of offset is added to OFFSET,
3799 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3800 part is missing. DEPENDS_ON is a set of the invariants the computation
3804 difference_cost (struct ivopts_data
*data
,
3805 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3806 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3808 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3809 unsigned HOST_WIDE_INT off1
, off2
;
3810 aff_tree aff_e1
, aff_e2
;
3813 e1
= strip_offset (e1
, &off1
);
3814 e2
= strip_offset (e2
, &off2
);
3815 *offset
+= off1
- off2
;
3820 if (TREE_CODE (e1
) == ADDR_EXPR
)
3821 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3822 offset
, depends_on
);
3823 *symbol_present
= false;
3825 if (operand_equal_p (e1
, e2
, 0))
3827 *var_present
= false;
3831 *var_present
= true;
3833 if (integer_zerop (e2
))
3834 return force_var_cost (data
, e1
, depends_on
);
3836 if (integer_zerop (e1
))
3838 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3839 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3843 type
= signed_type_for (TREE_TYPE (e1
));
3844 tree_to_aff_combination (e1
, type
, &aff_e1
);
3845 tree_to_aff_combination (e2
, type
, &aff_e2
);
3846 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3847 aff_combination_add (&aff_e1
, &aff_e2
);
3849 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3852 /* Returns true if AFF1 and AFF2 are identical. */
3855 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3859 if (aff1
->n
!= aff2
->n
)
3862 for (i
= 0; i
< aff1
->n
; i
++)
3864 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3867 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3873 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3876 get_expr_id (struct ivopts_data
*data
, tree expr
)
3878 struct iv_inv_expr_ent ent
;
3879 struct iv_inv_expr_ent
**slot
;
3882 ent
.hash
= iterative_hash_expr (expr
, 0);
3883 slot
= data
->inv_expr_tab
.find_slot (&ent
, INSERT
);
3887 *slot
= XNEW (struct iv_inv_expr_ent
);
3888 (*slot
)->expr
= expr
;
3889 (*slot
)->hash
= ent
.hash
;
3890 (*slot
)->id
= data
->inv_expr_id
++;
3894 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3895 requires a new compiler generated temporary. Returns -1 otherwise.
3896 ADDRESS_P is a flag indicating if the expression is for address
3900 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3901 tree cbase
, HOST_WIDE_INT ratio
,
3904 aff_tree ubase_aff
, cbase_aff
;
3912 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3913 && (TREE_CODE (cbase
) == INTEGER_CST
))
3916 /* Strips the constant part. */
3917 if (TREE_CODE (ubase
) == PLUS_EXPR
3918 || TREE_CODE (ubase
) == MINUS_EXPR
3919 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3921 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3922 ubase
= TREE_OPERAND (ubase
, 0);
3925 /* Strips the constant part. */
3926 if (TREE_CODE (cbase
) == PLUS_EXPR
3927 || TREE_CODE (cbase
) == MINUS_EXPR
3928 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3930 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3931 cbase
= TREE_OPERAND (cbase
, 0);
3936 if (((TREE_CODE (ubase
) == SSA_NAME
)
3937 || (TREE_CODE (ubase
) == ADDR_EXPR
3938 && is_gimple_min_invariant (ubase
)))
3939 && (TREE_CODE (cbase
) == INTEGER_CST
))
3942 if (((TREE_CODE (cbase
) == SSA_NAME
)
3943 || (TREE_CODE (cbase
) == ADDR_EXPR
3944 && is_gimple_min_invariant (cbase
)))
3945 && (TREE_CODE (ubase
) == INTEGER_CST
))
3951 if (operand_equal_p (ubase
, cbase
, 0))
3954 if (TREE_CODE (ubase
) == ADDR_EXPR
3955 && TREE_CODE (cbase
) == ADDR_EXPR
)
3959 usym
= TREE_OPERAND (ubase
, 0);
3960 csym
= TREE_OPERAND (cbase
, 0);
3961 if (TREE_CODE (usym
) == ARRAY_REF
)
3963 tree ind
= TREE_OPERAND (usym
, 1);
3964 if (TREE_CODE (ind
) == INTEGER_CST
3965 && host_integerp (ind
, 0)
3966 && TREE_INT_CST_LOW (ind
) == 0)
3967 usym
= TREE_OPERAND (usym
, 0);
3969 if (TREE_CODE (csym
) == ARRAY_REF
)
3971 tree ind
= TREE_OPERAND (csym
, 1);
3972 if (TREE_CODE (ind
) == INTEGER_CST
3973 && host_integerp (ind
, 0)
3974 && TREE_INT_CST_LOW (ind
) == 0)
3975 csym
= TREE_OPERAND (csym
, 0);
3977 if (operand_equal_p (usym
, csym
, 0))
3980 /* Now do more complex comparison */
3981 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
3982 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
3983 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
3987 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
3988 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
3990 aff_combination_scale (&cbase_aff
, double_int::from_shwi (-1 * ratio
));
3991 aff_combination_add (&ubase_aff
, &cbase_aff
);
3992 expr
= aff_combination_to_tree (&ubase_aff
);
3993 return get_expr_id (data
, expr
);
3998 /* Determines the cost of the computation by that USE is expressed
3999 from induction variable CAND. If ADDRESS_P is true, we just need
4000 to create an address from it, otherwise we want to get it into
4001 register. A set of invariants we depend on is stored in
4002 DEPENDS_ON. AT is the statement at that the value is computed.
4003 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4004 addressing is likely. */
4007 get_computation_cost_at (struct ivopts_data
*data
,
4008 struct iv_use
*use
, struct iv_cand
*cand
,
4009 bool address_p
, bitmap
*depends_on
, gimple at
,
4013 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4015 tree utype
= TREE_TYPE (ubase
), ctype
;
4016 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4017 HOST_WIDE_INT ratio
, aratio
;
4018 bool var_present
, symbol_present
, stmt_is_after_inc
;
4021 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4022 enum machine_mode mem_mode
= (address_p
4023 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4028 /* Only consider real candidates. */
4030 return infinite_cost
;
4032 cbase
= cand
->iv
->base
;
4033 cstep
= cand
->iv
->step
;
4034 ctype
= TREE_TYPE (cbase
);
4036 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4038 /* We do not have a precision to express the values of use. */
4039 return infinite_cost
;
4043 || (use
->iv
->base_object
4044 && cand
->iv
->base_object
4045 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4046 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4048 /* Do not try to express address of an object with computation based
4049 on address of a different object. This may cause problems in rtl
4050 level alias analysis (that does not expect this to be happening,
4051 as this is illegal in C), and would be unlikely to be useful
4053 if (use
->iv
->base_object
4054 && cand
->iv
->base_object
4055 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4056 return infinite_cost
;
4059 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4061 /* TODO -- add direct handling of this case. */
4065 /* CSTEPI is removed from the offset in case statement is after the
4066 increment. If the step is not constant, we use zero instead.
4067 This is a bit imprecise (there is the extra addition), but
4068 redundancy elimination is likely to transform the code so that
4069 it uses value of the variable before increment anyway,
4070 so it is not that much unrealistic. */
4071 if (cst_and_fits_in_hwi (cstep
))
4072 cstepi
= int_cst_value (cstep
);
4076 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4077 return infinite_cost
;
4079 if (rat
.fits_shwi ())
4080 ratio
= rat
.to_shwi ();
4082 return infinite_cost
;
4085 ctype
= TREE_TYPE (cbase
);
4087 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4089 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4090 or ratio == 1, it is better to handle this like
4092 ubase - ratio * cbase + ratio * var
4094 (also holds in the case ratio == -1, TODO. */
4096 if (cst_and_fits_in_hwi (cbase
))
4098 offset
= - ratio
* int_cst_value (cbase
);
4099 cost
= difference_cost (data
,
4100 ubase
, build_int_cst (utype
, 0),
4101 &symbol_present
, &var_present
, &offset
,
4103 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4105 else if (ratio
== 1)
4107 tree real_cbase
= cbase
;
4109 /* Check to see if any adjustment is needed. */
4110 if (cstepi
== 0 && stmt_is_after_inc
)
4112 aff_tree real_cbase_aff
;
4115 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4117 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4119 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4120 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4123 cost
= difference_cost (data
,
4125 &symbol_present
, &var_present
, &offset
,
4127 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4130 && !POINTER_TYPE_P (ctype
)
4131 && multiplier_allowed_in_address_p
4133 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4136 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4137 cost
= difference_cost (data
,
4139 &symbol_present
, &var_present
, &offset
,
4141 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4145 cost
= force_var_cost (data
, cbase
, depends_on
);
4146 cost
= add_costs (cost
,
4147 difference_cost (data
,
4148 ubase
, build_int_cst (utype
, 0),
4149 &symbol_present
, &var_present
,
4150 &offset
, depends_on
));
4151 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4152 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4158 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4159 /* Clear depends on. */
4160 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4161 bitmap_clear (*depends_on
);
4164 /* If we are after the increment, the value of the candidate is higher by
4166 if (stmt_is_after_inc
)
4167 offset
-= ratio
* cstepi
;
4169 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4170 (symbol/var1/const parts may be omitted). If we are looking for an
4171 address, find the cost of addressing this. */
4173 return add_costs (cost
,
4174 get_address_cost (symbol_present
, var_present
,
4175 offset
, ratio
, cstepi
,
4177 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4178 speed
, stmt_is_after_inc
,
4181 /* Otherwise estimate the costs for computing the expression. */
4182 if (!symbol_present
&& !var_present
&& !offset
)
4185 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4189 /* Symbol + offset should be compile-time computable so consider that they
4190 are added once to the variable, if present. */
4191 if (var_present
&& (symbol_present
|| offset
))
4192 cost
.cost
+= adjust_setup_cost (data
,
4193 add_cost (speed
, TYPE_MODE (ctype
)));
4195 /* Having offset does not affect runtime cost in case it is added to
4196 symbol, but it increases complexity. */
4200 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4202 aratio
= ratio
> 0 ? ratio
: -ratio
;
4204 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4209 *can_autoinc
= false;
4212 /* Just get the expression, expand it and measure the cost. */
4213 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4216 return infinite_cost
;
4219 comp
= build_simple_mem_ref (comp
);
4221 return new_cost (computation_cost (comp
, speed
), 0);
4225 /* Determines the cost of the computation by that USE is expressed
4226 from induction variable CAND. If ADDRESS_P is true, we just need
4227 to create an address from it, otherwise we want to get it into
4228 register. A set of invariants we depend on is stored in
4229 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4230 autoinc addressing is likely. */
4233 get_computation_cost (struct ivopts_data
*data
,
4234 struct iv_use
*use
, struct iv_cand
*cand
,
4235 bool address_p
, bitmap
*depends_on
,
4236 bool *can_autoinc
, int *inv_expr_id
)
4238 return get_computation_cost_at (data
,
4239 use
, cand
, address_p
, depends_on
, use
->stmt
,
4240 can_autoinc
, inv_expr_id
);
4243 /* Determines cost of basing replacement of USE on CAND in a generic
4247 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4248 struct iv_use
*use
, struct iv_cand
*cand
)
4252 int inv_expr_id
= -1;
4254 /* The simple case first -- if we need to express value of the preserved
4255 original biv, the cost is 0. This also prevents us from counting the
4256 cost of increment twice -- once at this use and once in the cost of
4258 if (cand
->pos
== IP_ORIGINAL
4259 && cand
->incremented_at
== use
->stmt
)
4261 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4266 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4267 NULL
, &inv_expr_id
);
4269 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4272 return !infinite_cost_p (cost
);
4275 /* Determines cost of basing replacement of USE on CAND in an address. */
4278 determine_use_iv_cost_address (struct ivopts_data
*data
,
4279 struct iv_use
*use
, struct iv_cand
*cand
)
4283 int inv_expr_id
= -1;
4284 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4285 &can_autoinc
, &inv_expr_id
);
4287 if (cand
->ainc_use
== use
)
4290 cost
.cost
-= cand
->cost_step
;
4291 /* If we generated the candidate solely for exploiting autoincrement
4292 opportunities, and it turns out it can't be used, set the cost to
4293 infinity to make sure we ignore it. */
4294 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4295 cost
= infinite_cost
;
4297 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4300 return !infinite_cost_p (cost
);
4303 /* Computes value of candidate CAND at position AT in iteration NITER, and
4304 stores it to VAL. */
4307 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4310 aff_tree step
, delta
, nit
;
4311 struct iv
*iv
= cand
->iv
;
4312 tree type
= TREE_TYPE (iv
->base
);
4313 tree steptype
= type
;
4314 if (POINTER_TYPE_P (type
))
4315 steptype
= sizetype
;
4317 tree_to_aff_combination (iv
->step
, steptype
, &step
);
4318 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4319 aff_combination_convert (&nit
, steptype
);
4320 aff_combination_mult (&nit
, &step
, &delta
);
4321 if (stmt_after_increment (loop
, cand
, at
))
4322 aff_combination_add (&delta
, &step
);
4324 tree_to_aff_combination (iv
->base
, type
, val
);
4325 aff_combination_add (val
, &delta
);
4328 /* Returns period of induction variable iv. */
4331 iv_period (struct iv
*iv
)
4333 tree step
= iv
->step
, period
, type
;
4336 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4338 type
= unsigned_type_for (TREE_TYPE (step
));
4339 /* Period of the iv is lcm (step, type_range)/step -1,
4340 i.e., N*type_range/step - 1. Since type range is power
4341 of two, N == (step >> num_of_ending_zeros_binary (step),
4342 so the final result is
4344 (type_range >> num_of_ending_zeros_binary (step)) - 1
4347 pow2div
= num_ending_zeros (step
);
4349 period
= build_low_bits_mask (type
,
4350 (TYPE_PRECISION (type
)
4351 - tree_low_cst (pow2div
, 1)));
4356 /* Returns the comparison operator used when eliminating the iv USE. */
4358 static enum tree_code
4359 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4361 struct loop
*loop
= data
->current_loop
;
4365 ex_bb
= gimple_bb (use
->stmt
);
4366 exit
= EDGE_SUCC (ex_bb
, 0);
4367 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4368 exit
= EDGE_SUCC (ex_bb
, 1);
4370 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4374 strip_wrap_conserving_type_conversions (tree exp
)
4376 while (tree_ssa_useless_type_conversion (exp
)
4377 && (nowrap_type_p (TREE_TYPE (exp
))
4378 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4379 exp
= TREE_OPERAND (exp
, 0);
4383 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4384 check for an exact match. */
4387 expr_equal_p (tree e
, tree what
)
4390 enum tree_code code
;
4392 e
= strip_wrap_conserving_type_conversions (e
);
4393 what
= strip_wrap_conserving_type_conversions (what
);
4395 code
= TREE_CODE (what
);
4396 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4399 if (operand_equal_p (e
, what
, 0))
4402 if (TREE_CODE (e
) != SSA_NAME
)
4405 stmt
= SSA_NAME_DEF_STMT (e
);
4406 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4407 || gimple_assign_rhs_code (stmt
) != code
)
4410 switch (get_gimple_rhs_class (code
))
4412 case GIMPLE_BINARY_RHS
:
4413 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4417 case GIMPLE_UNARY_RHS
:
4418 case GIMPLE_SINGLE_RHS
:
4419 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4425 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4426 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4427 calculation is performed in non-wrapping type.
4429 TODO: More generally, we could test for the situation that
4430 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4431 This would require knowing the sign of OFFSET.
4433 Also, we only look for the first addition in the computation of BASE.
4434 More complex analysis would be better, but introducing it just for
4435 this optimization seems like an overkill. */
4438 difference_cannot_overflow_p (tree base
, tree offset
)
4440 enum tree_code code
;
4443 if (!nowrap_type_p (TREE_TYPE (base
)))
4446 base
= expand_simple_operations (base
);
4448 if (TREE_CODE (base
) == SSA_NAME
)
4450 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4452 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4455 code
= gimple_assign_rhs_code (stmt
);
4456 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4459 e1
= gimple_assign_rhs1 (stmt
);
4460 e2
= gimple_assign_rhs2 (stmt
);
4464 code
= TREE_CODE (base
);
4465 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4467 e1
= TREE_OPERAND (base
, 0);
4468 e2
= TREE_OPERAND (base
, 1);
4471 /* TODO: deeper inspection may be necessary to prove the equality. */
4475 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4476 case POINTER_PLUS_EXPR
:
4477 return expr_equal_p (e2
, offset
);
4484 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4485 comparison with CAND. NITER describes the number of iterations of
4486 the loops. If successful, the comparison in COMP_P is altered accordingly.
4488 We aim to handle the following situation:
4504 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4505 We aim to optimize this to
4513 while (p < p_0 - a + b);
4515 This preserves the correctness, since the pointer arithmetics does not
4516 overflow. More precisely:
4518 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4519 overflow in computing it or the values of p.
4520 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4521 overflow. To prove this, we use the fact that p_0 = base + a. */
4524 iv_elimination_compare_lt (struct ivopts_data
*data
,
4525 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4526 struct tree_niter_desc
*niter
)
4528 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4529 struct affine_tree_combination nit
, tmpa
, tmpb
;
4530 enum tree_code comp
;
4533 /* We need to know that the candidate induction variable does not overflow.
4534 While more complex analysis may be used to prove this, for now just
4535 check that the variable appears in the original program and that it
4536 is computed in a type that guarantees no overflows. */
4537 cand_type
= TREE_TYPE (cand
->iv
->base
);
4538 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4541 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4542 the calculation of the BOUND could overflow, making the comparison
4544 if (!data
->loop_single_exit_p
)
4547 /* We need to be able to decide whether candidate is increasing or decreasing
4548 in order to choose the right comparison operator. */
4549 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4551 step
= int_cst_value (cand
->iv
->step
);
4553 /* Check that the number of iterations matches the expected pattern:
4554 a + 1 > b ? 0 : b - a - 1. */
4555 mbz
= niter
->may_be_zero
;
4556 if (TREE_CODE (mbz
) == GT_EXPR
)
4558 /* Handle a + 1 > b. */
4559 tree op0
= TREE_OPERAND (mbz
, 0);
4560 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4562 a
= TREE_OPERAND (op0
, 0);
4563 b
= TREE_OPERAND (mbz
, 1);
4568 else if (TREE_CODE (mbz
) == LT_EXPR
)
4570 tree op1
= TREE_OPERAND (mbz
, 1);
4572 /* Handle b < a + 1. */
4573 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4575 a
= TREE_OPERAND (op1
, 0);
4576 b
= TREE_OPERAND (mbz
, 0);
4584 /* Expected number of iterations is B - A - 1. Check that it matches
4585 the actual number, i.e., that B - A - NITER = 1. */
4586 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4587 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4588 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4589 aff_combination_scale (&nit
, double_int_minus_one
);
4590 aff_combination_scale (&tmpa
, double_int_minus_one
);
4591 aff_combination_add (&tmpb
, &tmpa
);
4592 aff_combination_add (&tmpb
, &nit
);
4593 if (tmpb
.n
!= 0 || tmpb
.offset
!= double_int_one
)
4596 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4598 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4600 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4601 if (!difference_cannot_overflow_p (cand
->iv
->base
, offset
))
4604 /* Determine the new comparison operator. */
4605 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4606 if (*comp_p
== NE_EXPR
)
4608 else if (*comp_p
== EQ_EXPR
)
4609 *comp_p
= invert_tree_comparison (comp
, false);
4616 /* Check whether it is possible to express the condition in USE by comparison
4617 of candidate CAND. If so, store the value compared with to BOUND, and the
4618 comparison operator to COMP. */
4621 may_eliminate_iv (struct ivopts_data
*data
,
4622 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4623 enum tree_code
*comp
)
4628 struct loop
*loop
= data
->current_loop
;
4630 struct tree_niter_desc
*desc
= NULL
;
4632 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4635 /* For now works only for exits that dominate the loop latch.
4636 TODO: extend to other conditions inside loop body. */
4637 ex_bb
= gimple_bb (use
->stmt
);
4638 if (use
->stmt
!= last_stmt (ex_bb
)
4639 || gimple_code (use
->stmt
) != GIMPLE_COND
4640 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4643 exit
= EDGE_SUCC (ex_bb
, 0);
4644 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4645 exit
= EDGE_SUCC (ex_bb
, 1);
4646 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4649 desc
= niter_for_exit (data
, exit
);
4653 /* Determine whether we can use the variable to test the exit condition.
4654 This is the case iff the period of the induction variable is greater
4655 than the number of iterations for which the exit condition is true. */
4656 period
= iv_period (cand
->iv
);
4658 /* If the number of iterations is constant, compare against it directly. */
4659 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4661 /* See cand_value_at. */
4662 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4664 if (!tree_int_cst_lt (desc
->niter
, period
))
4669 if (tree_int_cst_lt (period
, desc
->niter
))
4674 /* If not, and if this is the only possible exit of the loop, see whether
4675 we can get a conservative estimate on the number of iterations of the
4676 entire loop and compare against that instead. */
4679 double_int period_value
, max_niter
;
4681 max_niter
= desc
->max
;
4682 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4683 max_niter
+= double_int_one
;
4684 period_value
= tree_to_double_int (period
);
4685 if (max_niter
.ugt (period_value
))
4687 /* See if we can take advantage of inferred loop bound information. */
4688 if (data
->loop_single_exit_p
)
4690 if (!max_loop_iterations (loop
, &max_niter
))
4692 /* The loop bound is already adjusted by adding 1. */
4693 if (max_niter
.ugt (period_value
))
4701 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4703 *bound
= aff_combination_to_tree (&bnd
);
4704 *comp
= iv_elimination_compare (data
, use
);
4706 /* It is unlikely that computing the number of iterations using division
4707 would be more profitable than keeping the original induction variable. */
4708 if (expression_expensive_p (*bound
))
4711 /* Sometimes, it is possible to handle the situation that the number of
4712 iterations may be zero unless additional assumtions by using <
4713 instead of != in the exit condition.
4715 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4716 base the exit condition on it. However, that is often too
4718 if (!integer_zerop (desc
->may_be_zero
))
4719 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4724 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4725 be copied, if is is used in the loop body and DATA->body_includes_call. */
4728 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4730 tree sbound
= bound
;
4731 STRIP_NOPS (sbound
);
4733 if (TREE_CODE (sbound
) == SSA_NAME
4734 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4735 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4736 && data
->body_includes_call
)
4737 return COSTS_N_INSNS (1);
4742 /* Determines cost of basing replacement of USE on CAND in a condition. */
4745 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4746 struct iv_use
*use
, struct iv_cand
*cand
)
4748 tree bound
= NULL_TREE
;
4750 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4751 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4753 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4754 tree
*control_var
, *bound_cst
;
4755 enum tree_code comp
= ERROR_MARK
;
4757 /* Only consider real candidates. */
4760 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4765 /* Try iv elimination. */
4766 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4768 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4769 if (elim_cost
.cost
== 0)
4770 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4771 else if (TREE_CODE (bound
) == INTEGER_CST
)
4773 /* If we replace a loop condition 'i < n' with 'p < base + n',
4774 depends_on_elim will have 'base' and 'n' set, which implies
4775 that both 'base' and 'n' will be live during the loop. More likely,
4776 'base + n' will be loop invariant, resulting in only one live value
4777 during the loop. So in that case we clear depends_on_elim and set
4778 elim_inv_expr_id instead. */
4779 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4781 elim_inv_expr_id
= get_expr_id (data
, bound
);
4782 bitmap_clear (depends_on_elim
);
4784 /* The bound is a loop invariant, so it will be only computed
4786 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4789 elim_cost
= infinite_cost
;
4791 /* Try expressing the original giv. If it is compared with an invariant,
4792 note that we cannot get rid of it. */
4793 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4797 /* When the condition is a comparison of the candidate IV against
4798 zero, prefer this IV.
4800 TODO: The constant that we're subtracting from the cost should
4801 be target-dependent. This information should be added to the
4802 target costs for each backend. */
4803 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4804 && integer_zerop (*bound_cst
)
4805 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4806 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4807 elim_cost
.cost
-= 1;
4809 express_cost
= get_computation_cost (data
, use
, cand
, false,
4810 &depends_on_express
, NULL
,
4811 &express_inv_expr_id
);
4812 fd_ivopts_data
= data
;
4813 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4815 /* Count the cost of the original bound as well. */
4816 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4817 if (bound_cost
.cost
== 0)
4818 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4819 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4820 bound_cost
.cost
= 0;
4821 express_cost
.cost
+= bound_cost
.cost
;
4823 /* Choose the better approach, preferring the eliminated IV. */
4824 if (compare_costs (elim_cost
, express_cost
) <= 0)
4827 depends_on
= depends_on_elim
;
4828 depends_on_elim
= NULL
;
4829 inv_expr_id
= elim_inv_expr_id
;
4833 cost
= express_cost
;
4834 depends_on
= depends_on_express
;
4835 depends_on_express
= NULL
;
4838 inv_expr_id
= express_inv_expr_id
;
4841 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4843 if (depends_on_elim
)
4844 BITMAP_FREE (depends_on_elim
);
4845 if (depends_on_express
)
4846 BITMAP_FREE (depends_on_express
);
4848 return !infinite_cost_p (cost
);
4851 /* Determines cost of basing replacement of USE on CAND. Returns false
4852 if USE cannot be based on CAND. */
4855 determine_use_iv_cost (struct ivopts_data
*data
,
4856 struct iv_use
*use
, struct iv_cand
*cand
)
4860 case USE_NONLINEAR_EXPR
:
4861 return determine_use_iv_cost_generic (data
, use
, cand
);
4864 return determine_use_iv_cost_address (data
, use
, cand
);
4867 return determine_use_iv_cost_condition (data
, use
, cand
);
4874 /* Return true if get_computation_cost indicates that autoincrement is
4875 a possibility for the pair of USE and CAND, false otherwise. */
4878 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4879 struct iv_cand
*cand
)
4885 if (use
->type
!= USE_ADDRESS
)
4888 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4889 &can_autoinc
, NULL
);
4891 BITMAP_FREE (depends_on
);
4893 return !infinite_cost_p (cost
) && can_autoinc
;
4896 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4897 use that allows autoincrement, and set their AINC_USE if possible. */
4900 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4904 for (i
= 0; i
< n_iv_cands (data
); i
++)
4906 struct iv_cand
*cand
= iv_cand (data
, i
);
4907 struct iv_use
*closest_before
= NULL
;
4908 struct iv_use
*closest_after
= NULL
;
4909 if (cand
->pos
!= IP_ORIGINAL
)
4912 for (j
= 0; j
< n_iv_uses (data
); j
++)
4914 struct iv_use
*use
= iv_use (data
, j
);
4915 unsigned uid
= gimple_uid (use
->stmt
);
4917 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
4920 if (uid
< gimple_uid (cand
->incremented_at
)
4921 && (closest_before
== NULL
4922 || uid
> gimple_uid (closest_before
->stmt
)))
4923 closest_before
= use
;
4925 if (uid
> gimple_uid (cand
->incremented_at
)
4926 && (closest_after
== NULL
4927 || uid
< gimple_uid (closest_after
->stmt
)))
4928 closest_after
= use
;
4931 if (closest_before
!= NULL
4932 && autoinc_possible_for_pair (data
, closest_before
, cand
))
4933 cand
->ainc_use
= closest_before
;
4934 else if (closest_after
!= NULL
4935 && autoinc_possible_for_pair (data
, closest_after
, cand
))
4936 cand
->ainc_use
= closest_after
;
4940 /* Finds the candidates for the induction variables. */
4943 find_iv_candidates (struct ivopts_data
*data
)
4945 /* Add commonly used ivs. */
4946 add_standard_iv_candidates (data
);
4948 /* Add old induction variables. */
4949 add_old_ivs_candidates (data
);
4951 /* Add induction variables derived from uses. */
4952 add_derived_ivs_candidates (data
);
4954 set_autoinc_for_original_candidates (data
);
4956 /* Record the important candidates. */
4957 record_important_candidates (data
);
4960 /* Determines costs of basing the use of the iv on an iv candidate. */
4963 determine_use_iv_costs (struct ivopts_data
*data
)
4967 struct iv_cand
*cand
;
4968 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4970 alloc_use_cost_map (data
);
4972 for (i
= 0; i
< n_iv_uses (data
); i
++)
4974 use
= iv_use (data
, i
);
4976 if (data
->consider_all_candidates
)
4978 for (j
= 0; j
< n_iv_cands (data
); j
++)
4980 cand
= iv_cand (data
, j
);
4981 determine_use_iv_cost (data
, use
, cand
);
4988 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4990 cand
= iv_cand (data
, j
);
4991 if (!determine_use_iv_cost (data
, use
, cand
))
4992 bitmap_set_bit (to_clear
, j
);
4995 /* Remove the candidates for that the cost is infinite from
4996 the list of related candidates. */
4997 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4998 bitmap_clear (to_clear
);
5002 BITMAP_FREE (to_clear
);
5004 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5006 fprintf (dump_file
, "Use-candidate costs:\n");
5008 for (i
= 0; i
< n_iv_uses (data
); i
++)
5010 use
= iv_use (data
, i
);
5012 fprintf (dump_file
, "Use %d:\n", i
);
5013 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5014 for (j
= 0; j
< use
->n_map_members
; j
++)
5016 if (!use
->cost_map
[j
].cand
5017 || infinite_cost_p (use
->cost_map
[j
].cost
))
5020 fprintf (dump_file
, " %d\t%d\t%d\t",
5021 use
->cost_map
[j
].cand
->id
,
5022 use
->cost_map
[j
].cost
.cost
,
5023 use
->cost_map
[j
].cost
.complexity
);
5024 if (use
->cost_map
[j
].depends_on
)
5025 bitmap_print (dump_file
,
5026 use
->cost_map
[j
].depends_on
, "","");
5027 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5028 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5029 fprintf (dump_file
, "\n");
5032 fprintf (dump_file
, "\n");
5034 fprintf (dump_file
, "\n");
5038 /* Determines cost of the candidate CAND. */
5041 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5043 comp_cost cost_base
;
5044 unsigned cost
, cost_step
;
5053 /* There are two costs associated with the candidate -- its increment
5054 and its initialization. The second is almost negligible for any loop
5055 that rolls enough, so we take it just very little into account. */
5057 base
= cand
->iv
->base
;
5058 cost_base
= force_var_cost (data
, base
, NULL
);
5059 /* It will be exceptional that the iv register happens to be initialized with
5060 the proper value at no cost. In general, there will at least be a regcopy
5062 if (cost_base
.cost
== 0)
5063 cost_base
.cost
= COSTS_N_INSNS (1);
5064 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5066 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5068 /* Prefer the original ivs unless we may gain something by replacing it.
5069 The reason is to make debugging simpler; so this is not relevant for
5070 artificial ivs created by other optimization passes. */
5071 if (cand
->pos
!= IP_ORIGINAL
5072 || !SSA_NAME_VAR (cand
->var_before
)
5073 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5076 /* Prefer not to insert statements into latch unless there are some
5077 already (so that we do not create unnecessary jumps). */
5078 if (cand
->pos
== IP_END
5079 && empty_block_p (ip_end_pos (data
->current_loop
)))
5083 cand
->cost_step
= cost_step
;
5086 /* Determines costs of computation of the candidates. */
5089 determine_iv_costs (struct ivopts_data
*data
)
5093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5095 fprintf (dump_file
, "Candidate costs:\n");
5096 fprintf (dump_file
, " cand\tcost\n");
5099 for (i
= 0; i
< n_iv_cands (data
); i
++)
5101 struct iv_cand
*cand
= iv_cand (data
, i
);
5103 determine_iv_cost (data
, cand
);
5105 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5106 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5109 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5110 fprintf (dump_file
, "\n");
5113 /* Calculates cost for having SIZE induction variables. */
5116 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5118 /* We add size to the cost, so that we prefer eliminating ivs
5120 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5121 data
->body_includes_call
);
5124 /* For each size of the induction variable set determine the penalty. */
5127 determine_set_costs (struct ivopts_data
*data
)
5131 gimple_stmt_iterator psi
;
5133 struct loop
*loop
= data
->current_loop
;
5136 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5138 fprintf (dump_file
, "Global costs:\n");
5139 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5140 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5141 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5142 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5146 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5148 phi
= gsi_stmt (psi
);
5149 op
= PHI_RESULT (phi
);
5151 if (virtual_operand_p (op
))
5154 if (get_iv (data
, op
))
5160 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5162 struct version_info
*info
= ver_info (data
, j
);
5164 if (info
->inv_id
&& info
->has_nonlin_use
)
5168 data
->regs_used
= n
;
5169 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5170 fprintf (dump_file
, " regs_used %d\n", n
);
5172 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5174 fprintf (dump_file
, " cost for size:\n");
5175 fprintf (dump_file
, " ivs\tcost\n");
5176 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5177 fprintf (dump_file
, " %d\t%d\n", j
,
5178 ivopts_global_cost_for_size (data
, j
));
5179 fprintf (dump_file
, "\n");
5183 /* Returns true if A is a cheaper cost pair than B. */
5186 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5196 cmp
= compare_costs (a
->cost
, b
->cost
);
5203 /* In case the costs are the same, prefer the cheaper candidate. */
5204 if (a
->cand
->cost
< b
->cand
->cost
)
5211 /* Returns candidate by that USE is expressed in IVS. */
5213 static struct cost_pair
*
5214 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5216 return ivs
->cand_for_use
[use
->id
];
5219 /* Computes the cost field of IVS structure. */
5222 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5224 comp_cost cost
= ivs
->cand_use_cost
;
5226 cost
.cost
+= ivs
->cand_cost
;
5228 cost
.cost
+= ivopts_global_cost_for_size (data
,
5229 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5234 /* Remove invariants in set INVS to set IVS. */
5237 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5245 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5247 ivs
->n_invariant_uses
[iid
]--;
5248 if (ivs
->n_invariant_uses
[iid
] == 0)
5253 /* Set USE not to be expressed by any candidate in IVS. */
5256 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5259 unsigned uid
= use
->id
, cid
;
5260 struct cost_pair
*cp
;
5262 cp
= ivs
->cand_for_use
[uid
];
5268 ivs
->cand_for_use
[uid
] = NULL
;
5269 ivs
->n_cand_uses
[cid
]--;
5271 if (ivs
->n_cand_uses
[cid
] == 0)
5273 bitmap_clear_bit (ivs
->cands
, cid
);
5274 /* Do not count the pseudocandidates. */
5278 ivs
->cand_cost
-= cp
->cand
->cost
;
5280 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5283 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5285 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5287 if (cp
->inv_expr_id
!= -1)
5289 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5290 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5291 ivs
->num_used_inv_expr
--;
5293 iv_ca_recount_cost (data
, ivs
);
5296 /* Add invariants in set INVS to set IVS. */
5299 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5307 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5309 ivs
->n_invariant_uses
[iid
]++;
5310 if (ivs
->n_invariant_uses
[iid
] == 1)
5315 /* Set cost pair for USE in set IVS to CP. */
5318 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5319 struct iv_use
*use
, struct cost_pair
*cp
)
5321 unsigned uid
= use
->id
, cid
;
5323 if (ivs
->cand_for_use
[uid
] == cp
)
5326 if (ivs
->cand_for_use
[uid
])
5327 iv_ca_set_no_cp (data
, ivs
, use
);
5334 ivs
->cand_for_use
[uid
] = cp
;
5335 ivs
->n_cand_uses
[cid
]++;
5336 if (ivs
->n_cand_uses
[cid
] == 1)
5338 bitmap_set_bit (ivs
->cands
, cid
);
5339 /* Do not count the pseudocandidates. */
5343 ivs
->cand_cost
+= cp
->cand
->cost
;
5345 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5348 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5349 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5351 if (cp
->inv_expr_id
!= -1)
5353 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5354 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5355 ivs
->num_used_inv_expr
++;
5357 iv_ca_recount_cost (data
, ivs
);
5361 /* Extend set IVS by expressing USE by some of the candidates in it
5362 if possible. All important candidates will be considered
5363 if IMPORTANT_CANDIDATES is true. */
5366 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5367 struct iv_use
*use
, bool important_candidates
)
5369 struct cost_pair
*best_cp
= NULL
, *cp
;
5374 gcc_assert (ivs
->upto
>= use
->id
);
5376 if (ivs
->upto
== use
->id
)
5382 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5383 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5385 struct iv_cand
*cand
= iv_cand (data
, i
);
5387 cp
= get_use_iv_cost (data
, use
, cand
);
5389 if (cheaper_cost_pair (cp
, best_cp
))
5393 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5396 /* Get cost for assignment IVS. */
5399 iv_ca_cost (struct iv_ca
*ivs
)
5401 /* This was a conditional expression but it triggered a bug in
5404 return infinite_cost
;
5409 /* Returns true if all dependences of CP are among invariants in IVS. */
5412 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5417 if (!cp
->depends_on
)
5420 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5422 if (ivs
->n_invariant_uses
[i
] == 0)
5429 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5430 it before NEXT_CHANGE. */
5432 static struct iv_ca_delta
*
5433 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5434 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5436 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5439 change
->old_cp
= old_cp
;
5440 change
->new_cp
= new_cp
;
5441 change
->next_change
= next_change
;
5446 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5449 static struct iv_ca_delta
*
5450 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5452 struct iv_ca_delta
*last
;
5460 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5462 last
->next_change
= l2
;
5467 /* Reverse the list of changes DELTA, forming the inverse to it. */
5469 static struct iv_ca_delta
*
5470 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5472 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5473 struct cost_pair
*tmp
;
5475 for (act
= delta
; act
; act
= next
)
5477 next
= act
->next_change
;
5478 act
->next_change
= prev
;
5482 act
->old_cp
= act
->new_cp
;
5489 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5490 reverted instead. */
5493 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5494 struct iv_ca_delta
*delta
, bool forward
)
5496 struct cost_pair
*from
, *to
;
5497 struct iv_ca_delta
*act
;
5500 delta
= iv_ca_delta_reverse (delta
);
5502 for (act
= delta
; act
; act
= act
->next_change
)
5506 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5507 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5511 iv_ca_delta_reverse (delta
);
5514 /* Returns true if CAND is used in IVS. */
5517 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5519 return ivs
->n_cand_uses
[cand
->id
] > 0;
5522 /* Returns number of induction variable candidates in the set IVS. */
5525 iv_ca_n_cands (struct iv_ca
*ivs
)
5527 return ivs
->n_cands
;
5530 /* Free the list of changes DELTA. */
5533 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5535 struct iv_ca_delta
*act
, *next
;
5537 for (act
= *delta
; act
; act
= next
)
5539 next
= act
->next_change
;
5546 /* Allocates new iv candidates assignment. */
5548 static struct iv_ca
*
5549 iv_ca_new (struct ivopts_data
*data
)
5551 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5555 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5556 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5557 nw
->cands
= BITMAP_ALLOC (NULL
);
5560 nw
->cand_use_cost
= no_cost
;
5562 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5564 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5565 nw
->num_used_inv_expr
= 0;
5570 /* Free memory occupied by the set IVS. */
5573 iv_ca_free (struct iv_ca
**ivs
)
5575 free ((*ivs
)->cand_for_use
);
5576 free ((*ivs
)->n_cand_uses
);
5577 BITMAP_FREE ((*ivs
)->cands
);
5578 free ((*ivs
)->n_invariant_uses
);
5579 free ((*ivs
)->used_inv_expr
);
5584 /* Dumps IVS to FILE. */
5587 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5589 const char *pref
= " invariants ";
5591 comp_cost cost
= iv_ca_cost (ivs
);
5593 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5594 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5595 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5596 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5598 for (i
= 0; i
< ivs
->upto
; i
++)
5600 struct iv_use
*use
= iv_use (data
, i
);
5601 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5603 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5604 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5606 fprintf (file
, " use:%d --> ??\n", use
->id
);
5609 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5610 if (ivs
->n_invariant_uses
[i
])
5612 fprintf (file
, "%s%d", pref
, i
);
5615 fprintf (file
, "\n\n");
5618 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5619 new set, and store differences in DELTA. Number of induction variables
5620 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5621 the function will try to find a solution with mimimal iv candidates. */
5624 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5625 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5626 unsigned *n_ivs
, bool min_ncand
)
5631 struct cost_pair
*old_cp
, *new_cp
;
5634 for (i
= 0; i
< ivs
->upto
; i
++)
5636 use
= iv_use (data
, i
);
5637 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5640 && old_cp
->cand
== cand
)
5643 new_cp
= get_use_iv_cost (data
, use
, cand
);
5647 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5650 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5653 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5656 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5657 cost
= iv_ca_cost (ivs
);
5659 *n_ivs
= iv_ca_n_cands (ivs
);
5660 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5665 /* Try narrowing set IVS by removing CAND. Return the cost of
5666 the new set and store the differences in DELTA. */
5669 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5670 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
5674 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5676 struct iv_cand
*cnd
;
5680 for (i
= 0; i
< n_iv_uses (data
); i
++)
5682 use
= iv_use (data
, i
);
5684 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5685 if (old_cp
->cand
!= cand
)
5690 if (data
->consider_all_candidates
)
5692 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5697 cnd
= iv_cand (data
, ci
);
5699 cp
= get_use_iv_cost (data
, use
, cnd
);
5703 if (!iv_ca_has_deps (ivs
, cp
))
5706 if (!cheaper_cost_pair (cp
, new_cp
))
5714 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5719 cnd
= iv_cand (data
, ci
);
5721 cp
= get_use_iv_cost (data
, use
, cnd
);
5724 if (!iv_ca_has_deps (ivs
, cp
))
5727 if (!cheaper_cost_pair (cp
, new_cp
))
5736 iv_ca_delta_free (delta
);
5737 return infinite_cost
;
5740 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5743 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5744 cost
= iv_ca_cost (ivs
);
5745 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5750 /* Try optimizing the set of candidates IVS by removing candidates different
5751 from to EXCEPT_CAND from it. Return cost of the new set, and store
5752 differences in DELTA. */
5755 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5756 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5759 struct iv_ca_delta
*act_delta
, *best_delta
;
5761 comp_cost best_cost
, acost
;
5762 struct iv_cand
*cand
;
5765 best_cost
= iv_ca_cost (ivs
);
5767 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5769 cand
= iv_cand (data
, i
);
5771 if (cand
== except_cand
)
5774 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5776 if (compare_costs (acost
, best_cost
) < 0)
5779 iv_ca_delta_free (&best_delta
);
5780 best_delta
= act_delta
;
5783 iv_ca_delta_free (&act_delta
);
5792 /* Recurse to possibly remove other unnecessary ivs. */
5793 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5794 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5795 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5796 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5800 /* Tries to extend the sets IVS in the best possible way in order
5801 to express the USE. If ORIGINALP is true, prefer candidates from
5802 the original set of IVs, otherwise favor important candidates not
5803 based on any memory object. */
5806 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5807 struct iv_use
*use
, bool originalp
)
5809 comp_cost best_cost
, act_cost
;
5812 struct iv_cand
*cand
;
5813 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5814 struct cost_pair
*cp
;
5816 iv_ca_add_use (data
, ivs
, use
, false);
5817 best_cost
= iv_ca_cost (ivs
);
5819 cp
= iv_ca_cand_for_use (ivs
, use
);
5824 iv_ca_add_use (data
, ivs
, use
, true);
5825 best_cost
= iv_ca_cost (ivs
);
5826 cp
= iv_ca_cand_for_use (ivs
, use
);
5830 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5831 iv_ca_set_no_cp (data
, ivs
, use
);
5834 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5835 first try important candidates not based on any memory object. Only if
5836 this fails, try the specific ones. Rationale -- in loops with many
5837 variables the best choice often is to use just one generic biv. If we
5838 added here many ivs specific to the uses, the optimization algorithm later
5839 would be likely to get stuck in a local minimum, thus causing us to create
5840 too many ivs. The approach from few ivs to more seems more likely to be
5841 successful -- starting from few ivs, replacing an expensive use by a
5842 specific iv should always be a win. */
5843 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5845 cand
= iv_cand (data
, i
);
5847 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5850 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5853 if (iv_ca_cand_used_p (ivs
, cand
))
5856 cp
= get_use_iv_cost (data
, use
, cand
);
5860 iv_ca_set_cp (data
, ivs
, use
, cp
);
5861 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5863 iv_ca_set_no_cp (data
, ivs
, use
);
5864 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5866 if (compare_costs (act_cost
, best_cost
) < 0)
5868 best_cost
= act_cost
;
5870 iv_ca_delta_free (&best_delta
);
5871 best_delta
= act_delta
;
5874 iv_ca_delta_free (&act_delta
);
5877 if (infinite_cost_p (best_cost
))
5879 for (i
= 0; i
< use
->n_map_members
; i
++)
5881 cp
= use
->cost_map
+ i
;
5886 /* Already tried this. */
5887 if (cand
->important
)
5889 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5891 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5895 if (iv_ca_cand_used_p (ivs
, cand
))
5899 iv_ca_set_cp (data
, ivs
, use
, cp
);
5900 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5901 iv_ca_set_no_cp (data
, ivs
, use
);
5902 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5905 if (compare_costs (act_cost
, best_cost
) < 0)
5907 best_cost
= act_cost
;
5910 iv_ca_delta_free (&best_delta
);
5911 best_delta
= act_delta
;
5914 iv_ca_delta_free (&act_delta
);
5918 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5919 iv_ca_delta_free (&best_delta
);
5921 return !infinite_cost_p (best_cost
);
5924 /* Finds an initial assignment of candidates to uses. */
5926 static struct iv_ca
*
5927 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
5929 struct iv_ca
*ivs
= iv_ca_new (data
);
5932 for (i
= 0; i
< n_iv_uses (data
); i
++)
5933 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
5942 /* Tries to improve set of induction variables IVS. */
5945 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5948 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5949 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5950 struct iv_cand
*cand
;
5952 /* Try extending the set of induction variables by one. */
5953 for (i
= 0; i
< n_iv_cands (data
); i
++)
5955 cand
= iv_cand (data
, i
);
5957 if (iv_ca_cand_used_p (ivs
, cand
))
5960 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
5964 /* If we successfully added the candidate and the set is small enough,
5965 try optimizing it by removing other candidates. */
5966 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5968 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5969 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5970 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5971 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5974 if (compare_costs (acost
, best_cost
) < 0)
5977 iv_ca_delta_free (&best_delta
);
5978 best_delta
= act_delta
;
5981 iv_ca_delta_free (&act_delta
);
5986 /* Try removing the candidates from the set instead. */
5987 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5989 /* Nothing more we can do. */
5994 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5995 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5996 iv_ca_delta_free (&best_delta
);
6000 /* Attempts to find the optimal set of induction variables. We do simple
6001 greedy heuristic -- we try to replace at most one candidate in the selected
6002 solution and remove the unused ivs while this improves the cost. */
6004 static struct iv_ca
*
6005 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6009 /* Get the initial solution. */
6010 set
= get_initial_solution (data
, originalp
);
6013 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6014 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6018 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6020 fprintf (dump_file
, "Initial set of candidates:\n");
6021 iv_ca_dump (data
, dump_file
, set
);
6024 while (try_improve_iv_set (data
, set
))
6026 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6028 fprintf (dump_file
, "Improved to:\n");
6029 iv_ca_dump (data
, dump_file
, set
);
6036 static struct iv_ca
*
6037 find_optimal_iv_set (struct ivopts_data
*data
)
6040 struct iv_ca
*set
, *origset
;
6042 comp_cost cost
, origcost
;
6044 /* Determine the cost based on a strategy that starts with original IVs,
6045 and try again using a strategy that prefers candidates not based
6047 origset
= find_optimal_iv_set_1 (data
, true);
6048 set
= find_optimal_iv_set_1 (data
, false);
6050 if (!origset
&& !set
)
6053 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6054 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6056 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6058 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6059 origcost
.cost
, origcost
.complexity
);
6060 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6061 cost
.cost
, cost
.complexity
);
6064 /* Choose the one with the best cost. */
6065 if (compare_costs (origcost
, cost
) <= 0)
6072 iv_ca_free (&origset
);
6074 for (i
= 0; i
< n_iv_uses (data
); i
++)
6076 use
= iv_use (data
, i
);
6077 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6083 /* Creates a new induction variable corresponding to CAND. */
6086 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6088 gimple_stmt_iterator incr_pos
;
6098 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6102 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6110 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6114 /* Mark that the iv is preserved. */
6115 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6116 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6118 /* Rewrite the increment so that it uses var_before directly. */
6119 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6123 gimple_add_tmp_var (cand
->var_before
);
6125 base
= unshare_expr (cand
->iv
->base
);
6127 create_iv (base
, unshare_expr (cand
->iv
->step
),
6128 cand
->var_before
, data
->current_loop
,
6129 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6132 /* Creates new induction variables described in SET. */
6135 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6138 struct iv_cand
*cand
;
6141 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6143 cand
= iv_cand (data
, i
);
6144 create_new_iv (data
, cand
);
6147 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6149 fprintf (dump_file
, "\nSelected IV set: \n");
6150 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6152 cand
= iv_cand (data
, i
);
6153 dump_cand (dump_file
, cand
);
6155 fprintf (dump_file
, "\n");
6159 /* Rewrites USE (definition of iv used in a nonlinear expression)
6160 using candidate CAND. */
6163 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6164 struct iv_use
*use
, struct iv_cand
*cand
)
6169 gimple_stmt_iterator bsi
;
6171 /* An important special case -- if we are asked to express value of
6172 the original iv by itself, just exit; there is no need to
6173 introduce a new computation (that might also need casting the
6174 variable to unsigned and back). */
6175 if (cand
->pos
== IP_ORIGINAL
6176 && cand
->incremented_at
== use
->stmt
)
6178 enum tree_code stmt_code
;
6180 gcc_assert (is_gimple_assign (use
->stmt
));
6181 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6183 /* Check whether we may leave the computation unchanged.
6184 This is the case only if it does not rely on other
6185 computations in the loop -- otherwise, the computation
6186 we rely upon may be removed in remove_unused_ivs,
6187 thus leading to ICE. */
6188 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6189 if (stmt_code
== PLUS_EXPR
6190 || stmt_code
== MINUS_EXPR
6191 || stmt_code
== POINTER_PLUS_EXPR
)
6193 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6194 op
= gimple_assign_rhs2 (use
->stmt
);
6195 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6196 op
= gimple_assign_rhs1 (use
->stmt
);
6203 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6207 comp
= get_computation (data
->current_loop
, use
, cand
);
6208 gcc_assert (comp
!= NULL_TREE
);
6210 switch (gimple_code (use
->stmt
))
6213 tgt
= PHI_RESULT (use
->stmt
);
6215 /* If we should keep the biv, do not replace it. */
6216 if (name_info (data
, tgt
)->preserve_biv
)
6219 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6223 tgt
= gimple_assign_lhs (use
->stmt
);
6224 bsi
= gsi_for_stmt (use
->stmt
);
6231 if (!valid_gimple_rhs_p (comp
)
6232 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6233 /* We can't allow re-allocating the stmt as it might be pointed
6235 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6236 >= gimple_num_ops (gsi_stmt (bsi
)))))
6238 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6239 true, GSI_SAME_STMT
);
6240 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6242 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6243 /* As this isn't a plain copy we have to reset alignment
6245 if (SSA_NAME_PTR_INFO (comp
))
6246 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6250 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6252 ass
= gimple_build_assign (tgt
, comp
);
6253 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6255 bsi
= gsi_for_stmt (use
->stmt
);
6256 remove_phi_node (&bsi
, false);
6260 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6261 use
->stmt
= gsi_stmt (bsi
);
6265 /* Performs a peephole optimization to reorder the iv update statement with
6266 a mem ref to enable instruction combining in later phases. The mem ref uses
6267 the iv value before the update, so the reordering transformation requires
6268 adjustment of the offset. CAND is the selected IV_CAND.
6272 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6280 directly propagating t over to (1) will introduce overlapping live range
6281 thus increase register pressure. This peephole transform it into:
6285 t = MEM_REF (base, iv2, 8, 8);
6292 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6295 gimple iv_update
, stmt
;
6297 gimple_stmt_iterator gsi
, gsi_iv
;
6299 if (cand
->pos
!= IP_NORMAL
)
6302 var_after
= cand
->var_after
;
6303 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6305 bb
= gimple_bb (iv_update
);
6306 gsi
= gsi_last_nondebug_bb (bb
);
6307 stmt
= gsi_stmt (gsi
);
6309 /* Only handle conditional statement for now. */
6310 if (gimple_code (stmt
) != GIMPLE_COND
)
6313 gsi_prev_nondebug (&gsi
);
6314 stmt
= gsi_stmt (gsi
);
6315 if (stmt
!= iv_update
)
6318 gsi_prev_nondebug (&gsi
);
6319 if (gsi_end_p (gsi
))
6322 stmt
= gsi_stmt (gsi
);
6323 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6326 if (stmt
!= use
->stmt
)
6329 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6332 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6334 fprintf (dump_file
, "Reordering \n");
6335 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6336 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6337 fprintf (dump_file
, "\n");
6340 gsi
= gsi_for_stmt (use
->stmt
);
6341 gsi_iv
= gsi_for_stmt (iv_update
);
6342 gsi_move_before (&gsi_iv
, &gsi
);
6344 cand
->pos
= IP_BEFORE_USE
;
6345 cand
->incremented_at
= use
->stmt
;
6348 /* Rewrites USE (address that is an iv) using candidate CAND. */
6351 rewrite_use_address (struct ivopts_data
*data
,
6352 struct iv_use
*use
, struct iv_cand
*cand
)
6355 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6356 tree base_hint
= NULL_TREE
;
6360 adjust_iv_update_pos (cand
, use
);
6361 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6363 unshare_aff_combination (&aff
);
6365 /* To avoid undefined overflow problems, all IV candidates use unsigned
6366 integer types. The drawback is that this makes it impossible for
6367 create_mem_ref to distinguish an IV that is based on a memory object
6368 from one that represents simply an offset.
6370 To work around this problem, we pass a hint to create_mem_ref that
6371 indicates which variable (if any) in aff is an IV based on a memory
6372 object. Note that we only consider the candidate. If this is not
6373 based on an object, the base of the reference is in some subexpression
6374 of the use -- but these will use pointer types, so they are recognized
6375 by the create_mem_ref heuristics anyway. */
6376 if (cand
->iv
->base_object
)
6377 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6379 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6380 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6381 reference_alias_ptr_type (*use
->op_p
),
6382 iv
, base_hint
, data
->speed
);
6383 copy_ref_info (ref
, *use
->op_p
);
6387 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6391 rewrite_use_compare (struct ivopts_data
*data
,
6392 struct iv_use
*use
, struct iv_cand
*cand
)
6394 tree comp
, *var_p
, op
, bound
;
6395 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6396 enum tree_code compare
;
6397 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6403 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6404 tree var_type
= TREE_TYPE (var
);
6407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6409 fprintf (dump_file
, "Replacing exit test: ");
6410 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6413 bound
= unshare_expr (fold_convert (var_type
, bound
));
6414 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6416 gsi_insert_seq_on_edge_immediate (
6417 loop_preheader_edge (data
->current_loop
),
6420 gimple_cond_set_lhs (use
->stmt
, var
);
6421 gimple_cond_set_code (use
->stmt
, compare
);
6422 gimple_cond_set_rhs (use
->stmt
, op
);
6426 /* The induction variable elimination failed; just express the original
6428 comp
= get_computation (data
->current_loop
, use
, cand
);
6429 gcc_assert (comp
!= NULL_TREE
);
6431 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6434 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6435 true, GSI_SAME_STMT
);
6438 /* Rewrites USE using candidate CAND. */
6441 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6445 case USE_NONLINEAR_EXPR
:
6446 rewrite_use_nonlinear_expr (data
, use
, cand
);
6450 rewrite_use_address (data
, use
, cand
);
6454 rewrite_use_compare (data
, use
, cand
);
6461 update_stmt (use
->stmt
);
6464 /* Rewrite the uses using the selected induction variables. */
6467 rewrite_uses (struct ivopts_data
*data
)
6470 struct iv_cand
*cand
;
6473 for (i
= 0; i
< n_iv_uses (data
); i
++)
6475 use
= iv_use (data
, i
);
6476 cand
= use
->selected
;
6479 rewrite_use (data
, use
, cand
);
6483 /* Removes the ivs that are not used after rewriting. */
6486 remove_unused_ivs (struct ivopts_data
*data
)
6490 bitmap toremove
= BITMAP_ALLOC (NULL
);
6492 /* Figure out an order in which to release SSA DEFs so that we don't
6493 release something that we'd have to propagate into a debug stmt
6495 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6497 struct version_info
*info
;
6499 info
= ver_info (data
, j
);
6501 && !integer_zerop (info
->iv
->step
)
6503 && !info
->iv
->have_use_for
6504 && !info
->preserve_biv
)
6506 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6508 tree def
= info
->iv
->ssa_name
;
6510 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6512 imm_use_iterator imm_iter
;
6513 use_operand_p use_p
;
6517 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6519 if (!gimple_debug_bind_p (stmt
))
6522 /* We just want to determine whether to do nothing
6523 (count == 0), to substitute the computed
6524 expression into a single use of the SSA DEF by
6525 itself (count == 1), or to use a debug temp
6526 because the SSA DEF is used multiple times or as
6527 part of a larger expression (count > 1). */
6529 if (gimple_debug_bind_get_value (stmt
) != def
)
6533 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6539 struct iv_use dummy_use
;
6540 struct iv_cand
*best_cand
= NULL
, *cand
;
6541 unsigned i
, best_pref
= 0, cand_pref
;
6543 memset (&dummy_use
, 0, sizeof (dummy_use
));
6544 dummy_use
.iv
= info
->iv
;
6545 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6547 cand
= iv_use (data
, i
)->selected
;
6548 if (cand
== best_cand
)
6550 cand_pref
= operand_equal_p (cand
->iv
->step
,
6554 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6555 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6558 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6560 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6563 best_pref
= cand_pref
;
6570 tree comp
= get_computation_at (data
->current_loop
,
6571 &dummy_use
, best_cand
,
6572 SSA_NAME_DEF_STMT (def
));
6578 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6579 DECL_ARTIFICIAL (vexpr
) = 1;
6580 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6581 if (SSA_NAME_VAR (def
))
6582 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6584 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6585 gimple def_temp
= gimple_build_debug_bind (vexpr
, comp
, NULL
);
6586 gimple_stmt_iterator gsi
;
6588 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6589 gsi
= gsi_after_labels (gimple_bb
6590 (SSA_NAME_DEF_STMT (def
)));
6592 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6594 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6598 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6600 if (!gimple_debug_bind_p (stmt
))
6603 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6604 SET_USE (use_p
, comp
);
6612 release_defs_bitset (toremove
);
6614 BITMAP_FREE (toremove
);
6617 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6618 for pointer_map_traverse. */
6621 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6622 void *data ATTRIBUTE_UNUSED
)
6624 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6630 /* Frees data allocated by the optimization of a single loop. */
/* NOTE(review): extraction dropped several original lines inside this
   function (opening brace, local declarations such as the loop counters and
   the bitmap iterator, a guard around data->niters, and the struct braces of
   the loop bodies).  The visible statements below are byte-identical to what
   the extraction kept.  */
6633 free_loop_data (struct ivopts_data
*data
)
/* Release the per-loop number-of-iterations cache: free each cached
   tree_niter_desc via the callback above, then destroy the map itself.  */
6641 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6642 pointer_map_destroy (data
->niters
);
6643 data
->niters
= NULL
;
/* Clear the per-SSA-version flags for every version marked relevant.  */
6646 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6648 struct version_info
*info
;
6650 info
= ver_info (data
, i
);
6653 info
->has_nonlin_use
= false;
6654 info
->preserve_biv
= false;
/* Forget which versions were relevant / important for this loop.  */
6657 bitmap_clear (data
->relevant
);
6658 bitmap_clear (data
->important_candidates
);
/* Free per-use data: the related-candidates bitmap, each cost-map entry's
   depends_on bitmap, and the cost map array itself.  */
6660 for (i
= 0; i
< n_iv_uses (data
); i
++)
6662 struct iv_use
*use
= iv_use (data
, i
);
6665 BITMAP_FREE (use
->related_cands
);
6666 for (j
= 0; j
< use
->n_map_members
; j
++)
6667 if (use
->cost_map
[j
].depends_on
)
6668 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6669 free (use
->cost_map
);
6672 data
->iv_uses
.truncate (0);
/* Free per-candidate data (the depends_on bitmap, when present).  */
6674 for (i
= 0; i
< n_iv_cands (data
); i
++)
6676 struct iv_cand
*cand
= iv_cand (data
, i
);
6679 if (cand
->depends_on
)
6680 BITMAP_FREE (cand
->depends_on
);
6683 data
->iv_candidates
.truncate (0);
/* Grow the version_info array if new SSA names appeared; doubling
   num_ssa_names leaves headroom for the next loop.  XCNEWVEC
   zero-initializes, so no explicit reset of the entries is needed.  */
6685 if (data
->version_info_size
< num_ssa_names
)
6687 data
->version_info_size
= 2 * num_ssa_names
;
6688 free (data
->version_info
);
6689 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6692 data
->max_inv_id
= 0;
/* Drop the DECL_RTLs we temporarily materialized for cost estimation.  */
6694 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6695 SET_DECL_RTL (obj
, NULL_RTX
);
6697 decl_rtl_to_reset
.truncate (0);
/* Reset the invariant-expression hash table and its id counter.  */
6699 data
->inv_expr_tab
.empty ();
6700 data
->inv_expr_id
= 0;
6703 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
/* NOTE(review): the comment above is truncated by the extraction (original
   lines 6704-6706 are missing), as is the `static void` return-type line.  */
6707 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
/* Release the last loop's data first, then tear down the pass-global
   structures that free_loop_data only clears but does not destroy.  */
6709 free_loop_data (data
);
6710 free (data
->version_info
);
6711 BITMAP_FREE (data
->relevant
);
6712 BITMAP_FREE (data
->important_candidates
);
/* Fully release the vectors (free_loop_data merely truncates them) and
   dispose of the invariant-expression hash table.  */
6714 decl_rtl_to_reset
.release ();
6715 data
->iv_uses
.release ();
6716 data
->iv_candidates
.release ();
6717 data
->inv_expr_tab
.dispose ();
6720 /* Returns true if the loop body BODY includes any function calls. */
/* NOTE(review): the extraction dropped the `static bool` line, the
   declaration of the counter I, and — presumably — the `return true;`
   following the is_gimple_call test and the trailing `return false;`.
   Confirm against the full source.  */
6723 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6725 gimple_stmt_iterator gsi
;
/* Scan every statement of every basic block in BODY.  */
6728 for (i
= 0; i
< num_nodes
; i
++)
6729 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6731 gimple stmt
= gsi_stmt (gsi
);
/* Cheap builtins (per is_inexpensive_builtin) are not counted as calls,
   since they do not affect the cost model the caller feeds this into.  */
6732 if (is_gimple_call (stmt
)
6733 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6739 /* Optimizes the LOOP. Returns true if anything changed. */
/* NOTE(review): extraction dropped the `static bool` line, the declaration
   of BODY, several braces, and the early-exit / `return changed;` lines
   (original numbering jumps at 6747, 6777-6780, 6782-6785, 6794-6799,
   6810-6815).  The visible statements are byte-identical.  */
6742 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6744 bool changed
= false;
6745 struct iv_ca
*iv_ca
;
/* EXIT is the single exit dominating the latch, or NULL if none.  */
6746 edge exit
= single_dom_exit (loop
);
/* free_loop_data must have cleared the previous loop's niter cache.  */
6749 gcc_assert (!data
->niters
);
6750 data
->current_loop
= loop
;
6751 data
->speed
= optimize_loop_for_speed_p (loop
);
/* Dump the loop being processed and, when known, its exit condition.  */
6753 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6755 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6759 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6760 exit
->src
->index
, exit
->dest
->index
);
6761 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6762 fprintf (dump_file
, "\n");
6765 fprintf (dump_file
, "\n");
/* Collect the loop's blocks, record whether the body contains calls (this
   feeds the register-pressure/cost model), and renumber stmt uids.  */
6768 body
= get_loop_body (loop
);
6769 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6770 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6773 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6775 /* For each ssa name determines whether it behaves as an induction variable
/* Bail out early if no induction variables were found.  */
6777 if (!find_induction_variables (data
))
6780 /* Finds interesting uses (item 1). */
6781 find_interesting_uses (data
);
/* Give up when there are too many uses for the O(uses * cands) search.  */
6782 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6785 /* Finds candidates for the induction variables (item 2). */
6786 find_iv_candidates (data
);
6788 /* Calculates the costs (item 3, part 1). */
6789 determine_iv_costs (data
);
6790 determine_use_iv_costs (data
);
6791 determine_set_costs (data
);
6793 /* Find the optimal set of induction variables (item 3, part 2). */
6794 iv_ca
= find_optimal_iv_set (data
);
6799 /* Create the new induction variables (item 4, part 1). */
6800 create_new_ivs (data
, iv_ca
);
6801 iv_ca_free (&iv_ca
);
6803 /* Rewrite the uses (item 4, part 2). */
6804 rewrite_uses (data
);
6806 /* Remove the ivs that are unused after rewriting. */
6807 remove_unused_ivs (data
);
6809 /* We have changed the structure of induction variables; it might happen
6810 that definitions in the scev database refer to some of them that were
/* Per-loop cleanup; shared fall-through for both the early exits and the
   successful path.  */
6815 free_loop_data (data
);
6820 /* Main entry point. Optimizes induction variables in loops. */
6823 tree_ssa_iv_optimize (void)
6826 struct ivopts_data data
;
6829 tree_ssa_iv_optimize_init (&data
);
6831 /* Optimize the loops starting with the innermost ones. */
6832 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
6834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6835 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6837 tree_ssa_iv_optimize_loop (&data
, loop
);
6840 tree_ssa_iv_optimize_finalize (&data
);