/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
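/* For instance (an illustrative example, not an exhaustive description):
   for a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address a[i] is an interesting use.  Rather than recomputing
   &a + i * sizeof (a[0]) in every iteration, the pass may select a pointer
   candidate p with base &a[0] and step sizeof (a[0]), rewriting the body
   to "*p = 0; p += sizeof (a[0]);" -- the classical strength reduction.  */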
66 #include "coretypes.h"
70 #include "basic-block.h"
71 #include "gimple-pretty-print.h"
73 #include "gimple-ssa.h"
76 #include "tree-phinodes.h"
77 #include "ssa-iterators.h"
78 #include "tree-ssanames.h"
79 #include "tree-ssa-loop-ivopts.h"
80 #include "tree-ssa-loop-manip.h"
81 #include "tree-ssa-loop-niter.h"
82 #include "tree-ssa-loop.h"
86 #include "tree-pass.h"
88 #include "insn-config.h"
89 #include "pointer-set.h"
90 #include "hash-table.h"
91 #include "tree-chrec.h"
92 #include "tree-scalar-evolution.h"
95 #include "langhooks.h"
96 #include "tree-affine.h"
98 #include "tree-inline.h"
99 #include "tree-ssa-propagate.h"
101 #include "tree-ssa-address.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;
static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
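/* Costs compare first by the COST field; the COMPLEXITY field only breaks
   ties.  For example, a computation with cost {4, 0} is preferred to one
   with cost {4, 2}, and anything is preferred to infinite_cost.  */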
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}
/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use_p;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
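/* All three bounds come from --param knobs and can be tuned from the
   command line, e.g.

     gcc -O2 --param iv-consider-all-candidates-bound=30 \
             --param iv-max-considered-uses=250 ...  */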
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}
/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}
/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}
/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}
/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
                                             &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
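/* For instance, for the loop-header phi

     i_1 = PHI <0(preheader), i_5(latch)>    with    i_5 = i_1 + 4;

   simple_iv computes the affine form {0, +, 4} and the step 4 is
   returned; a phi whose value does not change within the loop has a
   zero step, and NULL_TREE is returned instead.  */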
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
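/* E.g. for the exit test "if (i_3 < n_7)", where i_3 is an iv with
   nonzero step and n_7 is loop-invariant, *CONTROL_VAR is set to point
   to i_3, *BOUND to n_7, and true is returned; for "if (n_7 > i_3)"
   the operands are swapped first.  */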
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
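/* For example, for a[2 * i_1] with 4-byte elements, where i_1 is an iv
   with step 1, the index iv has step 2; it is converted to sizetype,
   multiplied by the element size, and DTA->step accumulates 8 -- the
   number of bytes the address advances per iteration.  */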
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
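/* A worked example: for TOP = n_2 * 12 and BOT = n_2, the MULT_EXPR case
   recurses on n_2, which matches operand_equal_p, so 12 is stored to *MUL.
   For TOP = i_1 * 12 and BOT = i_1 * 4 the function conservatively fails,
   even though the quotient is mathematically the constant 3.  */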
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
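/* For example, in "x_1 = a[i_2]" the reference a[i_2] is recorded as a
   USE_ADDRESS use whose iv has base &a[0] and step equal to the element
   size; when the reference cannot be expressed as an affine iv, its
   indices are still recorded as plain uses via idx_record_use.  */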
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      {
        tree field;

        if (!inside_addr)
          return orig_expr;

        tmp = component_ref_field_offset (expr);
        field = TREE_OPERAND (expr, 1);
        if (top_compref
            && cst_and_fits_in_hwi (tmp)
            && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
          {
            HOST_WIDE_INT boffset, abs_off;

            /* Strip the component reference completely.  */
            op0 = TREE_OPERAND (expr, 0);
            op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
            boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
            abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
            if (boffset < 0)
              abs_off = -abs_off;

            *offset = off0 + int_cst_value (tmp) + abs_off;
            return op0;
          }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
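/* A worked example: for &a[i_1 + 3] with 4-byte elements, the constant
   part of the index is peeled off and &a[i_1] is returned with *OFFSET
   set to 12; for p_2 + 16, p_2 is returned with *OFFSET set to 16.  */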
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows -- e.g. a signed int is mapped to unsigned int, while
   pointer types are mapped to an unsigned integer type of the same width.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at
          || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
              && cand->ainc_use != use))
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0)
          && (TYPE_PRECISION (TREE_TYPE (base))
              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
        cand->ainc_use = use;
      else
        cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
                        bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
        || USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
           || USE_STORE_PRE_DECREMENT (mem_mode))
          && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
        {
          new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
          code = POINTER_PLUS_EXPR;
        }
      else
        new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
                       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
        || USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
           || USE_STORE_POST_DECREMENT (mem_mode))
          && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
                       use->stmt);
    }
}
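/* For instance, on a target with post-increment addressing (e.g. ARM's
   "ldr r0, [r1], #4"), a 4-byte load from an iv with step 4 gets an
   IP_AFTER_USE candidate, so that the address computation and the iv
   increment can later be combined into a single auto-inc instruction.  */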
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
                   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
                   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array;
   otherwise we allocate a simple list for every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = bitmap_count_bits (use->related_cands);

	  /* Round up to a power of two, so that taking the modulo by it
	     is fast.  */
	  size = s ? (1 << ceil_log2 (s)) : 1;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
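
/* For illustration (added example, not part of the algorithm):  if a use
   has s == 5 related candidates, ceil_log2 (5) == 3, so size == 8 and the
   cost map gets eight entries.  Because 8 is a power of two, the hash code
   below can reduce a candidate id with a single bitwise AND instead of a
   division:

       cand->id & (use->n_map_members - 1)    is    cand->id % 8

   which is why the allocation rounds up rather than using s itself.  */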
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
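
/* For illustration (added example, not part of the algorithm):  comp_cost
   orders candidates first by runtime cost and breaks ties by complexity:

       comp_cost a = new_cost (4, 1), b = new_cost (4, 2);
       compare_costs (a, b) < 0;               // tie on cost, a is simpler
       compare_costs (new_cost (3, 9), a) < 0; // lower runtime always wins

   so complexity only matters between addressing modes (or computations)
   that are otherwise equally expensive.  */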
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON, that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value,
		 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
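
/* For illustration (added example, not part of the algorithm):  the cost
   map in the non-consider_all_candidates case is a small open-addressed
   hash table keyed by candidate id.  With n_map_members == 8, candidate
   id 11 hashes to slot 11 & 7 == 3; set_use_iv_cost stores it in the first
   free slot at or after 3 (wrapping around), and the lookup above walks
   the same probe sequence, stopping early at a NULL slot because an
   insertion of that candidate would have claimed that slot.  */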
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += set_src_cost (SET_SRC (set), speed);
      else
	cost++;
    }

  return cost;
}
/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
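
/* For illustration (added example; the names a_long/b_long are hypothetical):
   for *A == (int) a_long and *B == (int) b_long, both conversions are
   stripped, *A and *B are replaced with the wider operands, and the wider
   type is returned, so that the A - B difference in get_computation_aff can
   be folded in the wider type before the final cast.  If only one side is
   a conversion, the original type of A is returned and nothing is changed.  */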
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
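
/* For illustration (added derivation, not part of the algorithm):  in
   iteration x the use has value ubase + x * ustep and the candidate's
   variable holds var == cbase + x * cstep, with ratio == ustep / cstep.
   Substituting x gives

       use = ubase + x * ustep
	   = ubase - ratio * cbase + ratio * (cbase + x * cstep)
	   = ubase - ratio * cbase + ratio * var

   which is exactly the affine combination assembled above; the cstep
   adjustment accounts for VAR already holding the post-increment value.  */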
/* Returns the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
	 the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
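
/* For illustration (added arithmetic example):  with a setup cost of 20 and
   avg_loop_niter == 10, optimizing for speed yields 20 / 10 == 2 charged per
   iteration, so loop-invariant setup work becomes almost free for hot loops
   with many iterations, while at -Os the full constant 20 is charged since
   the setup code takes space regardless of the trip count.  */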
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as)
	      || memory_address_addr_space_p (mode, scaled, as))
	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
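
/* For illustration (added example; the particular target behavior is
   hypothetical):  the cached sbitmap has 2 * MAX_RATIO + 1 bits covering
   ratios -128 .. 128, with ratio R stored at bit R + MAX_RATIO.  On a
   target that accepts scaled addressing by 1, 2, 4 and 8, a later query
   for ratio 4 simply tests bit 4 + 128 == 132 and succeeds without
   generating any new RTL, because the probing loop above runs only once
   per (mode, address space) pair.  */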
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
		  addr_space_t as, bool speed,
		  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
	width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
	{
	  off = -((unsigned HOST_WIDE_INT) 1 << i);
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      data->min_offset = (i == -1 ? 0 : off);

      for (i = width; i >= 0; i--)
	{
	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      if (i == -1)
	off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->min_offset);
	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode, as))
	  {
	    rat = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
	  has_predec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (USE_LOAD_POST_DECREMENT (mem_mode)
	  || USE_STORE_POST_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_DEC (address_mode, reg0);
	  has_postdec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_INCREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_INC (address_mode, reg0);
	  has_preinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (USE_LOAD_POST_INCREMENT (mem_mode)
	  || USE_STORE_POST_INCREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_INC (address_mode, reg0);
	  has_postinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
				   gen_int_mode (rat, address_mode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, address_mode,
				      gen_rtx_fmt_ee
					(PLUS, address_mode, base,
					 gen_int_mode (off, address_mode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off, address_mode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address_addr_space (mem_mode, addr, as);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, as, speed);

	  if (!acost)
	    acost = 1;
	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load symbol to a register,
	 which makes addresses that contain symbols look much more expensive.
	 However, the symbol will have to be loaded in any case before the
	 loop (and quite likely we have it in register already), so it does not
	 make much sense to penalize them too heavily.  So make some final
	 tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
	 if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = data->costs[0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < data->costs[1][var_p][off_p][rat_p])
	    data->costs[1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = data->costs[sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }

	  if (has_predec[mem_mode] || has_postdec[mem_mode]
	      || has_preinc[mem_mode] || has_postinc[mem_mode])
	    fprintf (dump_file, "  May include autoinc/dec\n");
	  fprintf (dump_file, "\n");
	}

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
	    && msize == cstep)
	   || (has_postdec[mem_mode] && autoinc_offset == 0
	       && msize == -cstep)
	   || (has_preinc[mem_mode] && autoinc_offset == msize
	       && msize == cstep)
	   || (has_predec[mem_mode] && autoinc_offset == -msize
	       && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
	      && data->min_offset <= s_offset
	      && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
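
/* For illustration (added example, not part of the algorithm):  the sixteen
   probed address shapes decode the loop index i as a bitmask,

       sym_p = i & 1;        var_p = (i >> 1) & 1;
       off_p = (i >> 2) & 1; rat_p = (i >> 3) & 1;

   so i == 13 (binary 1101) prices the shape sym + cst + rat * index.  The
   returned complexity is simply the number of parts present, which breaks
   ties between addressing shapes of equal runtime cost.  */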
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
   the EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;
  bool equal_p = false;

  if (!(m >= 0 && m < maxm))
    return false;

  if (operand_equal_p (op1, mult, 0))
    equal_p = true;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (equal_p
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, equal_p ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
				       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
					     mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
					     mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
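
/* For illustration (added example; names are hypothetical):  for
   EXPR == a + 4 * b on a target with a cheap shift-add, the MULT_EXPR
   operand is spotted above and get_shiftadd_cost prices the whole
   expression as a single shiftadd (b << 2 folded into the add) instead
   of an independent multiply plus add; for an expression the switch does
   not handle, such as a division, the conservative target_spill_cost is
   charged instead.  */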
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
	return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
	return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab.find_slot (&ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
			    tree cbase, HOST_WIDE_INT ratio,
			    bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
	ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
	cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
	   || (TREE_CODE (ubase) == ADDR_EXPR
	       && is_gimple_min_invariant (ubase)))
	  && (TREE_CODE (cbase) == INTEGER_CST))
	return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
	   || (TREE_CODE (cbase) == ADDR_EXPR
	       && is_gimple_min_invariant (cbase)))
	  && (TREE_CODE (ubase) == INTEGER_CST))
	return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
	return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
	  && TREE_CODE (cbase) == ADDR_EXPR)
	{
	  tree usym, csym;

	  usym = TREE_OPERAND (ubase, 0);
	  csym = TREE_OPERAND (cbase, 0);
	  if (TREE_CODE (usym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (usym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && host_integerp (ind, 0)
		  && TREE_INT_CST_LOW (ind) == 0)
		usym = TREE_OPERAND (usym, 0);
	    }
	  if (TREE_CODE (csym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (csym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && host_integerp (ind, 0)
		  && TREE_INT_CST_LOW (ind) == 0)
		csym = TREE_OPERAND (csym, 0);
	    }
	  if (operand_equal_p (usym, csym, 0))
	    return -1;
	}
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
	return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   a register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, gimple at,
			 bool *can_autoinc,
			 int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  enum machine_mode mem_mode = (address_p
				? TYPE_MODE (TREE_TYPE (*use->op_p))
				: VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case the statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (rat.fits_shwi ())
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
			      ubase, build_int_cst (utype, 0),
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
	{
	  aff_tree real_cbase_aff;
	  aff_tree cstep_aff;

	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
				   &real_cbase_aff);
	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

	  aff_combination_add (&real_cbase_aff, &cstep_aff);
	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
	}

      cost = difference_cost (data,
			      ubase, real_cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
	   && !POINTER_TYPE_P (ctype)
	   && multiplier_allowed_in_address_p
		(ratio, mem_mode,
		 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
			      ubase, cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
			difference_cost (data,
					 ubase, build_int_cst (utype, 0),
					 &symbol_present, &var_present,
					 &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
	get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
	bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
		      get_address_cost (symbol_present, var_present,
					offset, ratio, cstepi,
					mem_mode,
					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
					speed, stmt_is_after_inc,
					can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
				    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   a register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on,
		      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt,
				  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
			       NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
					 &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
	cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
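
/* For illustration (added worked example):  the value built above is

       val = base + (niter + after) * step,

   where after == 1 iff AT follows the candidate's increment.  For example,
   a candidate with base 0 and step 4 evaluated at NITER == n yields 4 * n
   when AT precedes the increment and 4 * (n + 1) after it, which is what
   may_eliminate_iv compares the exit bound against.  */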
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}
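
/* For illustration (added arithmetic example):  for a 16-bit unsigned iv
   with step 4 (two trailing zero bits), the period is
   (0x10000 >> 2) - 1 == 0x3fff, which build_low_bits_mask produces as a
   mask of 16 - 2 == 14 low bits; the iv takes 0x4000 distinct values
   before wrapping back to its base.  */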
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Strips useless type conversions off EXP as long as they do not change
   whether the type wraps on overflow.  */

static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
	 && (nowrap_type_p (TREE_TYPE (exp))
	     == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);

  return exp;
}
/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   just check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
	return false;
      /* Fallthru.  */

    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
    default:
      return false;
    }
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in a non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.

	 Also, we only look for the first addition in the computation of BASE.
	 More complex analysis would be better, but introducing it just for
	 this optimization seems like an overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != double_int_one)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}

/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += double_int_one;
      period_value = tree_to_double_int (period);
      if (max_niter.ugt (period_value))
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (max_niter.ugt (period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction
     variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
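
/* Worked example of the period check above, with made-up numbers: an
   unsigned char candidate with step 1 has period 255, so it can encode
   only that many distinct exit-test values.  If DESC->NITER is the
   constant 300, the candidate wraps before the bound is reached and the
   elimination is rejected; with DESC->NITER equal to 100, the bound is
   representable and cand_value_at computes the value to compare
   against.  */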

/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp,
                   inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
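
/* A sketch of the decision above, with hypothetical costs: for a loop
   condition i < n, the elimination strategy might rewrite the test to
   p < base + n * 4 (ELIM_COST: one setup computation, amortized by
   adjust_setup_cost), while the expression strategy keeps computing i
   from the candidate on every iteration (EXPRESS_COST, plus the cost of
   the original bound).  Whichever comp_cost compares lower wins, with
   ties going to elimination.  */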

/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;

      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);

          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
            continue;

          if (uid < gimple_uid (cand->incremented_at)
              && (closest_before == NULL
                  || uid > gimple_uid (closest_before->stmt)))
            closest_before = use;

          if (uid > gimple_uid (cand->incremented_at)
              && (closest_after == NULL
                  || uid < gimple_uid (closest_after->stmt)))
            closest_after = use;
        }

      if (closest_before != NULL
          && autoinc_possible_for_pair (data, closest_before, cand))
        cand->ainc_use = closest_before;
      else if (closest_after != NULL
               && autoinc_possible_for_pair (data, closest_after, cand))
        cand->ainc_use = closest_after;
    }
}
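
/* For illustration (a hypothetical target with auto-increment
   addressing): given an IP_ORIGINAL candidate incremented right after a
   memory use,

     ... = *p;        <- USE, the closest use before the increment
     p = p + 4;       <- CAND->INCREMENTED_AT

   pairing CAND with the neighboring use lets the backend emit a
   post-increment load, so AINC_USE records that use for the address cost
   computation.  */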

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d",
                         use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used,
                                            data->speed,
                                            data->body_includes_call);
}
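
/* Worked example with made-up numbers: if SIZE is 5, DATA->REGS_USED is 3
   and estimate_reg_pressure_cost returns 12 for that pressure, the global
   cost is 5 + 12 = 17.  The added SIZE term is what biases the search
   toward sets with fewer induction variables.  */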

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n",
               target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n",
               target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n",
               target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs
                                            + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
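
/* The total kept in IVS->COST therefore decomposes as (illustrative
   breakdown of the code above):

     cost = cand_use_cost                               // chosen cost pairs
          + cand_cost                                   // selected candidates
          + global cost of (n_regs + num_used_inv_expr) // register pressure

   so adding or removing a single cost pair only needs the incremental
   updates done by iv_ca_set_cp and iv_ca_set_no_cp followed by this
   recount.  */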

/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
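
/* For illustration (a hypothetical delta list): reversing

     [use1: cp_a -> cp_b] -> [use2: cp_c -> cp_d]

   yields

     [use2: cp_d -> cp_c] -> [use1: cp_b -> cp_a]

   i.e. both the order of the changes and the old/new roles are swapped,
   so committing the reversed list undoes the original one.  */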

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
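
/* A sketch of a pruning step, with hypothetical costs: with
   IVS = {c1, c2, c3} costing 40, iv_ca_narrow might find that dropping c2
   re-expresses its uses by c1 for a total of 35, while dropping c3 costs
   42.  The best removal (c2) is committed and pruning recurses on
   {c1, c3} until no single removal improves the cost.  */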

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
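
/* One greedy step, illustrated with made-up numbers: starting from a set
   costing 50, extending by some candidate c5 might give 47, and pruning
   the extended set (allowed because it is within
   ALWAYS_PRUNE_CAND_SET_BOUND) might drop a now-redundant candidate for
   44.  The joined delta (extend then prune) is remembered as BEST_DELTA
   and committed if no other candidate does better; the caller keeps
   iterating until no step improves the cost.  */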

/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
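
/* A hypothetical before/after of the elimination path above:

     i_1 = PHI <0, i_2>                  p_1 = PHI <base, p_2>
     i_2 = i_1 + 1;                      p_2 = p_1 + 4;
     if (i_2 != 100)            ==>      if (p_2 < base + 400)
       goto loop;                          goto loop;

   BOUND (base + 400 here) is computed once in the preheader; only the
   comparison inside the loop changes, and the original counter becomes
   dead.  */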

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use,
             struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
        rewrite_use_nonlinear_expr (data, use, cand);
        break;

      case USE_ADDRESS:
        rewrite_use_address (data, use, cand);
        break;

      case USE_COMPARE:
        rewrite_use_compare (data, use, cand);
        break;

      default:
        gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_cand *cand;
  struct iv_use *use;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                    ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                    == TYPE_MODE (TREE_TYPE (info->iv->base))
                    ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                    ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp
                    = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->inv_id = 0;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}