1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
81 #include "tree-pass.h"
83 #include "insn-config.h"
85 #include "pointer-set.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
95 /* The infinite cost. */
96 #define INFTY 10000000
/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
103 /* Representation of the induction variable. */
106 tree base
; /* Initial value of the iv. */
107 tree base_object
; /* A memory object to that the induction variable points. */
108 tree step
; /* Step of the iv (constant only). */
109 tree ssa_name
; /* The ssa name with the value. */
110 bool biv_p
; /* Is it a biv? */
111 bool have_use_for
; /* Do we already have a use for it? */
112 unsigned use_id
; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
118 tree name
; /* The ssa name. */
119 struct iv
*iv
; /* Induction variable description. */
120 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 bool preserve_biv
; /* For the original biv, whether to preserve it. */
123 unsigned inv_id
; /* Id of an invariant. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;
144 static const comp_cost zero_cost
= {0, 0};
145 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
147 /* The candidate - cost pair. */
150 struct iv_cand
*cand
; /* The candidate. */
151 comp_cost cost
; /* The cost. */
152 bitmap depends_on
; /* The list of invariants that have to be
154 tree value
; /* For final value elimination, the expression for
155 the final value of the iv. For iv elimination,
156 the new bound to compare with. */
162 unsigned id
; /* The id of the use. */
163 enum use_type type
; /* Type of the use. */
164 struct iv
*iv
; /* The induction variable it is based on. */
165 gimple stmt
; /* Statement in that it occurs. */
166 tree
*op_p
; /* The place where it occurs. */
167 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
170 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
171 struct cost_pair
*cost_map
;
172 /* The costs wrto the iv candidates. */
174 struct iv_cand
*selected
;
175 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
188 /* The induction variable candidate. */
191 unsigned id
; /* The number of the candidate. */
192 bool important
; /* Whether this is an "important" candidate, i.e. such
193 that it should be considered by all uses. */
194 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
195 gimple incremented_at
;/* For original biv, the statement where it is
197 tree var_before
; /* The variable used for it before increment. */
198 tree var_after
; /* The variable used for it after increment. */
199 struct iv
*iv
; /* The value of the candidate. NULL for
200 "pseudocandidate" used to indicate the possibility
201 to replace the final value of an iv by direct
202 computation of the value. */
203 unsigned cost
; /* Cost of the candidate. */
204 unsigned cost_step
; /* Cost of the candidate's increment operation. */
205 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
206 where it is incremented. */
207 bitmap depends_on
; /* The list of invariants that are used in step of the
211 /* The data used by the induction variable optimizations. */
213 typedef struct iv_use
*iv_use_p
;
215 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
217 typedef struct iv_cand
*iv_cand_p
;
218 DEF_VEC_P(iv_cand_p
);
219 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
223 /* The currently optimized loop. */
224 struct loop
*current_loop
;
226 /* Numbers of iterations for all exits of the current loop. */
227 struct pointer_map_t
*niters
;
229 /* Number of registers used in it. */
232 /* The size of version_info array allocated. */
233 unsigned version_info_size
;
235 /* The array of information for the ssa names. */
236 struct version_info
*version_info
;
238 /* The bitmap of indices in version_info whose value was changed. */
241 /* The uses of induction variables. */
242 VEC(iv_use_p
,heap
) *iv_uses
;
244 /* The candidates. */
245 VEC(iv_cand_p
,heap
) *iv_candidates
;
247 /* A bitmap of important candidates. */
248 bitmap important_candidates
;
250 /* The maximum invariant id. */
253 /* Whether to consider just related and important candidates when replacing a
255 bool consider_all_candidates
;
257 /* Are we optimizing for speed? */
261 /* An assignment of iv candidates to uses. */
265 /* The number of uses covered by the assignment. */
268 /* Number of uses that cannot be expressed by the candidates in the set. */
271 /* Candidate assigned to a use, together with the related costs. */
272 struct cost_pair
**cand_for_use
;
274 /* Number of times each candidate is used. */
275 unsigned *n_cand_uses
;
277 /* The candidates used. */
280 /* The number of candidates in the set. */
283 /* Total number of registers needed. */
286 /* Total cost of expressing uses. */
287 comp_cost cand_use_cost
;
289 /* Total cost of candidates. */
292 /* Number of times each invariant is used. */
293 unsigned *n_invariant_uses
;
295 /* Total cost of the assignment. */
/* Difference of two iv candidate assignments.  */
struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
316 /* Bound on number of candidates below that all candidates are considered. */
318 #define CONSIDER_ALL_CANDIDATES_BOUND \
319 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
321 /* If there are more iv occurrences, we just give up (it is quite unlikely that
322 optimizing such a loop would help, and it would take ages). */
324 #define MAX_CONSIDERED_USES \
325 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
327 /* If there are at most this number of ivs in the set, try removing unnecessary
328 ivs from the set always. */
330 #define ALWAYS_PRUNE_CAND_SET_BOUND \
331 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
333 /* The list of trees for that the decl_rtl field must be reset is stored
336 static VEC(tree
,heap
) *decl_rtl_to_reset
;
338 /* Number of uses recorded in DATA. */
340 static inline unsigned
341 n_iv_uses (struct ivopts_data
*data
)
343 return VEC_length (iv_use_p
, data
->iv_uses
);
346 /* Ith use recorded in DATA. */
348 static inline struct iv_use
*
349 iv_use (struct ivopts_data
*data
, unsigned i
)
351 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
354 /* Number of candidates recorded in DATA. */
356 static inline unsigned
357 n_iv_cands (struct ivopts_data
*data
)
359 return VEC_length (iv_cand_p
, data
->iv_candidates
);
362 /* Ith candidate recorded in DATA. */
364 static inline struct iv_cand
*
365 iv_cand (struct ivopts_data
*data
, unsigned i
)
367 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
370 /* The single loop exit if it dominates the latch, NULL otherwise. */
373 single_dom_exit (struct loop
*loop
)
375 edge exit
= single_exit (loop
);
380 if (!just_once_each_iteration_p (loop
, exit
->src
))
386 /* Dumps information about the induction variable IV to FILE. */
388 extern void dump_iv (FILE *, struct iv
*);
390 dump_iv (FILE *file
, struct iv
*iv
)
394 fprintf (file
, "ssa name ");
395 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
396 fprintf (file
, "\n");
399 fprintf (file
, " type ");
400 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
401 fprintf (file
, "\n");
405 fprintf (file
, " base ");
406 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
407 fprintf (file
, "\n");
409 fprintf (file
, " step ");
410 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
411 fprintf (file
, "\n");
415 fprintf (file
, " invariant ");
416 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
417 fprintf (file
, "\n");
422 fprintf (file
, " base object ");
423 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
424 fprintf (file
, "\n");
428 fprintf (file
, " is a biv\n");
431 /* Dumps information about the USE to FILE. */
433 extern void dump_use (FILE *, struct iv_use
*);
435 dump_use (FILE *file
, struct iv_use
*use
)
437 fprintf (file
, "use %d\n", use
->id
);
441 case USE_NONLINEAR_EXPR
:
442 fprintf (file
, " generic\n");
446 fprintf (file
, " address\n");
450 fprintf (file
, " compare\n");
457 fprintf (file
, " in statement ");
458 print_gimple_stmt (file
, use
->stmt
, 0, 0);
459 fprintf (file
, "\n");
461 fprintf (file
, " at position ");
463 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
464 fprintf (file
, "\n");
466 dump_iv (file
, use
->iv
);
468 if (use
->related_cands
)
470 fprintf (file
, " related candidates ");
471 dump_bitmap (file
, use
->related_cands
);
475 /* Dumps information about the uses to FILE. */
477 extern void dump_uses (FILE *, struct ivopts_data
*);
479 dump_uses (FILE *file
, struct ivopts_data
*data
)
484 for (i
= 0; i
< n_iv_uses (data
); i
++)
486 use
= iv_use (data
, i
);
488 dump_use (file
, use
);
489 fprintf (file
, "\n");
493 /* Dumps information about induction variable candidate CAND to FILE. */
495 extern void dump_cand (FILE *, struct iv_cand
*);
497 dump_cand (FILE *file
, struct iv_cand
*cand
)
499 struct iv
*iv
= cand
->iv
;
501 fprintf (file
, "candidate %d%s\n",
502 cand
->id
, cand
->important
? " (important)" : "");
504 if (cand
->depends_on
)
506 fprintf (file
, " depends on ");
507 dump_bitmap (file
, cand
->depends_on
);
512 fprintf (file
, " final value replacement\n");
519 fprintf (file
, " incremented before exit test\n");
523 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
527 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
531 fprintf (file
, " incremented at end\n");
535 fprintf (file
, " original biv\n");
542 /* Returns the info for ssa version VER. */
544 static inline struct version_info
*
545 ver_info (struct ivopts_data
*data
, unsigned ver
)
547 return data
->version_info
+ ver
;
550 /* Returns the info for ssa name NAME. */
552 static inline struct version_info
*
553 name_info (struct ivopts_data
*data
, tree name
)
555 return ver_info (data
, SSA_NAME_VERSION (name
));
558 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
562 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
564 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
568 if (sbb
== loop
->latch
)
574 return stmt
== last_stmt (bb
);
577 /* Returns true if STMT if after the place where the original induction
578 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
579 if the positions are identical. */
582 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
584 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
585 basic_block stmt_bb
= gimple_bb (stmt
);
587 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
590 if (stmt_bb
!= cand_bb
)
594 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
596 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
599 /* Returns true if STMT if after the place where the induction variable
600 CAND is incremented in LOOP. */
603 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
611 return stmt_after_ip_normal_pos (loop
, stmt
);
615 return stmt_after_inc_pos (cand
, stmt
, false);
618 return stmt_after_inc_pos (cand
, stmt
, true);
625 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
628 abnormal_ssa_name_p (tree exp
)
633 if (TREE_CODE (exp
) != SSA_NAME
)
636 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
639 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
640 abnormal phi node. Callback for for_each_index. */
643 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
644 void *data ATTRIBUTE_UNUSED
)
646 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
648 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
650 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
654 return !abnormal_ssa_name_p (*index
);
657 /* Returns true if EXPR contains a ssa name that occurs in an
658 abnormal phi node. */
661 contains_abnormal_ssa_name_p (tree expr
)
664 enum tree_code_class codeclass
;
669 code
= TREE_CODE (expr
);
670 codeclass
= TREE_CODE_CLASS (code
);
672 if (code
== SSA_NAME
)
673 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
675 if (code
== INTEGER_CST
676 || is_gimple_min_invariant (expr
))
679 if (code
== ADDR_EXPR
)
680 return !for_each_index (&TREE_OPERAND (expr
, 0),
681 idx_contains_abnormal_ssa_name_p
,
688 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
693 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
705 /* Returns tree describing number of iterations determined from
706 EXIT of DATA->current_loop, or NULL if something goes wrong. */
709 niter_for_exit (struct ivopts_data
*data
, edge exit
)
711 struct tree_niter_desc desc
;
717 data
->niters
= pointer_map_create ();
721 slot
= pointer_map_contains (data
->niters
, exit
);
725 /* Try to determine number of iterations. We must know it
726 unconditionally (i.e., without possibility of # of iterations
727 being zero). Also, we cannot safely work with ssa names that
728 appear in phi nodes on abnormal edges, so that we do not create
729 overlapping life ranges for them (PR 27283). */
730 if (number_of_iterations_exit (data
->current_loop
,
732 && integer_zerop (desc
.may_be_zero
)
733 && !contains_abnormal_ssa_name_p (desc
.niter
))
738 *pointer_map_insert (data
->niters
, exit
) = niter
;
741 niter
= (tree
) *slot
;
746 /* Returns tree describing number of iterations determined from
747 single dominating exit of DATA->current_loop, or NULL if something
751 niter_for_single_dom_exit (struct ivopts_data
*data
)
753 edge exit
= single_dom_exit (data
->current_loop
);
758 return niter_for_exit (data
, exit
);
761 /* Initializes data structures used by the iv optimization pass, stored
765 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
767 data
->version_info_size
= 2 * num_ssa_names
;
768 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
769 data
->relevant
= BITMAP_ALLOC (NULL
);
770 data
->important_candidates
= BITMAP_ALLOC (NULL
);
771 data
->max_inv_id
= 0;
773 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
774 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
775 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
778 /* Returns a memory object to that EXPR points. In case we are able to
779 determine that it does not point to any such object, NULL is returned. */
782 determine_base_object (tree expr
)
784 enum tree_code code
= TREE_CODE (expr
);
787 /* If this is a pointer casted to any type, we need to determine
788 the base object for the pointer; so handle conversions before
789 throwing away non-pointer expressions. */
790 if (CONVERT_EXPR_P (expr
))
791 return determine_base_object (TREE_OPERAND (expr
, 0));
793 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
802 obj
= TREE_OPERAND (expr
, 0);
803 base
= get_base_address (obj
);
808 if (TREE_CODE (base
) == INDIRECT_REF
)
809 return determine_base_object (TREE_OPERAND (base
, 0));
811 return fold_convert (ptr_type_node
,
812 build_fold_addr_expr (base
));
814 case POINTER_PLUS_EXPR
:
815 return determine_base_object (TREE_OPERAND (expr
, 0));
819 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
823 return fold_convert (ptr_type_node
, expr
);
827 /* Allocates an induction variable with given initial value BASE and step STEP
831 alloc_iv (tree base
, tree step
)
833 struct iv
*iv
= XCNEW (struct iv
);
834 gcc_assert (step
!= NULL_TREE
);
837 iv
->base_object
= determine_base_object (base
);
840 iv
->have_use_for
= false;
842 iv
->ssa_name
= NULL_TREE
;
847 /* Sets STEP and BASE for induction variable IV. */
850 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
852 struct version_info
*info
= name_info (data
, iv
);
854 gcc_assert (!info
->iv
);
856 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
857 info
->iv
= alloc_iv (base
, step
);
858 info
->iv
->ssa_name
= iv
;
861 /* Finds induction variable declaration for VAR. */
864 get_iv (struct ivopts_data
*data
, tree var
)
867 tree type
= TREE_TYPE (var
);
869 if (!POINTER_TYPE_P (type
)
870 && !INTEGRAL_TYPE_P (type
))
873 if (!name_info (data
, var
)->iv
)
875 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
878 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
879 set_iv (data
, var
, var
, build_int_cst (type
, 0));
882 return name_info (data
, var
)->iv
;
885 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
886 not define a simple affine biv with nonzero step. */
889 determine_biv_step (gimple phi
)
891 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
892 tree name
= PHI_RESULT (phi
);
895 if (!is_gimple_reg (name
))
898 if (!simple_iv (loop
, loop
, name
, &iv
, true))
901 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
904 /* Finds basic ivs. */
907 find_bivs (struct ivopts_data
*data
)
910 tree step
, type
, base
;
912 struct loop
*loop
= data
->current_loop
;
913 gimple_stmt_iterator psi
;
915 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
917 phi
= gsi_stmt (psi
);
919 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
922 step
= determine_biv_step (phi
);
926 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
927 base
= expand_simple_operations (base
);
928 if (contains_abnormal_ssa_name_p (base
)
929 || contains_abnormal_ssa_name_p (step
))
932 type
= TREE_TYPE (PHI_RESULT (phi
));
933 base
= fold_convert (type
, base
);
936 if (POINTER_TYPE_P (type
))
937 step
= fold_convert (sizetype
, step
);
939 step
= fold_convert (type
, step
);
942 set_iv (data
, PHI_RESULT (phi
), base
, step
);
949 /* Marks basic ivs. */
952 mark_bivs (struct ivopts_data
*data
)
956 struct iv
*iv
, *incr_iv
;
957 struct loop
*loop
= data
->current_loop
;
959 gimple_stmt_iterator psi
;
961 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
963 phi
= gsi_stmt (psi
);
965 iv
= get_iv (data
, PHI_RESULT (phi
));
969 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
970 incr_iv
= get_iv (data
, var
);
974 /* If the increment is in the subloop, ignore it. */
975 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
976 if (incr_bb
->loop_father
!= data
->current_loop
977 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
981 incr_iv
->biv_p
= true;
985 /* Checks whether STMT defines a linear induction variable and stores its
989 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
992 struct loop
*loop
= data
->current_loop
;
994 iv
->base
= NULL_TREE
;
995 iv
->step
= NULL_TREE
;
997 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1000 lhs
= gimple_assign_lhs (stmt
);
1001 if (TREE_CODE (lhs
) != SSA_NAME
)
1004 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1006 iv
->base
= expand_simple_operations (iv
->base
);
1008 if (contains_abnormal_ssa_name_p (iv
->base
)
1009 || contains_abnormal_ssa_name_p (iv
->step
))
1015 /* Finds general ivs in statement STMT. */
1018 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1022 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1025 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1028 /* Finds general ivs in basic block BB. */
1031 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1033 gimple_stmt_iterator bsi
;
1035 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1036 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1039 /* Finds general ivs. */
1042 find_givs (struct ivopts_data
*data
)
1044 struct loop
*loop
= data
->current_loop
;
1045 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1048 for (i
= 0; i
< loop
->num_nodes
; i
++)
1049 find_givs_in_bb (data
, body
[i
]);
1053 /* For each ssa name defined in LOOP determines whether it is an induction
1054 variable and if so, its initial value and step. */
1057 find_induction_variables (struct ivopts_data
*data
)
1062 if (!find_bivs (data
))
1068 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1070 tree niter
= niter_for_single_dom_exit (data
);
1074 fprintf (dump_file
, " number of iterations ");
1075 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1076 fprintf (dump_file
, "\n\n");
1079 fprintf (dump_file
, "Induction variables:\n\n");
1081 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1083 if (ver_info (data
, i
)->iv
)
1084 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1091 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1093 static struct iv_use
*
1094 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1095 gimple stmt
, enum use_type use_type
)
1097 struct iv_use
*use
= XCNEW (struct iv_use
);
1099 use
->id
= n_iv_uses (data
);
1100 use
->type
= use_type
;
1104 use
->related_cands
= BITMAP_ALLOC (NULL
);
1106 /* To avoid showing ssa name in the dumps, if it was not reset by the
1108 iv
->ssa_name
= NULL_TREE
;
1110 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1111 dump_use (dump_file
, use
);
1113 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1118 /* Checks whether OP is a loop-level invariant and if so, records it.
1119 NONLINEAR_USE is true if the invariant is used in a way we do not
1120 handle specially. */
1123 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1126 struct version_info
*info
;
1128 if (TREE_CODE (op
) != SSA_NAME
1129 || !is_gimple_reg (op
))
1132 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1134 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1137 info
= name_info (data
, op
);
1139 info
->has_nonlin_use
|= nonlinear_use
;
1141 info
->inv_id
= ++data
->max_inv_id
;
1142 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1145 /* Checks whether the use OP is interesting and if so, records it. */
1147 static struct iv_use
*
1148 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1155 if (TREE_CODE (op
) != SSA_NAME
)
1158 iv
= get_iv (data
, op
);
1162 if (iv
->have_use_for
)
1164 use
= iv_use (data
, iv
->use_id
);
1166 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1170 if (integer_zerop (iv
->step
))
1172 record_invariant (data
, op
, true);
1175 iv
->have_use_for
= true;
1177 civ
= XNEW (struct iv
);
1180 stmt
= SSA_NAME_DEF_STMT (op
);
1181 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1182 || is_gimple_assign (stmt
));
1184 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1185 iv
->use_id
= use
->id
;
1190 /* Given a condition in statement STMT, checks whether it is a compare
1191 of an induction variable and an invariant. If this is the case,
1192 CONTROL_VAR is set to location of the iv, BOUND to the location of
1193 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1194 induction variable descriptions, and true is returned. If this is not
1195 the case, CONTROL_VAR and BOUND are set to the arguments of the
1196 condition and false is returned. */
1199 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1200 tree
**control_var
, tree
**bound
,
1201 struct iv
**iv_var
, struct iv
**iv_bound
)
1203 /* The objects returned when COND has constant operands. */
1204 static struct iv const_iv
;
1206 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1207 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1210 if (gimple_code (stmt
) == GIMPLE_COND
)
1212 op0
= gimple_cond_lhs_ptr (stmt
);
1213 op1
= gimple_cond_rhs_ptr (stmt
);
1217 op0
= gimple_assign_rhs1_ptr (stmt
);
1218 op1
= gimple_assign_rhs2_ptr (stmt
);
1221 zero
= integer_zero_node
;
1222 const_iv
.step
= integer_zero_node
;
1224 if (TREE_CODE (*op0
) == SSA_NAME
)
1225 iv0
= get_iv (data
, *op0
);
1226 if (TREE_CODE (*op1
) == SSA_NAME
)
1227 iv1
= get_iv (data
, *op1
);
1229 /* Exactly one of the compared values must be an iv, and the other one must
1234 if (integer_zerop (iv0
->step
))
1236 /* Control variable may be on the other side. */
1237 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1238 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1240 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1244 *control_var
= op0
;;
1255 /* Checks whether the condition in STMT is interesting and if so,
1259 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1261 tree
*var_p
, *bound_p
;
1262 struct iv
*var_iv
, *civ
;
1264 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1266 find_interesting_uses_op (data
, *var_p
);
1267 find_interesting_uses_op (data
, *bound_p
);
1271 civ
= XNEW (struct iv
);
1273 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1276 /* Returns true if expression EXPR is obviously invariant in LOOP,
1277 i.e. if all its operands are defined outside of the LOOP. LOOP
1278 should not be the function body. */
1281 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1286 gcc_assert (loop_depth (loop
) > 0);
1288 if (is_gimple_min_invariant (expr
))
1291 if (TREE_CODE (expr
) == SSA_NAME
)
1293 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1295 && flow_bb_inside_loop_p (loop
, def_bb
))
1304 len
= TREE_OPERAND_LENGTH (expr
);
1305 for (i
= 0; i
< len
; i
++)
1306 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1312 /* Returns true if statement STMT is obviously invariant in LOOP,
1313 i.e. if all its operands on the RHS are defined outside of the LOOP.
1314 LOOP should not be the function body. */
1317 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1322 gcc_assert (loop_depth (loop
) > 0);
1324 lhs
= gimple_get_lhs (stmt
);
1325 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1327 tree op
= gimple_op (stmt
, i
);
1328 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1335 /* Cumulates the steps of indices into DATA and replaces their values with the
1336 initial ones. Returns false when the value of the index cannot be determined.
1337 Callback for for_each_index. */
1339 struct ifs_ivopts_data
1341 struct ivopts_data
*ivopts_data
;
1347 idx_find_step (tree base
, tree
*idx
, void *data
)
1349 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1351 tree step
, iv_base
, iv_step
, lbound
, off
;
1352 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1354 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1355 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1358 /* If base is a component ref, require that the offset of the reference
1360 if (TREE_CODE (base
) == COMPONENT_REF
)
1362 off
= component_ref_field_offset (base
);
1363 return expr_invariant_in_loop_p (loop
, off
);
1366 /* If base is array, first check whether we will be able to move the
1367 reference out of the loop (in order to take its address in strength
1368 reduction). In order for this to work we need both lower bound
1369 and step to be loop invariants. */
1370 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1372 /* Moreover, for a range, the size needs to be invariant as well. */
1373 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1374 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1377 step
= array_ref_element_size (base
);
1378 lbound
= array_ref_low_bound (base
);
1380 if (!expr_invariant_in_loop_p (loop
, step
)
1381 || !expr_invariant_in_loop_p (loop
, lbound
))
1385 if (TREE_CODE (*idx
) != SSA_NAME
)
1388 iv
= get_iv (dta
->ivopts_data
, *idx
);
1392 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1393 *&x[0], which is not folded and does not trigger the
1394 ARRAY_REF path below. */
1397 if (integer_zerop (iv
->step
))
1400 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1402 step
= array_ref_element_size (base
);
1404 /* We only handle addresses whose step is an integer constant. */
1405 if (TREE_CODE (step
) != INTEGER_CST
)
1409 /* The step for pointer arithmetics already is 1 byte. */
1410 step
= build_int_cst (sizetype
, 1);
1414 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1415 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1418 /* The index might wrap. */
1422 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1423 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1428 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1429 object is passed to it in DATA. */
1432 idx_record_use (tree base
, tree
*idx
,
1435 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1436 find_interesting_uses_op (data
, *idx
);
1437 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1439 find_interesting_uses_op (data
, array_ref_element_size (base
));
1440 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1445 /* If we can prove that TOP = cst * BOT for some constant cst,
1446 store cst to MUL and return true. Otherwise return false.
1447 The returned value is always sign-extended, regardless of the
1448 signedness of TOP and BOT. */
1451 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1454 enum tree_code code
;
1455 double_int res
, p0
, p1
;
1456 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1461 if (operand_equal_p (top
, bot
, 0))
1463 *mul
= double_int_one
;
1467 code
= TREE_CODE (top
);
1471 mby
= TREE_OPERAND (top
, 1);
1472 if (TREE_CODE (mby
) != INTEGER_CST
)
1475 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1478 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1484 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1485 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1488 if (code
== MINUS_EXPR
)
1489 p1
= double_int_neg (p1
);
1490 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1494 if (TREE_CODE (bot
) != INTEGER_CST
)
1497 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1498 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1499 if (double_int_zero_p (p1
))
1501 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1503 return double_int_zero_p (res
);
1510 /* Returns true if memory reference REF with step STEP may be unaligned. */
1513 may_be_unaligned_p (tree ref
, tree step
)
1517 HOST_WIDE_INT bitsize
;
1518 HOST_WIDE_INT bitpos
;
1520 enum machine_mode mode
;
1521 int unsignedp
, volatilep
;
1522 unsigned base_align
;
1524 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1525 thus they are not misaligned. */
1526 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1529 /* The test below is basically copy of what expr.c:normal_inner_ref
1530 does to check whether the object must be loaded by parts when
1531 STRICT_ALIGNMENT is true. */
1532 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1533 &unsignedp
, &volatilep
, true);
1534 base_type
= TREE_TYPE (base
);
1535 base_align
= TYPE_ALIGN (base_type
);
1537 if (mode
!= BLKmode
)
1539 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1541 if (base_align
< mode_align
1542 || (bitpos
% mode_align
) != 0
1543 || (bitpos
% BITS_PER_UNIT
) != 0)
1547 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1550 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1557 /* Return true if EXPR may be non-addressable. */
1560 may_be_nonaddressable_p (tree expr
)
1562 switch (TREE_CODE (expr
))
1564 case TARGET_MEM_REF
:
1565 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1566 target, thus they are always addressable. */
1570 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1571 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1573 case VIEW_CONVERT_EXPR
:
1574 /* This kind of view-conversions may wrap non-addressable objects
1575 and make them look addressable. After some processing the
1576 non-addressability may be uncovered again, causing ADDR_EXPRs
1577 of inappropriate objects to be built. */
1578 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1579 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1582 /* ... fall through ... */
1585 case ARRAY_RANGE_REF
:
1586 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1598 /* Finds addresses in *OP_P inside STMT. */
1601 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1603 tree base
= *op_p
, step
= build_int_cst (sizetype
, 0);
1605 struct ifs_ivopts_data ifs_ivopts_data
;
1607 /* Do not play with volatile memory references. A bit too conservative,
1608 perhaps, but safe. */
1609 if (gimple_has_volatile_ops (stmt
))
1612 /* Ignore bitfields for now. Not really something terribly complicated
1614 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1617 base
= unshare_expr (base
);
1619 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1621 tree type
= build_pointer_type (TREE_TYPE (base
));
1625 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1627 civ
= get_iv (data
, TMR_BASE (base
));
1631 TMR_BASE (base
) = civ
->base
;
1634 if (TMR_INDEX (base
)
1635 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1637 civ
= get_iv (data
, TMR_INDEX (base
));
1641 TMR_INDEX (base
) = civ
->base
;
1646 if (TMR_STEP (base
))
1647 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1649 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1653 if (integer_zerop (step
))
1655 base
= tree_mem_ref_addr (type
, base
);
1659 ifs_ivopts_data
.ivopts_data
= data
;
1660 ifs_ivopts_data
.stmt
= stmt
;
1661 ifs_ivopts_data
.step
= build_int_cst (sizetype
, 0);
1662 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1663 || integer_zerop (ifs_ivopts_data
.step
))
1665 step
= ifs_ivopts_data
.step
;
1667 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1668 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1670 /* Check that the base expression is addressable. This needs
1671 to be done after substituting bases of IVs into it. */
1672 if (may_be_nonaddressable_p (base
))
1675 /* Moreover, on strict alignment platforms, check that it is
1676 sufficiently aligned. */
1677 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1680 base
= build_fold_addr_expr (base
);
1682 /* Substituting bases of IVs into the base expression might
1683 have caused folding opportunities. */
1684 if (TREE_CODE (base
) == ADDR_EXPR
)
1686 tree
*ref
= &TREE_OPERAND (base
, 0);
1687 while (handled_component_p (*ref
))
1688 ref
= &TREE_OPERAND (*ref
, 0);
1689 if (TREE_CODE (*ref
) == INDIRECT_REF
)
1691 tree tem
= gimple_fold_indirect_ref (TREE_OPERAND (*ref
, 0));
1698 civ
= alloc_iv (base
, step
);
1699 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1703 for_each_index (op_p
, idx_record_use
, data
);
1706 /* Finds and records invariants used in STMT. */
1709 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1712 use_operand_p use_p
;
1715 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1717 op
= USE_FROM_PTR (use_p
);
1718 record_invariant (data
, op
, false);
1722 /* Finds interesting uses of induction variables in the statement STMT. */
1725 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1728 tree op
, *lhs
, *rhs
;
1730 use_operand_p use_p
;
1731 enum tree_code code
;
1733 find_invariants_stmt (data
, stmt
);
1735 if (gimple_code (stmt
) == GIMPLE_COND
)
1737 find_interesting_uses_cond (data
, stmt
);
1741 if (is_gimple_assign (stmt
))
1743 lhs
= gimple_assign_lhs_ptr (stmt
);
1744 rhs
= gimple_assign_rhs1_ptr (stmt
);
1746 if (TREE_CODE (*lhs
) == SSA_NAME
)
1748 /* If the statement defines an induction variable, the uses are not
1749 interesting by themselves. */
1751 iv
= get_iv (data
, *lhs
);
1753 if (iv
&& !integer_zerop (iv
->step
))
1757 code
= gimple_assign_rhs_code (stmt
);
1758 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1759 && (REFERENCE_CLASS_P (*rhs
)
1760 || is_gimple_val (*rhs
)))
1762 if (REFERENCE_CLASS_P (*rhs
))
1763 find_interesting_uses_address (data
, stmt
, rhs
);
1765 find_interesting_uses_op (data
, *rhs
);
1767 if (REFERENCE_CLASS_P (*lhs
))
1768 find_interesting_uses_address (data
, stmt
, lhs
);
1771 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1773 find_interesting_uses_cond (data
, stmt
);
1777 /* TODO -- we should also handle address uses of type
1779 memory = call (whatever);
1786 if (gimple_code (stmt
) == GIMPLE_PHI
1787 && gimple_bb (stmt
) == data
->current_loop
->header
)
1789 iv
= get_iv (data
, PHI_RESULT (stmt
));
1791 if (iv
&& !integer_zerop (iv
->step
))
1795 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1797 op
= USE_FROM_PTR (use_p
);
1799 if (TREE_CODE (op
) != SSA_NAME
)
1802 iv
= get_iv (data
, op
);
1806 find_interesting_uses_op (data
, op
);
1810 /* Finds interesting uses of induction variables outside of loops
1811 on loop exit edge EXIT. */
1814 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1817 gimple_stmt_iterator psi
;
1820 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1822 phi
= gsi_stmt (psi
);
1823 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1824 if (is_gimple_reg (def
))
1825 find_interesting_uses_op (data
, def
);
1829 /* Finds uses of the induction variables that are interesting. */
1832 find_interesting_uses (struct ivopts_data
*data
)
1835 gimple_stmt_iterator bsi
;
1836 basic_block
*body
= get_loop_body (data
->current_loop
);
1838 struct version_info
*info
;
1841 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1842 fprintf (dump_file
, "Uses:\n\n");
1844 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1849 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1850 if (e
->dest
!= EXIT_BLOCK_PTR
1851 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1852 find_interesting_uses_outside (data
, e
);
1854 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1855 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1856 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1857 if (!is_gimple_debug (gsi_stmt (bsi
)))
1858 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1861 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1865 fprintf (dump_file
, "\n");
1867 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1869 info
= ver_info (data
, i
);
1872 fprintf (dump_file
, " ");
1873 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1874 fprintf (dump_file
, " is invariant (%d)%s\n",
1875 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1879 fprintf (dump_file
, "\n");
1885 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1886 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1887 we are at the top-level of the processed address. */
1890 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1891 unsigned HOST_WIDE_INT
*offset
)
1893 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1894 enum tree_code code
;
1895 tree type
, orig_type
= TREE_TYPE (expr
);
1896 unsigned HOST_WIDE_INT off0
, off1
, st
;
1897 tree orig_expr
= expr
;
1901 type
= TREE_TYPE (expr
);
1902 code
= TREE_CODE (expr
);
1908 if (!cst_and_fits_in_hwi (expr
)
1909 || integer_zerop (expr
))
1912 *offset
= int_cst_value (expr
);
1913 return build_int_cst (orig_type
, 0);
1915 case POINTER_PLUS_EXPR
:
1918 op0
= TREE_OPERAND (expr
, 0);
1919 op1
= TREE_OPERAND (expr
, 1);
1921 op0
= strip_offset_1 (op0
, false, false, &off0
);
1922 op1
= strip_offset_1 (op1
, false, false, &off1
);
1924 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
1925 if (op0
== TREE_OPERAND (expr
, 0)
1926 && op1
== TREE_OPERAND (expr
, 1))
1929 if (integer_zerop (op1
))
1931 else if (integer_zerop (op0
))
1933 if (code
== MINUS_EXPR
)
1934 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1939 expr
= fold_build2 (code
, type
, op0
, op1
);
1941 return fold_convert (orig_type
, expr
);
1944 op1
= TREE_OPERAND (expr
, 1);
1945 if (!cst_and_fits_in_hwi (op1
))
1948 op0
= TREE_OPERAND (expr
, 0);
1949 op0
= strip_offset_1 (op0
, false, false, &off0
);
1950 if (op0
== TREE_OPERAND (expr
, 0))
1953 *offset
= off0
* int_cst_value (op1
);
1954 if (integer_zerop (op0
))
1957 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
1959 return fold_convert (orig_type
, expr
);
1962 case ARRAY_RANGE_REF
:
1966 step
= array_ref_element_size (expr
);
1967 if (!cst_and_fits_in_hwi (step
))
1970 st
= int_cst_value (step
);
1971 op1
= TREE_OPERAND (expr
, 1);
1972 op1
= strip_offset_1 (op1
, false, false, &off1
);
1973 *offset
= off1
* st
;
1976 && integer_zerop (op1
))
1978 /* Strip the component reference completely. */
1979 op0
= TREE_OPERAND (expr
, 0);
1980 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1990 tmp
= component_ref_field_offset (expr
);
1992 && cst_and_fits_in_hwi (tmp
))
1994 /* Strip the component reference completely. */
1995 op0
= TREE_OPERAND (expr
, 0);
1996 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1997 *offset
= off0
+ int_cst_value (tmp
);
2003 op0
= TREE_OPERAND (expr
, 0);
2004 op0
= strip_offset_1 (op0
, true, true, &off0
);
2007 if (op0
== TREE_OPERAND (expr
, 0))
2010 expr
= build_fold_addr_expr (op0
);
2011 return fold_convert (orig_type
, expr
);
2014 inside_addr
= false;
2021 /* Default handling of expressions for that we want to recurse into
2022 the first operand. */
2023 op0
= TREE_OPERAND (expr
, 0);
2024 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2027 if (op0
== TREE_OPERAND (expr
, 0)
2028 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2031 expr
= copy_node (expr
);
2032 TREE_OPERAND (expr
, 0) = op0
;
2034 TREE_OPERAND (expr
, 1) = op1
;
2036 /* Inside address, we might strip the top level component references,
2037 thus changing type of the expression. Handling of ADDR_EXPR
2039 expr
= fold_convert (orig_type
, expr
);
2044 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2047 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2049 return strip_offset_1 (expr
, false, false, offset
);
2052 /* Returns variant of TYPE that can be used as base for different uses.
2053 We return unsigned type with the same precision, which avoids problems
2057 generic_type_for (tree type
)
2059 if (POINTER_TYPE_P (type
))
2060 return unsigned_type_for (type
);
2062 if (TYPE_UNSIGNED (type
))
2065 return unsigned_type_for (type
);
2068 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2069 the bitmap to that we should store it. */
2071 static struct ivopts_data
*fd_ivopts_data
;
2073 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2075 bitmap
*depends_on
= (bitmap
*) data
;
2076 struct version_info
*info
;
2078 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2080 info
= name_info (fd_ivopts_data
, *expr_p
);
2082 if (!info
->inv_id
|| info
->has_nonlin_use
)
2086 *depends_on
= BITMAP_ALLOC (NULL
);
2087 bitmap_set_bit (*depends_on
, info
->inv_id
);
2092 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2093 position to POS. If USE is not NULL, the candidate is set as related to
2094 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2095 replacement of the final value of the iv by a direct computation. */
2097 static struct iv_cand
*
2098 add_candidate_1 (struct ivopts_data
*data
,
2099 tree base
, tree step
, bool important
, enum iv_position pos
,
2100 struct iv_use
*use
, gimple incremented_at
)
2103 struct iv_cand
*cand
= NULL
;
2104 tree type
, orig_type
;
2108 orig_type
= TREE_TYPE (base
);
2109 type
= generic_type_for (orig_type
);
2110 if (type
!= orig_type
)
2112 base
= fold_convert (type
, base
);
2113 step
= fold_convert (type
, step
);
2117 for (i
= 0; i
< n_iv_cands (data
); i
++)
2119 cand
= iv_cand (data
, i
);
2121 if (cand
->pos
!= pos
)
2124 if (cand
->incremented_at
!= incremented_at
2125 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2126 && cand
->ainc_use
!= use
))
2140 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2141 && operand_equal_p (step
, cand
->iv
->step
, 0))
2145 if (i
== n_iv_cands (data
))
2147 cand
= XCNEW (struct iv_cand
);
2153 cand
->iv
= alloc_iv (base
, step
);
2156 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2158 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2159 cand
->var_after
= cand
->var_before
;
2161 cand
->important
= important
;
2162 cand
->incremented_at
= incremented_at
;
2163 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2166 && TREE_CODE (step
) != INTEGER_CST
)
2168 fd_ivopts_data
= data
;
2169 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2172 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2173 cand
->ainc_use
= use
;
2175 cand
->ainc_use
= NULL
;
2177 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2178 dump_cand (dump_file
, cand
);
2181 if (important
&& !cand
->important
)
2183 cand
->important
= true;
2184 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2185 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2190 bitmap_set_bit (use
->related_cands
, i
);
2191 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2192 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2199 /* Returns true if incrementing the induction variable at the end of the LOOP
2202 The purpose is to avoid splitting latch edge with a biv increment, thus
2203 creating a jump, possibly confusing other optimization passes and leaving
2204 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2205 is not available (so we do not have a better alternative), or if the latch
2206 edge is already nonempty. */
2209 allow_ip_end_pos_p (struct loop
*loop
)
2211 if (!ip_normal_pos (loop
))
2214 if (!empty_block_p (ip_end_pos (loop
)))
2220 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2221 Important field is set to IMPORTANT. */
2224 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2225 bool important
, struct iv_use
*use
)
2227 basic_block use_bb
= gimple_bb (use
->stmt
);
2228 enum machine_mode mem_mode
;
2229 unsigned HOST_WIDE_INT cstepi
;
2231 /* If we insert the increment in any position other than the standard
2232 ones, we must ensure that it is incremented once per iteration.
2233 It must not be in an inner nested loop, or one side of an if
2235 if (use_bb
->loop_father
!= data
->current_loop
2236 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2237 || stmt_could_throw_p (use
->stmt
)
2238 || !cst_and_fits_in_hwi (step
))
2241 cstepi
= int_cst_value (step
);
2243 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2244 if ((HAVE_PRE_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2245 || (HAVE_PRE_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2247 enum tree_code code
= MINUS_EXPR
;
2249 tree new_step
= step
;
2251 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2253 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2254 code
= POINTER_PLUS_EXPR
;
2257 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2258 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2259 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2262 if ((HAVE_POST_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2263 || (HAVE_POST_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2265 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2270 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2271 position to POS. If USE is not NULL, the candidate is set as related to
2272 it. The candidate computation is scheduled on all available positions. */
2275 add_candidate (struct ivopts_data
*data
,
2276 tree base
, tree step
, bool important
, struct iv_use
*use
)
2278 if (ip_normal_pos (data
->current_loop
))
2279 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2280 if (ip_end_pos (data
->current_loop
)
2281 && allow_ip_end_pos_p (data
->current_loop
))
2282 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2284 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2285 add_autoinc_candidates (data
, base
, step
, important
, use
);
2288 /* Add a standard "0 + 1 * iteration" iv candidate for a
2289 type with SIZE bits. */
2292 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2295 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2296 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2300 /* Adds standard iv candidates. */
2303 add_standard_iv_candidates (struct ivopts_data
*data
)
2305 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2307 /* The same for a double-integer type if it is still fast enough. */
2308 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2309 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2313 /* Adds candidates bases on the old induction variable IV. */
2316 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2320 struct iv_cand
*cand
;
2322 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2324 /* The same, but with initial value zero. */
2325 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2326 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2328 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2329 iv
->step
, true, NULL
);
2331 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2332 if (gimple_code (phi
) == GIMPLE_PHI
)
2334 /* Additionally record the possibility of leaving the original iv
2336 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2337 cand
= add_candidate_1 (data
,
2338 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2339 SSA_NAME_DEF_STMT (def
));
2340 cand
->var_before
= iv
->ssa_name
;
2341 cand
->var_after
= def
;
2345 /* Adds candidates based on the old induction variables. */
2348 add_old_ivs_candidates (struct ivopts_data
*data
)
2354 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2356 iv
= ver_info (data
, i
)->iv
;
2357 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2358 add_old_iv_candidates (data
, iv
);
2362 /* Adds candidates based on the value of the induction variable IV and USE. */
2365 add_iv_value_candidates (struct ivopts_data
*data
,
2366 struct iv
*iv
, struct iv_use
*use
)
2368 unsigned HOST_WIDE_INT offset
;
2372 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2374 /* The same, but with initial value zero. Make such variable important,
2375 since it is generic enough so that possibly many uses may be based
2377 basetype
= TREE_TYPE (iv
->base
);
2378 if (POINTER_TYPE_P (basetype
))
2379 basetype
= sizetype
;
2380 add_candidate (data
, build_int_cst (basetype
, 0),
2381 iv
->step
, true, use
);
2383 /* Third, try removing the constant offset. Make sure to even
2384 add a candidate for &a[0] vs. (T *)&a. */
2385 base
= strip_offset (iv
->base
, &offset
);
2387 || base
!= iv
->base
)
2388 add_candidate (data
, base
, iv
->step
, false, use
);
2391 /* Adds candidates based on the uses. */
2394 add_derived_ivs_candidates (struct ivopts_data
*data
)
2398 for (i
= 0; i
< n_iv_uses (data
); i
++)
2400 struct iv_use
*use
= iv_use (data
, i
);
2407 case USE_NONLINEAR_EXPR
:
2410 /* Just add the ivs based on the value of the iv used here. */
2411 add_iv_value_candidates (data
, use
->iv
, use
);
2420 /* Record important candidates and add them to related_cands bitmaps
2424 record_important_candidates (struct ivopts_data
*data
)
2429 for (i
= 0; i
< n_iv_cands (data
); i
++)
2431 struct iv_cand
*cand
= iv_cand (data
, i
);
2433 if (cand
->important
)
2434 bitmap_set_bit (data
->important_candidates
, i
);
2437 data
->consider_all_candidates
= (n_iv_cands (data
)
2438 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2440 if (data
->consider_all_candidates
)
2442 /* We will not need "related_cands" bitmaps in this case,
2443 so release them to decrease peak memory consumption. */
2444 for (i
= 0; i
< n_iv_uses (data
); i
++)
2446 use
= iv_use (data
, i
);
2447 BITMAP_FREE (use
->related_cands
);
2452 /* Add important candidates to the related_cands bitmaps. */
2453 for (i
= 0; i
< n_iv_uses (data
); i
++)
2454 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2455 data
->important_candidates
);
2459 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2460 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2461 we allocate a simple list to every use. */
2464 alloc_use_cost_map (struct ivopts_data
*data
)
2466 unsigned i
, size
, s
, j
;
2468 for (i
= 0; i
< n_iv_uses (data
); i
++)
2470 struct iv_use
*use
= iv_use (data
, i
);
2473 if (data
->consider_all_candidates
)
2474 size
= n_iv_cands (data
);
2478 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2483 /* Round up to the power of two, so that moduling by it is fast. */
2484 for (size
= 1; size
< s
; size
<<= 1)
2488 use
->n_map_members
= size
;
2489 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2493 /* Returns description of computation cost of expression whose runtime
2494 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2497 new_cost (unsigned runtime
, unsigned complexity
)
2501 cost
.cost
= runtime
;
2502 cost
.complexity
= complexity
;
2507 /* Adds costs COST1 and COST2. */
2510 add_costs (comp_cost cost1
, comp_cost cost2
)
2512 cost1
.cost
+= cost2
.cost
;
2513 cost1
.complexity
+= cost2
.complexity
;
2517 /* Subtracts costs COST1 and COST2. */
2520 sub_costs (comp_cost cost1
, comp_cost cost2
)
2522 cost1
.cost
-= cost2
.cost
;
2523 cost1
.complexity
-= cost2
.complexity
;
2528 /* Returns a negative number if COST1 < COST2, a positive number if
2529 COST1 > COST2, and 0 if COST1 = COST2. */
2532 compare_costs (comp_cost cost1
, comp_cost cost2
)
2534 if (cost1
.cost
== cost2
.cost
)
2535 return cost1
.complexity
- cost2
.complexity
;
2537 return cost1
.cost
- cost2
.cost
;
2540 /* Returns true if COST is infinite. */
2543 infinite_cost_p (comp_cost cost
)
2545 return cost
.cost
== INFTY
;
2548 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2549 on invariants DEPENDS_ON and that the value used in expressing it
2553 set_use_iv_cost (struct ivopts_data
*data
,
2554 struct iv_use
*use
, struct iv_cand
*cand
,
2555 comp_cost cost
, bitmap depends_on
, tree value
)
2559 if (infinite_cost_p (cost
))
2561 BITMAP_FREE (depends_on
);
2565 if (data
->consider_all_candidates
)
2567 use
->cost_map
[cand
->id
].cand
= cand
;
2568 use
->cost_map
[cand
->id
].cost
= cost
;
2569 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2570 use
->cost_map
[cand
->id
].value
= value
;
2574 /* n_map_members is a power of two, so this computes modulo. */
2575 s
= cand
->id
& (use
->n_map_members
- 1);
2576 for (i
= s
; i
< use
->n_map_members
; i
++)
2577 if (!use
->cost_map
[i
].cand
)
2579 for (i
= 0; i
< s
; i
++)
2580 if (!use
->cost_map
[i
].cand
)
2586 use
->cost_map
[i
].cand
= cand
;
2587 use
->cost_map
[i
].cost
= cost
;
2588 use
->cost_map
[i
].depends_on
= depends_on
;
2589 use
->cost_map
[i
].value
= value
;
2592 /* Gets cost of (USE, CANDIDATE) pair. */
2594 static struct cost_pair
*
2595 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2596 struct iv_cand
*cand
)
2599 struct cost_pair
*ret
;
2604 if (data
->consider_all_candidates
)
2606 ret
= use
->cost_map
+ cand
->id
;
2613 /* n_map_members is a power of two, so this computes modulo. */
2614 s
= cand
->id
& (use
->n_map_members
- 1);
2615 for (i
= s
; i
< use
->n_map_members
; i
++)
2616 if (use
->cost_map
[i
].cand
== cand
)
2617 return use
->cost_map
+ i
;
2619 for (i
= 0; i
< s
; i
++)
2620 if (use
->cost_map
[i
].cand
== cand
)
2621 return use
->cost_map
+ i
;
2626 /* Returns estimate on cost of computing SEQ. */
2629 seq_cost (rtx seq
, bool speed
)
2634 for (; seq
; seq
= NEXT_INSN (seq
))
2636 set
= single_set (seq
);
2638 cost
+= rtx_cost (set
, SET
,speed
);
2646 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2648 produce_memory_decl_rtl (tree obj
, int *regno
)
2650 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2651 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2655 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2657 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2658 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2659 SET_SYMBOL_REF_DECL (x
, obj
);
2660 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2661 set_mem_addr_space (x
, as
);
2662 targetm
.encode_section_info (obj
, x
, true);
2666 x
= gen_raw_REG (address_mode
, (*regno
)++);
2667 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2668 set_mem_addr_space (x
, as
);
2674 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2675 walk_tree. DATA contains the actual fake register number. */
2678 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2680 tree obj
= NULL_TREE
;
2682 int *regno
= (int *) data
;
2684 switch (TREE_CODE (*expr_p
))
2687 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2688 handled_component_p (*expr_p
);
2689 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2692 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2693 x
= produce_memory_decl_rtl (obj
, regno
);
2698 obj
= SSA_NAME_VAR (*expr_p
);
2699 if (!DECL_RTL_SET_P (obj
))
2700 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2709 if (DECL_RTL_SET_P (obj
))
2712 if (DECL_MODE (obj
) == BLKmode
)
2713 x
= produce_memory_decl_rtl (obj
, regno
);
2715 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2725 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2726 SET_DECL_RTL (obj
, x
);
2732 /* Determines cost of the computation of EXPR. */
2735 computation_cost (tree expr
, bool speed
)
2738 tree type
= TREE_TYPE (expr
);
2740 /* Avoid using hard regs in ways which may be unsupported. */
2741 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2742 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
2743 enum node_frequency real_frequency
= node
->frequency
;
2745 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2746 crtl
->maybe_hot_insn_p
= speed
;
2747 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2749 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2752 default_rtl_profile ();
2753 node
->frequency
= real_frequency
;
2755 cost
= seq_cost (seq
, speed
);
2757 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2758 TYPE_ADDR_SPACE (type
), speed
);
2763 /* Returns variable containing the value of candidate CAND at statement AT. */
2766 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2768 if (stmt_after_increment (loop
, cand
, stmt
))
2769 return cand
->var_after
;
2771 return cand
->var_before
;
2774 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2775 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2778 tree_int_cst_sign_bit (const_tree t
)
2780 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2781 unsigned HOST_WIDE_INT w
;
2783 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2784 w
= TREE_INT_CST_LOW (t
);
2787 w
= TREE_INT_CST_HIGH (t
);
2788 bitno
-= HOST_BITS_PER_WIDE_INT
;
2791 return (w
>> bitno
) & 1;
2794 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2795 same precision that is at least as wide as the precision of TYPE, stores
2796 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2800 determine_common_wider_type (tree
*a
, tree
*b
)
2802 tree wider_type
= NULL
;
2804 tree atype
= TREE_TYPE (*a
);
2806 if (CONVERT_EXPR_P (*a
))
2808 suba
= TREE_OPERAND (*a
, 0);
2809 wider_type
= TREE_TYPE (suba
);
2810 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2816 if (CONVERT_EXPR_P (*b
))
2818 subb
= TREE_OPERAND (*b
, 0);
2819 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2830 /* Determines the expression by that USE is expressed from induction variable
2831 CAND at statement AT in LOOP. The expression is stored in a decomposed
2832 form into AFF. Returns false if USE cannot be expressed using CAND. */
2835 get_computation_aff (struct loop
*loop
,
2836 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2837 struct affine_tree_combination
*aff
)
2839 tree ubase
= use
->iv
->base
;
2840 tree ustep
= use
->iv
->step
;
2841 tree cbase
= cand
->iv
->base
;
2842 tree cstep
= cand
->iv
->step
, cstep_common
;
2843 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2844 tree common_type
, var
;
2846 aff_tree cbase_aff
, var_aff
;
2849 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2851 /* We do not have a precision to express the values of use. */
2855 var
= var_at_stmt (loop
, cand
, at
);
2856 uutype
= unsigned_type_for (utype
);
2858 /* If the conversion is not noop, perform it. */
2859 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2861 cstep
= fold_convert (uutype
, cstep
);
2862 cbase
= fold_convert (uutype
, cbase
);
2863 var
= fold_convert (uutype
, var
);
2866 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2869 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2870 type, we achieve better folding by computing their difference in this
2871 wider type, and cast the result to UUTYPE. We do not need to worry about
2872 overflows, as all the arithmetics will in the end be performed in UUTYPE
2874 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2876 /* use = ubase - ratio * cbase + ratio * var. */
2877 tree_to_aff_combination (ubase
, common_type
, aff
);
2878 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2879 tree_to_aff_combination (var
, uutype
, &var_aff
);
2881 /* We need to shift the value if we are after the increment. */
2882 if (stmt_after_increment (loop
, cand
, at
))
2886 if (common_type
!= uutype
)
2887 cstep_common
= fold_convert (common_type
, cstep
);
2889 cstep_common
= cstep
;
2891 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2892 aff_combination_add (&cbase_aff
, &cstep_aff
);
2895 aff_combination_scale (&cbase_aff
, double_int_neg (rat
));
2896 aff_combination_add (aff
, &cbase_aff
);
2897 if (common_type
!= uutype
)
2898 aff_combination_convert (aff
, uutype
);
2900 aff_combination_scale (&var_aff
, rat
);
2901 aff_combination_add (aff
, &var_aff
);
2906 /* Determines the expression by that USE is expressed from induction variable
2907 CAND at statement AT in LOOP. The computation is unshared. */
2910 get_computation_at (struct loop
*loop
,
2911 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
2914 tree type
= TREE_TYPE (use
->iv
->base
);
2916 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
2918 unshare_aff_combination (&aff
);
2919 return fold_convert (type
, aff_combination_to_tree (&aff
));
2922 /* Determines the expression by that USE is expressed from induction variable
2923 CAND in LOOP. The computation is unshared. */
2926 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2928 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2931 /* Returns cost of addition in MODE. */
2934 add_cost (enum machine_mode mode
, bool speed
)
2936 static unsigned costs
[NUM_MACHINE_MODES
];
2944 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2945 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
2946 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
2951 cost
= seq_cost (seq
, speed
);
2957 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2958 fprintf (dump_file
, "Addition in %s costs %d\n",
2959 GET_MODE_NAME (mode
), cost
);
2963 /* Entry in a hashtable of already known costs for multiplication. */
2966 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2967 enum machine_mode mode
; /* In mode. */
2968 unsigned cost
; /* The cost. */
2971 /* Counts hash value for the ENTRY. */
2974 mbc_entry_hash (const void *entry
)
2976 const struct mbc_entry
*e
= (const struct mbc_entry
*) entry
;
2978 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2981 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2984 mbc_entry_eq (const void *entry1
, const void *entry2
)
2986 const struct mbc_entry
*e1
= (const struct mbc_entry
*) entry1
;
2987 const struct mbc_entry
*e2
= (const struct mbc_entry
*) entry2
;
2989 return (e1
->mode
== e2
->mode
2990 && e1
->cst
== e2
->cst
);
2993 /* Returns cost of multiplication by constant CST in MODE. */
2996 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
, bool speed
)
2998 static htab_t costs
;
2999 struct mbc_entry
**cached
, act
;
3004 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3008 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3010 return (*cached
)->cost
;
3012 *cached
= XNEW (struct mbc_entry
);
3013 (*cached
)->mode
= mode
;
3014 (*cached
)->cst
= cst
;
3017 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3018 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3022 cost
= seq_cost (seq
, speed
);
3024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3025 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3026 (int) cst
, GET_MODE_NAME (mode
), cost
);
3028 (*cached
)->cost
= cost
;
3033 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3034 validity for a memory reference accessing memory of mode MODE in
3035 address space AS. */
3037 DEF_VEC_P (sbitmap
);
3038 DEF_VEC_ALLOC_P (sbitmap
, heap
);
3041 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3044 #define MAX_RATIO 128
3045 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3046 static VEC (sbitmap
, heap
) *valid_mult_list
;
3049 if (data_index
>= VEC_length (sbitmap
, valid_mult_list
))
3050 VEC_safe_grow_cleared (sbitmap
, heap
, valid_mult_list
, data_index
+ 1);
3052 valid_mult
= VEC_index (sbitmap
, valid_mult_list
, data_index
);
3055 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3056 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3060 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3061 sbitmap_zero (valid_mult
);
3062 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3063 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3065 XEXP (addr
, 1) = gen_int_mode (i
, address_mode
);
3066 if (memory_address_addr_space_p (mode
, addr
, as
))
3067 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3070 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3072 fprintf (dump_file
, " allowed multipliers:");
3073 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3074 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3075 fprintf (dump_file
, " %d", (int) i
);
3076 fprintf (dump_file
, "\n");
3077 fprintf (dump_file
, "\n");
3080 VEC_replace (sbitmap
, valid_mult_list
, data_index
, valid_mult
);
3083 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3086 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3089 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3090 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3091 variable is omitted. Compute the cost for a memory reference that accesses
3092 a memory location of mode MEM_MODE in address space AS.
3094 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3095 size of MEM_MODE / RATIO) is available. To make this determination, we
3096 look at the size of the increment to be made, which is given in CSTEP.
3097 CSTEP may be zero if the step is unknown.
3098 STMT_AFTER_INC is true iff the statement we're looking at is after the
3099 increment of the original biv.
3101 TODO -- there must be some better way. This all is quite crude. */
3105 HOST_WIDE_INT min_offset
, max_offset
;
3106 unsigned costs
[2][2][2][2];
3107 } *address_cost_data
;
3109 DEF_VEC_P (address_cost_data
);
3110 DEF_VEC_ALLOC_P (address_cost_data
, heap
);
3113 get_address_cost (bool symbol_present
, bool var_present
,
3114 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3115 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3116 addr_space_t as
, bool speed
,
3117 bool stmt_after_inc
, bool *may_autoinc
)
3119 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3120 static VEC(address_cost_data
, heap
) *address_cost_data_list
;
3121 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3122 address_cost_data data
;
3123 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3124 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3125 unsigned cost
, acost
, complexity
;
3126 bool offset_p
, ratio_p
, autoinc
;
3127 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3128 unsigned HOST_WIDE_INT mask
;
3131 if (data_index
>= VEC_length (address_cost_data
, address_cost_data_list
))
3132 VEC_safe_grow_cleared (address_cost_data
, heap
, address_cost_data_list
,
3135 data
= VEC_index (address_cost_data
, address_cost_data_list
, data_index
);
3139 HOST_WIDE_INT start
= BIGGEST_ALIGNMENT
/ BITS_PER_UNIT
;
3140 HOST_WIDE_INT rat
, off
;
3141 int old_cse_not_expected
;
3142 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3143 rtx seq
, addr
, base
;
3146 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3148 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3150 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3151 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3153 XEXP (addr
, 1) = gen_int_mode (i
, address_mode
);
3154 if (!memory_address_addr_space_p (mem_mode
, addr
, as
))
3157 data
->max_offset
= i
== start
? 0 : i
>> 1;
3158 off
= data
->max_offset
;
3160 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3162 XEXP (addr
, 1) = gen_int_mode (-i
, address_mode
);
3163 if (!memory_address_addr_space_p (mem_mode
, addr
, as
))
3166 data
->min_offset
= i
== start
? 0 : -(i
>> 1);
3168 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3170 fprintf (dump_file
, "get_address_cost:\n");
3171 fprintf (dump_file
, " min offset %s %d\n",
3172 GET_MODE_NAME (mem_mode
),
3173 (int) data
->min_offset
);
3174 fprintf (dump_file
, " max offset %s %d\n",
3175 GET_MODE_NAME (mem_mode
),
3176 (int) data
->max_offset
);
3180 for (i
= 2; i
<= MAX_RATIO
; i
++)
3181 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3187 /* Compute the cost of various addressing modes. */
3189 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3190 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3192 if (HAVE_PRE_DECREMENT
)
3194 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3195 has_predec
[mem_mode
]
3196 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3198 if (HAVE_POST_DECREMENT
)
3200 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3201 has_postdec
[mem_mode
]
3202 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3204 if (HAVE_PRE_INCREMENT
)
3206 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3207 has_preinc
[mem_mode
]
3208 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3210 if (HAVE_POST_INCREMENT
)
3212 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3213 has_postinc
[mem_mode
]
3214 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3216 for (i
= 0; i
< 16; i
++)
3219 var_p
= (i
>> 1) & 1;
3220 off_p
= (i
>> 2) & 1;
3221 rat_p
= (i
>> 3) & 1;
3225 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3226 gen_int_mode (rat
, address_mode
));
3229 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3233 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3234 /* ??? We can run into trouble with some backends by presenting
3235 it with symbols which haven't been properly passed through
3236 targetm.encode_section_info. By setting the local bit, we
3237 enhance the probability of things working. */
3238 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3241 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3243 (PLUS
, address_mode
, base
,
3244 gen_int_mode (off
, address_mode
)));
3247 base
= gen_int_mode (off
, address_mode
);
3252 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3255 /* To avoid splitting addressing modes, pretend that no cse will
3257 old_cse_not_expected
= cse_not_expected
;
3258 cse_not_expected
= true;
3259 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3260 cse_not_expected
= old_cse_not_expected
;
3264 acost
= seq_cost (seq
, speed
);
3265 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3269 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3272 /* On some targets, it is quite expensive to load symbol to a register,
3273 which makes addresses that contain symbols look much more expensive.
3274 However, the symbol will have to be loaded in any case before the
3275 loop (and quite likely we have it in register already), so it does not
3276 make much sense to penalize them too heavily. So make some final
3277 tweaks for the SYMBOL_PRESENT modes:
3279 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3280 var is cheaper, use this mode with small penalty.
3281 If VAR_PRESENT is true, try whether the mode with
3282 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3283 if this is the case, use it. */
3284 add_c
= add_cost (address_mode
, speed
);
3285 for (i
= 0; i
< 8; i
++)
3288 off_p
= (i
>> 1) & 1;
3289 rat_p
= (i
>> 2) & 1;
3291 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3295 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3296 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3299 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3301 fprintf (dump_file
, "Address costs:\n");
3303 for (i
= 0; i
< 16; i
++)
3306 var_p
= (i
>> 1) & 1;
3307 off_p
= (i
>> 2) & 1;
3308 rat_p
= (i
>> 3) & 1;
3310 fprintf (dump_file
, " ");
3312 fprintf (dump_file
, "sym + ");
3314 fprintf (dump_file
, "var + ");
3316 fprintf (dump_file
, "cst + ");
3318 fprintf (dump_file
, "rat * ");
3320 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3321 fprintf (dump_file
, "index costs %d\n", acost
);
3323 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3324 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3325 fprintf (dump_file
, " May include autoinc/dec\n");
3326 fprintf (dump_file
, "\n");
3329 VEC_replace (address_cost_data
, address_cost_data_list
,
3333 bits
= GET_MODE_BITSIZE (address_mode
);
3334 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3336 if ((offset
>> (bits
- 1) & 1))
3341 msize
= GET_MODE_SIZE (mem_mode
);
3342 autoinc_offset
= offset
;
3344 autoinc_offset
+= ratio
* cstep
;
3345 if (symbol_present
|| var_present
|| ratio
!= 1)
3347 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3349 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3351 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3353 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3354 && msize
== -cstep
))
3358 offset_p
= (s_offset
!= 0
3359 && data
->min_offset
<= s_offset
3360 && s_offset
<= data
->max_offset
);
3361 ratio_p
= (ratio
!= 1
3362 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3364 if (ratio
!= 1 && !ratio_p
)
3365 cost
+= multiply_by_cost (ratio
, address_mode
, speed
);
3367 if (s_offset
&& !offset_p
&& !symbol_present
)
3368 cost
+= add_cost (address_mode
, speed
);
3371 *may_autoinc
= autoinc
;
3372 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3373 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3374 return new_cost (cost
+ acost
, complexity
);
3377 /* Estimates cost of forcing expression EXPR into a variable. */
3380 force_expr_to_var_cost (tree expr
, bool speed
)
3382 static bool costs_initialized
= false;
3383 static unsigned integer_cost
[2];
3384 static unsigned symbol_cost
[2];
3385 static unsigned address_cost
[2];
3387 comp_cost cost0
, cost1
, cost
;
3388 enum machine_mode mode
;
3390 if (!costs_initialized
)
3392 tree type
= build_pointer_type (integer_type_node
);
3397 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3398 TREE_STATIC (var
) = 1;
3399 x
= produce_memory_decl_rtl (var
, NULL
);
3400 SET_DECL_RTL (var
, x
);
3402 addr
= build1 (ADDR_EXPR
, type
, var
);
3405 for (i
= 0; i
< 2; i
++)
3407 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3410 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3413 = computation_cost (build2 (POINTER_PLUS_EXPR
, type
,
3415 build_int_cst (sizetype
, 2000)), i
) + 1;
3416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3418 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3419 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3420 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3421 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3422 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3423 fprintf (dump_file
, "\n");
3427 costs_initialized
= true;
3432 if (SSA_VAR_P (expr
))
3435 if (is_gimple_min_invariant (expr
))
3437 if (TREE_CODE (expr
) == INTEGER_CST
)
3438 return new_cost (integer_cost
[speed
], 0);
3440 if (TREE_CODE (expr
) == ADDR_EXPR
)
3442 tree obj
= TREE_OPERAND (expr
, 0);
3444 if (TREE_CODE (obj
) == VAR_DECL
3445 || TREE_CODE (obj
) == PARM_DECL
3446 || TREE_CODE (obj
) == RESULT_DECL
)
3447 return new_cost (symbol_cost
[speed
], 0);
3450 return new_cost (address_cost
[speed
], 0);
3453 switch (TREE_CODE (expr
))
3455 case POINTER_PLUS_EXPR
:
3459 op0
= TREE_OPERAND (expr
, 0);
3460 op1
= TREE_OPERAND (expr
, 1);
3464 if (is_gimple_val (op0
))
3467 cost0
= force_expr_to_var_cost (op0
, speed
);
3469 if (is_gimple_val (op1
))
3472 cost1
= force_expr_to_var_cost (op1
, speed
);
3477 op0
= TREE_OPERAND (expr
, 0);
3481 if (is_gimple_val (op0
))
3484 cost0
= force_expr_to_var_cost (op0
, speed
);
3490 /* Just an arbitrary value, FIXME. */
3491 return new_cost (target_spill_cost
[speed
], 0);
3494 mode
= TYPE_MODE (TREE_TYPE (expr
));
3495 switch (TREE_CODE (expr
))
3497 case POINTER_PLUS_EXPR
:
3501 cost
= new_cost (add_cost (mode
, speed
), 0);
3505 if (cst_and_fits_in_hwi (op0
))
3506 cost
= new_cost (multiply_by_cost (int_cst_value (op0
), mode
, speed
), 0);
3507 else if (cst_and_fits_in_hwi (op1
))
3508 cost
= new_cost (multiply_by_cost (int_cst_value (op1
), mode
, speed
), 0);
3510 return new_cost (target_spill_cost
[speed
], 0);
3517 cost
= add_costs (cost
, cost0
);
3518 cost
= add_costs (cost
, cost1
);
3520 /* Bound the cost by target_spill_cost. The parts of complicated
3521 computations often are either loop invariant or at least can
3522 be shared between several iv uses, so letting this grow without
3523 limits would not give reasonable results. */
3524 if (cost
.cost
> (int) target_spill_cost
[speed
])
3525 cost
.cost
= target_spill_cost
[speed
];
3530 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3531 invariants the computation depends on. */
3534 force_var_cost (struct ivopts_data
*data
,
3535 tree expr
, bitmap
*depends_on
)
3539 fd_ivopts_data
= data
;
3540 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3543 return force_expr_to_var_cost (expr
, data
->speed
);
3546 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3547 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3548 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3549 invariants the computation depends on. */
3552 split_address_cost (struct ivopts_data
*data
,
3553 tree addr
, bool *symbol_present
, bool *var_present
,
3554 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3557 HOST_WIDE_INT bitsize
;
3558 HOST_WIDE_INT bitpos
;
3560 enum machine_mode mode
;
3561 int unsignedp
, volatilep
;
3563 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3564 &unsignedp
, &volatilep
, false);
3567 || bitpos
% BITS_PER_UNIT
!= 0
3568 || TREE_CODE (core
) != VAR_DECL
)
3570 *symbol_present
= false;
3571 *var_present
= true;
3572 fd_ivopts_data
= data
;
3573 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3574 return new_cost (target_spill_cost
[data
->speed
], 0);
3577 *offset
+= bitpos
/ BITS_PER_UNIT
;
3578 if (TREE_STATIC (core
)
3579 || DECL_EXTERNAL (core
))
3581 *symbol_present
= true;
3582 *var_present
= false;
3586 *symbol_present
= false;
3587 *var_present
= true;
3591 /* Estimates cost of expressing difference of addresses E1 - E2 as
3592 var + symbol + offset. The value of offset is added to OFFSET,
3593 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3594 part is missing. DEPENDS_ON is a set of the invariants the computation
3598 ptr_difference_cost (struct ivopts_data
*data
,
3599 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3600 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3602 HOST_WIDE_INT diff
= 0;
3603 aff_tree aff_e1
, aff_e2
;
3606 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3608 if (ptr_difference_const (e1
, e2
, &diff
))
3611 *symbol_present
= false;
3612 *var_present
= false;
3616 if (integer_zerop (e2
))
3617 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3618 symbol_present
, var_present
, offset
, depends_on
);
3620 *symbol_present
= false;
3621 *var_present
= true;
3623 type
= signed_type_for (TREE_TYPE (e1
));
3624 tree_to_aff_combination (e1
, type
, &aff_e1
);
3625 tree_to_aff_combination (e2
, type
, &aff_e2
);
3626 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3627 aff_combination_add (&aff_e1
, &aff_e2
);
3629 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3632 /* Estimates cost of expressing difference E1 - E2 as
3633 var + symbol + offset. The value of offset is added to OFFSET,
3634 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3635 part is missing. DEPENDS_ON is a set of the invariants the computation
3639 difference_cost (struct ivopts_data
*data
,
3640 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3641 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3643 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3644 unsigned HOST_WIDE_INT off1
, off2
;
3645 aff_tree aff_e1
, aff_e2
;
3648 e1
= strip_offset (e1
, &off1
);
3649 e2
= strip_offset (e2
, &off2
);
3650 *offset
+= off1
- off2
;
3655 if (TREE_CODE (e1
) == ADDR_EXPR
)
3656 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3657 offset
, depends_on
);
3658 *symbol_present
= false;
3660 if (operand_equal_p (e1
, e2
, 0))
3662 *var_present
= false;
3666 *var_present
= true;
3668 if (integer_zerop (e2
))
3669 return force_var_cost (data
, e1
, depends_on
);
3671 if (integer_zerop (e1
))
3673 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3674 cost
.cost
+= multiply_by_cost (-1, mode
, data
->speed
);
3678 type
= signed_type_for (TREE_TYPE (e1
));
3679 tree_to_aff_combination (e1
, type
, &aff_e1
);
3680 tree_to_aff_combination (e2
, type
, &aff_e2
);
3681 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3682 aff_combination_add (&aff_e1
, &aff_e2
);
3684 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3687 /* Determines the cost of the computation by that USE is expressed
3688 from induction variable CAND. If ADDRESS_P is true, we just need
3689 to create an address from it, otherwise we want to get it into
3690 register. A set of invariants we depend on is stored in
3691 DEPENDS_ON. AT is the statement at that the value is computed.
3692 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3693 addressing is likely. */
3696 get_computation_cost_at (struct ivopts_data
*data
,
3697 struct iv_use
*use
, struct iv_cand
*cand
,
3698 bool address_p
, bitmap
*depends_on
, gimple at
,
3701 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3703 tree utype
= TREE_TYPE (ubase
), ctype
;
3704 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3705 HOST_WIDE_INT ratio
, aratio
;
3706 bool var_present
, symbol_present
, stmt_is_after_inc
;
3709 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
3713 /* Only consider real candidates. */
3715 return infinite_cost
;
3717 cbase
= cand
->iv
->base
;
3718 cstep
= cand
->iv
->step
;
3719 ctype
= TREE_TYPE (cbase
);
3721 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3723 /* We do not have a precision to express the values of use. */
3724 return infinite_cost
;
3729 /* Do not try to express address of an object with computation based
3730 on address of a different object. This may cause problems in rtl
3731 level alias analysis (that does not expect this to be happening,
3732 as this is illegal in C), and would be unlikely to be useful
3734 if (use
->iv
->base_object
3735 && cand
->iv
->base_object
3736 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3737 return infinite_cost
;
3740 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3742 /* TODO -- add direct handling of this case. */
3746 /* CSTEPI is removed from the offset in case statement is after the
3747 increment. If the step is not constant, we use zero instead.
3748 This is a bit imprecise (there is the extra addition), but
3749 redundancy elimination is likely to transform the code so that
3750 it uses value of the variable before increment anyway,
3751 so it is not that much unrealistic. */
3752 if (cst_and_fits_in_hwi (cstep
))
3753 cstepi
= int_cst_value (cstep
);
3757 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3758 return infinite_cost
;
3760 if (double_int_fits_in_shwi_p (rat
))
3761 ratio
= double_int_to_shwi (rat
);
3763 return infinite_cost
;
3766 ctype
= TREE_TYPE (cbase
);
3768 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3769 or ratio == 1, it is better to handle this like
3771 ubase - ratio * cbase + ratio * var
3773 (also holds in the case ratio == -1, TODO. */
3775 if (cst_and_fits_in_hwi (cbase
))
3777 offset
= - ratio
* int_cst_value (cbase
);
3778 cost
= difference_cost (data
,
3779 ubase
, build_int_cst (utype
, 0),
3780 &symbol_present
, &var_present
, &offset
,
3783 else if (ratio
== 1)
3785 cost
= difference_cost (data
,
3787 &symbol_present
, &var_present
, &offset
,
3791 && !POINTER_TYPE_P (ctype
)
3792 && multiplier_allowed_in_address_p
3793 (ratio
, TYPE_MODE (TREE_TYPE (utype
)),
3794 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
3797 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
3798 cost
= difference_cost (data
,
3800 &symbol_present
, &var_present
, &offset
,
3805 cost
= force_var_cost (data
, cbase
, depends_on
);
3806 cost
.cost
+= add_cost (TYPE_MODE (ctype
), data
->speed
);
3807 cost
= add_costs (cost
,
3808 difference_cost (data
,
3809 ubase
, build_int_cst (utype
, 0),
3810 &symbol_present
, &var_present
,
3811 &offset
, depends_on
));
3814 /* If we are after the increment, the value of the candidate is higher by
3816 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
3817 if (stmt_is_after_inc
)
3818 offset
-= ratio
* cstepi
;
3820 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3821 (symbol/var1/const parts may be omitted). If we are looking for an
3822 address, find the cost of addressing this. */
3824 return add_costs (cost
,
3825 get_address_cost (symbol_present
, var_present
,
3826 offset
, ratio
, cstepi
,
3827 TYPE_MODE (TREE_TYPE (utype
)),
3828 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
3829 speed
, stmt_is_after_inc
,
3832 /* Otherwise estimate the costs for computing the expression. */
3833 if (!symbol_present
&& !var_present
&& !offset
)
3836 cost
.cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
), speed
);
3840 /* Symbol + offset should be compile-time computable so consider that they
3841 are added once to the variable, if present. */
3842 if (var_present
&& (symbol_present
|| offset
))
3843 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
)
3844 / AVG_LOOP_NITER (data
->current_loop
);
3846 /* Having offset does not affect runtime cost in case it is added to
3847 symbol, but it increases complexity. */
3851 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
);
3853 aratio
= ratio
> 0 ? ratio
: -ratio
;
3855 cost
.cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
), speed
);
3859 *can_autoinc
= false;
3862 /* Just get the expression, expand it and measure the cost. */
3863 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3866 return infinite_cost
;
3869 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3871 return new_cost (computation_cost (comp
, speed
), 0);
3875 /* Determines the cost of the computation by that USE is expressed
3876 from induction variable CAND. If ADDRESS_P is true, we just need
3877 to create an address from it, otherwise we want to get it into
3878 register. A set of invariants we depend on is stored in
3879 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3880 autoinc addressing is likely. */
3883 get_computation_cost (struct ivopts_data
*data
,
3884 struct iv_use
*use
, struct iv_cand
*cand
,
3885 bool address_p
, bitmap
*depends_on
, bool *can_autoinc
)
3887 return get_computation_cost_at (data
,
3888 use
, cand
, address_p
, depends_on
, use
->stmt
,
3892 /* Determines cost of basing replacement of USE on CAND in a generic
3896 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3897 struct iv_use
*use
, struct iv_cand
*cand
)
3902 /* The simple case first -- if we need to express value of the preserved
3903 original biv, the cost is 0. This also prevents us from counting the
3904 cost of increment twice -- once at this use and once in the cost of
3906 if (cand
->pos
== IP_ORIGINAL
3907 && cand
->incremented_at
== use
->stmt
)
3909 set_use_iv_cost (data
, use
, cand
, zero_cost
, NULL
, NULL_TREE
);
3913 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
, NULL
);
3914 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3916 return !infinite_cost_p (cost
);
3919 /* Determines cost of basing replacement of USE on CAND in an address. */
3922 determine_use_iv_cost_address (struct ivopts_data
*data
,
3923 struct iv_use
*use
, struct iv_cand
*cand
)
3927 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
3930 if (cand
->ainc_use
== use
)
3933 cost
.cost
-= cand
->cost_step
;
3934 /* If we generated the candidate solely for exploiting autoincrement
3935 opportunities, and it turns out it can't be used, set the cost to
3936 infinity to make sure we ignore it. */
3937 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
3938 cost
= infinite_cost
;
3940 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3942 return !infinite_cost_p (cost
);
3945 /* Computes value of candidate CAND at position AT in iteration NITER, and
3946 stores it to VAL. */
3949 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
3952 aff_tree step
, delta
, nit
;
3953 struct iv
*iv
= cand
->iv
;
3954 tree type
= TREE_TYPE (iv
->base
);
3955 tree steptype
= type
;
3956 if (POINTER_TYPE_P (type
))
3957 steptype
= sizetype
;
3959 tree_to_aff_combination (iv
->step
, steptype
, &step
);
3960 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
3961 aff_combination_convert (&nit
, steptype
);
3962 aff_combination_mult (&nit
, &step
, &delta
);
3963 if (stmt_after_increment (loop
, cand
, at
))
3964 aff_combination_add (&delta
, &step
);
3966 tree_to_aff_combination (iv
->base
, type
, val
);
3967 aff_combination_add (val
, &delta
);
3970 /* Returns period of induction variable iv. */
3973 iv_period (struct iv
*iv
)
3975 tree step
= iv
->step
, period
, type
;
3978 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3980 /* Period of the iv is gcd (step, type range). Since type range is power
3981 of two, it suffices to determine the maximum power of two that divides
3983 pow2div
= num_ending_zeros (step
);
3984 type
= unsigned_type_for (TREE_TYPE (step
));
3986 period
= build_low_bits_mask (type
,
3987 (TYPE_PRECISION (type
)
3988 - tree_low_cst (pow2div
, 1)));
3993 /* Returns the comparison operator used when eliminating the iv USE. */
3995 static enum tree_code
3996 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3998 struct loop
*loop
= data
->current_loop
;
4002 ex_bb
= gimple_bb (use
->stmt
);
4003 exit
= EDGE_SUCC (ex_bb
, 0);
4004 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4005 exit
= EDGE_SUCC (ex_bb
, 1);
4007 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4010 /* Check whether it is possible to express the condition in USE by comparison
4011 of candidate CAND. If so, store the value compared with to BOUND. */
4014 may_eliminate_iv (struct ivopts_data
*data
,
4015 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
4020 struct loop
*loop
= data
->current_loop
;
4023 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4026 /* For now works only for exits that dominate the loop latch.
4027 TODO: extend to other conditions inside loop body. */
4028 ex_bb
= gimple_bb (use
->stmt
);
4029 if (use
->stmt
!= last_stmt (ex_bb
)
4030 || gimple_code (use
->stmt
) != GIMPLE_COND
4031 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4034 exit
= EDGE_SUCC (ex_bb
, 0);
4035 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4036 exit
= EDGE_SUCC (ex_bb
, 1);
4037 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4040 nit
= niter_for_exit (data
, exit
);
4044 /* Determine whether we can use the variable to test the exit condition.
4045 This is the case iff the period of the induction variable is greater
4046 than the number of iterations for which the exit condition is true. */
4047 period
= iv_period (cand
->iv
);
4049 /* If the number of iterations is constant, compare against it directly. */
4050 if (TREE_CODE (nit
) == INTEGER_CST
)
4052 if (!tree_int_cst_lt (nit
, period
))
4056 /* If not, and if this is the only possible exit of the loop, see whether
4057 we can get a conservative estimate on the number of iterations of the
4058 entire loop and compare against that instead. */
4059 else if (loop_only_exit_p (loop
, exit
))
4061 double_int period_value
, max_niter
;
4062 if (!estimated_loop_iterations (loop
, true, &max_niter
))
4064 period_value
= tree_to_double_int (period
);
4065 if (double_int_ucmp (max_niter
, period_value
) >= 0)
4069 /* Otherwise, punt. */
4073 cand_value_at (loop
, cand
, use
->stmt
, nit
, &bnd
);
4075 *bound
= aff_combination_to_tree (&bnd
);
4076 /* It is unlikely that computing the number of iterations using division
4077 would be more profitable than keeping the original induction variable. */
4078 if (expression_expensive_p (*bound
))
4083 /* Determines cost of basing replacement of USE on CAND in a condition. */
4086 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4087 struct iv_use
*use
, struct iv_cand
*cand
)
4089 tree bound
= NULL_TREE
;
4091 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4092 comp_cost elim_cost
, express_cost
, cost
;
4094 tree
*control_var
, *bound_cst
;
4096 /* Only consider real candidates. */
4099 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
);
4103 /* Try iv elimination. */
4104 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4106 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4107 /* The bound is a loop invariant, so it will be only computed
4109 elim_cost
.cost
/= AVG_LOOP_NITER (data
->current_loop
);
4112 elim_cost
= infinite_cost
;
4114 /* Try expressing the original giv. If it is compared with an invariant,
4115 note that we cannot get rid of it. */
4116 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4120 /* When the condition is a comparison of the candidate IV against
4121 zero, prefer this IV.
4123 TODO: The constant that we're substracting from the cost should
4124 be target-dependent. This information should be added to the
4125 target costs for each backend. */
4126 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4127 && integer_zerop (*bound_cst
)
4128 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4129 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4130 elim_cost
.cost
-= 1;
4132 express_cost
= get_computation_cost (data
, use
, cand
, false,
4133 &depends_on_express
, NULL
);
4134 fd_ivopts_data
= data
;
4135 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4137 /* Choose the better approach, preferring the eliminated IV. */
4138 if (compare_costs (elim_cost
, express_cost
) <= 0)
4141 depends_on
= depends_on_elim
;
4142 depends_on_elim
= NULL
;
4146 cost
= express_cost
;
4147 depends_on
= depends_on_express
;
4148 depends_on_express
= NULL
;
4152 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4154 if (depends_on_elim
)
4155 BITMAP_FREE (depends_on_elim
);
4156 if (depends_on_express
)
4157 BITMAP_FREE (depends_on_express
);
4159 return !infinite_cost_p (cost
);
4162 /* Determines cost of basing replacement of USE on CAND. Returns false
4163 if USE cannot be based on CAND. */
4166 determine_use_iv_cost (struct ivopts_data
*data
,
4167 struct iv_use
*use
, struct iv_cand
*cand
)
4171 case USE_NONLINEAR_EXPR
:
4172 return determine_use_iv_cost_generic (data
, use
, cand
);
4175 return determine_use_iv_cost_address (data
, use
, cand
);
4178 return determine_use_iv_cost_condition (data
, use
, cand
);
4185 /* Return true if get_computation_cost indicates that autoincrement is
4186 a possibility for the pair of USE and CAND, false otherwise. */
4189 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4190 struct iv_cand
*cand
)
4196 if (use
->type
!= USE_ADDRESS
)
4199 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4202 BITMAP_FREE (depends_on
);
4204 return !infinite_cost_p (cost
) && can_autoinc
;
4207 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4208 use that allows autoincrement, and set their AINC_USE if possible. */
4211 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4215 for (i
= 0; i
< n_iv_cands (data
); i
++)
4217 struct iv_cand
*cand
= iv_cand (data
, i
);
4218 struct iv_use
*closest
= NULL
;
4219 if (cand
->pos
!= IP_ORIGINAL
)
4221 for (j
= 0; j
< n_iv_uses (data
); j
++)
4223 struct iv_use
*use
= iv_use (data
, j
);
4224 unsigned uid
= gimple_uid (use
->stmt
);
4225 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
)
4226 || uid
> gimple_uid (cand
->incremented_at
))
4228 if (closest
== NULL
|| uid
> gimple_uid (closest
->stmt
))
4231 if (closest
== NULL
|| !autoinc_possible_for_pair (data
, closest
, cand
))
4233 cand
->ainc_use
= closest
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4257 /* Determines costs of basing the use of the iv on an iv candidate. */
4260 determine_use_iv_costs (struct ivopts_data
*data
)
4264 struct iv_cand
*cand
;
4265 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4267 alloc_use_cost_map (data
);
4269 for (i
= 0; i
< n_iv_uses (data
); i
++)
4271 use
= iv_use (data
, i
);
4273 if (data
->consider_all_candidates
)
4275 for (j
= 0; j
< n_iv_cands (data
); j
++)
4277 cand
= iv_cand (data
, j
);
4278 determine_use_iv_cost (data
, use
, cand
);
4285 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4287 cand
= iv_cand (data
, j
);
4288 if (!determine_use_iv_cost (data
, use
, cand
))
4289 bitmap_set_bit (to_clear
, j
);
4292 /* Remove the candidates for that the cost is infinite from
4293 the list of related candidates. */
4294 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4295 bitmap_clear (to_clear
);
4299 BITMAP_FREE (to_clear
);
4301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4303 fprintf (dump_file
, "Use-candidate costs:\n");
4305 for (i
= 0; i
< n_iv_uses (data
); i
++)
4307 use
= iv_use (data
, i
);
4309 fprintf (dump_file
, "Use %d:\n", i
);
4310 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4311 for (j
= 0; j
< use
->n_map_members
; j
++)
4313 if (!use
->cost_map
[j
].cand
4314 || infinite_cost_p (use
->cost_map
[j
].cost
))
4317 fprintf (dump_file
, " %d\t%d\t%d\t",
4318 use
->cost_map
[j
].cand
->id
,
4319 use
->cost_map
[j
].cost
.cost
,
4320 use
->cost_map
[j
].cost
.complexity
);
4321 if (use
->cost_map
[j
].depends_on
)
4322 bitmap_print (dump_file
,
4323 use
->cost_map
[j
].depends_on
, "","");
4324 fprintf (dump_file
, "\n");
4327 fprintf (dump_file
, "\n");
4329 fprintf (dump_file
, "\n");
4333 /* Determines cost of the candidate CAND. */
4336 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4338 comp_cost cost_base
;
4339 unsigned cost
, cost_step
;
4348 /* There are two costs associated with the candidate -- its increment
4349 and its initialization. The second is almost negligible for any loop
4350 that rolls enough, so we take it just very little into account. */
4352 base
= cand
->iv
->base
;
4353 cost_base
= force_var_cost (data
, base
, NULL
);
4354 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)), data
->speed
);
4356 cost
= cost_step
+ cost_base
.cost
/ AVG_LOOP_NITER (current_loop
);
4358 /* Prefer the original ivs unless we may gain something by replacing it.
4359 The reason is to make debugging simpler; so this is not relevant for
4360 artificial ivs created by other optimization passes. */
4361 if (cand
->pos
!= IP_ORIGINAL
4362 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4365 /* Prefer not to insert statements into latch unless there are some
4366 already (so that we do not create unnecessary jumps). */
4367 if (cand
->pos
== IP_END
4368 && empty_block_p (ip_end_pos (data
->current_loop
)))
4372 cand
->cost_step
= cost_step
;
4375 /* Determines costs of computation of the candidates. */
4378 determine_iv_costs (struct ivopts_data
*data
)
4382 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4384 fprintf (dump_file
, "Candidate costs:\n");
4385 fprintf (dump_file
, " cand\tcost\n");
4388 for (i
= 0; i
< n_iv_cands (data
); i
++)
4390 struct iv_cand
*cand
= iv_cand (data
, i
);
4392 determine_iv_cost (data
, cand
);
4394 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4395 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4398 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4399 fprintf (dump_file
, "\n");
4402 /* Calculates cost for having SIZE induction variables. */
4405 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4407 /* We add size to the cost, so that we prefer eliminating ivs
4409 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
);
4412 /* For each size of the induction variable set determine the penalty. */
4415 determine_set_costs (struct ivopts_data
*data
)
4419 gimple_stmt_iterator psi
;
4421 struct loop
*loop
= data
->current_loop
;
4424 /* We use the following model (definitely improvable, especially the
4425 cost function -- TODO):
4427 We estimate the number of registers available (using MD data), name it A.
4429 We estimate the number of registers used by the loop, name it U. This
4430 number is obtained as the number of loop phi nodes (not counting virtual
4431 registers and bivs) + the number of variables from outside of the loop.
4433 We set a reserve R (free regs that are used for temporary computations,
4434 etc.). For now the reserve is a constant 3.
4436 Let I be the number of induction variables.
4438 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4439 make a lot of ivs without a reason).
4440 -- if A - R < U + I <= A, the cost is I * PRES_COST
4441 -- if U + I > A, the cost is I * PRES_COST and
4442 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4444 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4446 fprintf (dump_file
, "Global costs:\n");
4447 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4448 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
4449 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
4453 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
4455 phi
= gsi_stmt (psi
);
4456 op
= PHI_RESULT (phi
);
4458 if (!is_gimple_reg (op
))
4461 if (get_iv (data
, op
))
4467 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4469 struct version_info
*info
= ver_info (data
, j
);
4471 if (info
->inv_id
&& info
->has_nonlin_use
)
4475 data
->regs_used
= n
;
4476 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4477 fprintf (dump_file
, " regs_used %d\n", n
);
4479 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4481 fprintf (dump_file
, " cost for size:\n");
4482 fprintf (dump_file
, " ivs\tcost\n");
4483 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4484 fprintf (dump_file
, " %d\t%d\n", j
,
4485 ivopts_global_cost_for_size (data
, j
));
4486 fprintf (dump_file
, "\n");
4490 /* Returns true if A is a cheaper cost pair than B. */
4493 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4503 cmp
= compare_costs (a
->cost
, b
->cost
);
4510 /* In case the costs are the same, prefer the cheaper candidate. */
4511 if (a
->cand
->cost
< b
->cand
->cost
)
4517 /* Computes the cost field of IVS structure. */
4520 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4522 comp_cost cost
= ivs
->cand_use_cost
;
4523 cost
.cost
+= ivs
->cand_cost
;
4524 cost
.cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4529 /* Remove invariants in set INVS to set IVS. */
4532 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4540 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4542 ivs
->n_invariant_uses
[iid
]--;
4543 if (ivs
->n_invariant_uses
[iid
] == 0)
4548 /* Set USE not to be expressed by any candidate in IVS. */
4551 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4554 unsigned uid
= use
->id
, cid
;
4555 struct cost_pair
*cp
;
4557 cp
= ivs
->cand_for_use
[uid
];
4563 ivs
->cand_for_use
[uid
] = NULL
;
4564 ivs
->n_cand_uses
[cid
]--;
4566 if (ivs
->n_cand_uses
[cid
] == 0)
4568 bitmap_clear_bit (ivs
->cands
, cid
);
4569 /* Do not count the pseudocandidates. */
4573 ivs
->cand_cost
-= cp
->cand
->cost
;
4575 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4578 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
4580 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4581 iv_ca_recount_cost (data
, ivs
);
4584 /* Add invariants in set INVS to set IVS. */
4587 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4595 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4597 ivs
->n_invariant_uses
[iid
]++;
4598 if (ivs
->n_invariant_uses
[iid
] == 1)
4603 /* Set cost pair for USE in set IVS to CP. */
4606 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4607 struct iv_use
*use
, struct cost_pair
*cp
)
4609 unsigned uid
= use
->id
, cid
;
4611 if (ivs
->cand_for_use
[uid
] == cp
)
4614 if (ivs
->cand_for_use
[uid
])
4615 iv_ca_set_no_cp (data
, ivs
, use
);
4622 ivs
->cand_for_use
[uid
] = cp
;
4623 ivs
->n_cand_uses
[cid
]++;
4624 if (ivs
->n_cand_uses
[cid
] == 1)
4626 bitmap_set_bit (ivs
->cands
, cid
);
4627 /* Do not count the pseudocandidates. */
4631 ivs
->cand_cost
+= cp
->cand
->cost
;
4633 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4636 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
4637 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4638 iv_ca_recount_cost (data
, ivs
);
4642 /* Extend set IVS by expressing USE by some of the candidates in it
4646 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4649 struct cost_pair
*best_cp
= NULL
, *cp
;
4653 gcc_assert (ivs
->upto
>= use
->id
);
4655 if (ivs
->upto
== use
->id
)
4661 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4663 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4665 if (cheaper_cost_pair (cp
, best_cp
))
4669 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4672 /* Get cost for assignment IVS. */
4675 iv_ca_cost (struct iv_ca
*ivs
)
4677 /* This was a conditional expression but it triggered a bug in
4680 return infinite_cost
;
4685 /* Returns true if all dependences of CP are among invariants in IVS. */
4688 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4693 if (!cp
->depends_on
)
4696 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4698 if (ivs
->n_invariant_uses
[i
] == 0)
4705 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4706 it before NEXT_CHANGE. */
4708 static struct iv_ca_delta
*
4709 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4710 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4712 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4715 change
->old_cp
= old_cp
;
4716 change
->new_cp
= new_cp
;
4717 change
->next_change
= next_change
;
4722 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4725 static struct iv_ca_delta
*
4726 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4728 struct iv_ca_delta
*last
;
4736 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4738 last
->next_change
= l2
;
4743 /* Returns candidate by that USE is expressed in IVS. */
4745 static struct cost_pair
*
4746 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4748 return ivs
->cand_for_use
[use
->id
];
4751 /* Reverse the list of changes DELTA, forming the inverse to it. */
4753 static struct iv_ca_delta
*
4754 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4756 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4757 struct cost_pair
*tmp
;
4759 for (act
= delta
; act
; act
= next
)
4761 next
= act
->next_change
;
4762 act
->next_change
= prev
;
4766 act
->old_cp
= act
->new_cp
;
4773 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4774 reverted instead. */
4777 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4778 struct iv_ca_delta
*delta
, bool forward
)
4780 struct cost_pair
*from
, *to
;
4781 struct iv_ca_delta
*act
;
4784 delta
= iv_ca_delta_reverse (delta
);
4786 for (act
= delta
; act
; act
= act
->next_change
)
4790 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4791 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4795 iv_ca_delta_reverse (delta
);
4798 /* Returns true if CAND is used in IVS. */
4801 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4803 return ivs
->n_cand_uses
[cand
->id
] > 0;
4806 /* Returns number of induction variable candidates in the set IVS. */
4809 iv_ca_n_cands (struct iv_ca
*ivs
)
4811 return ivs
->n_cands
;
4814 /* Free the list of changes DELTA. */
4817 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4819 struct iv_ca_delta
*act
, *next
;
4821 for (act
= *delta
; act
; act
= next
)
4823 next
= act
->next_change
;
4830 /* Allocates new iv candidates assignment. */
4832 static struct iv_ca
*
4833 iv_ca_new (struct ivopts_data
*data
)
4835 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4839 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4840 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4841 nw
->cands
= BITMAP_ALLOC (NULL
);
4844 nw
->cand_use_cost
= zero_cost
;
4846 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4847 nw
->cost
= zero_cost
;
4852 /* Free memory occupied by the set IVS. */
4855 iv_ca_free (struct iv_ca
**ivs
)
4857 free ((*ivs
)->cand_for_use
);
4858 free ((*ivs
)->n_cand_uses
);
4859 BITMAP_FREE ((*ivs
)->cands
);
4860 free ((*ivs
)->n_invariant_uses
);
4865 /* Dumps IVS to FILE. */
4868 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4870 const char *pref
= " invariants ";
4872 comp_cost cost
= iv_ca_cost (ivs
);
4874 fprintf (file
, " cost %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
4875 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4877 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4878 if (ivs
->n_invariant_uses
[i
])
4880 fprintf (file
, "%s%d", pref
, i
);
4883 fprintf (file
, "\n");
4886 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4887 new set, and store differences in DELTA. Number of induction variables
4888 in the new set is stored to N_IVS. */
4891 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4892 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4898 struct cost_pair
*old_cp
, *new_cp
;
4901 for (i
= 0; i
< ivs
->upto
; i
++)
4903 use
= iv_use (data
, i
);
4904 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4907 && old_cp
->cand
== cand
)
4910 new_cp
= get_use_iv_cost (data
, use
, cand
);
4914 if (!iv_ca_has_deps (ivs
, new_cp
))
4917 if (!cheaper_cost_pair (new_cp
, old_cp
))
4920 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4923 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4924 cost
= iv_ca_cost (ivs
);
4926 *n_ivs
= iv_ca_n_cands (ivs
);
4927 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4932 /* Try narrowing set IVS by removing CAND. Return the cost of
4933 the new set and store the differences in DELTA. */
4936 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4937 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4941 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4943 struct iv_cand
*cnd
;
4947 for (i
= 0; i
< n_iv_uses (data
); i
++)
4949 use
= iv_use (data
, i
);
4951 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4952 if (old_cp
->cand
!= cand
)
4957 if (data
->consider_all_candidates
)
4959 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4964 cnd
= iv_cand (data
, ci
);
4966 cp
= get_use_iv_cost (data
, use
, cnd
);
4969 if (!iv_ca_has_deps (ivs
, cp
))
4972 if (!cheaper_cost_pair (cp
, new_cp
))
4980 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4985 cnd
= iv_cand (data
, ci
);
4987 cp
= get_use_iv_cost (data
, use
, cnd
);
4990 if (!iv_ca_has_deps (ivs
, cp
))
4993 if (!cheaper_cost_pair (cp
, new_cp
))
5002 iv_ca_delta_free (delta
);
5003 return infinite_cost
;
5006 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5009 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5010 cost
= iv_ca_cost (ivs
);
5011 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5016 /* Try optimizing the set of candidates IVS by removing candidates different
5017 from to EXCEPT_CAND from it. Return cost of the new set, and store
5018 differences in DELTA. */
5021 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5022 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5025 struct iv_ca_delta
*act_delta
, *best_delta
;
5027 comp_cost best_cost
, acost
;
5028 struct iv_cand
*cand
;
5031 best_cost
= iv_ca_cost (ivs
);
5033 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5035 cand
= iv_cand (data
, i
);
5037 if (cand
== except_cand
)
5040 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5042 if (compare_costs (acost
, best_cost
) < 0)
5045 iv_ca_delta_free (&best_delta
);
5046 best_delta
= act_delta
;
5049 iv_ca_delta_free (&act_delta
);
5058 /* Recurse to possibly remove other unnecessary ivs. */
5059 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5060 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5061 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5062 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5066 /* Tries to extend the sets IVS in the best possible way in order
5067 to express the USE. */
5070 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5073 comp_cost best_cost
, act_cost
;
5076 struct iv_cand
*cand
;
5077 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5078 struct cost_pair
*cp
;
5080 iv_ca_add_use (data
, ivs
, use
);
5081 best_cost
= iv_ca_cost (ivs
);
5083 cp
= iv_ca_cand_for_use (ivs
, use
);
5086 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5087 iv_ca_set_no_cp (data
, ivs
, use
);
5090 /* First try important candidates not based on any memory object. Only if
5091 this fails, try the specific ones. Rationale -- in loops with many
5092 variables the best choice often is to use just one generic biv. If we
5093 added here many ivs specific to the uses, the optimization algorithm later
5094 would be likely to get stuck in a local minimum, thus causing us to create
5095 too many ivs. The approach from few ivs to more seems more likely to be
5096 successful -- starting from few ivs, replacing an expensive use by a
5097 specific iv should always be a win. */
5098 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5100 cand
= iv_cand (data
, i
);
5102 if (cand
->iv
->base_object
!= NULL_TREE
)
5105 if (iv_ca_cand_used_p (ivs
, cand
))
5108 cp
= get_use_iv_cost (data
, use
, cand
);
5112 iv_ca_set_cp (data
, ivs
, use
, cp
);
5113 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5114 iv_ca_set_no_cp (data
, ivs
, use
);
5115 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5117 if (compare_costs (act_cost
, best_cost
) < 0)
5119 best_cost
= act_cost
;
5121 iv_ca_delta_free (&best_delta
);
5122 best_delta
= act_delta
;
5125 iv_ca_delta_free (&act_delta
);
5128 if (infinite_cost_p (best_cost
))
5130 for (i
= 0; i
< use
->n_map_members
; i
++)
5132 cp
= use
->cost_map
+ i
;
5137 /* Already tried this. */
5138 if (cand
->important
&& cand
->iv
->base_object
== NULL_TREE
)
5141 if (iv_ca_cand_used_p (ivs
, cand
))
5145 iv_ca_set_cp (data
, ivs
, use
, cp
);
5146 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5147 iv_ca_set_no_cp (data
, ivs
, use
);
5148 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5151 if (compare_costs (act_cost
, best_cost
) < 0)
5153 best_cost
= act_cost
;
5156 iv_ca_delta_free (&best_delta
);
5157 best_delta
= act_delta
;
5160 iv_ca_delta_free (&act_delta
);
5164 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5165 iv_ca_delta_free (&best_delta
);
5167 return !infinite_cost_p (best_cost
);
5170 /* Finds an initial assignment of candidates to uses. */
5172 static struct iv_ca
*
5173 get_initial_solution (struct ivopts_data
*data
)
5175 struct iv_ca
*ivs
= iv_ca_new (data
);
5178 for (i
= 0; i
< n_iv_uses (data
); i
++)
5179 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
5188 /* Tries to improve set of induction variables IVS. */
5191 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5194 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5195 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5196 struct iv_cand
*cand
;
5198 /* Try extending the set of induction variables by one. */
5199 for (i
= 0; i
< n_iv_cands (data
); i
++)
5201 cand
= iv_cand (data
, i
);
5203 if (iv_ca_cand_used_p (ivs
, cand
))
5206 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
5210 /* If we successfully added the candidate and the set is small enough,
5211 try optimizing it by removing other candidates. */
5212 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5214 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5215 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5216 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5217 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5220 if (compare_costs (acost
, best_cost
) < 0)
5223 iv_ca_delta_free (&best_delta
);
5224 best_delta
= act_delta
;
5227 iv_ca_delta_free (&act_delta
);
5232 /* Try removing the candidates from the set instead. */
5233 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5235 /* Nothing more we can do. */
5240 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5241 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5242 iv_ca_delta_free (&best_delta
);
5246 /* Attempts to find the optimal set of induction variables. We do simple
5247 greedy heuristic -- we try to replace at most one candidate in the selected
5248 solution and remove the unused ivs while this improves the cost. */
5250 static struct iv_ca
*
5251 find_optimal_iv_set (struct ivopts_data
*data
)
5257 /* Get the initial solution. */
5258 set
= get_initial_solution (data
);
5261 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5262 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5266 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5268 fprintf (dump_file
, "Initial set of candidates:\n");
5269 iv_ca_dump (data
, dump_file
, set
);
5272 while (try_improve_iv_set (data
, set
))
5274 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5276 fprintf (dump_file
, "Improved to:\n");
5277 iv_ca_dump (data
, dump_file
, set
);
5281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5283 comp_cost cost
= iv_ca_cost (set
);
5284 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n", cost
.cost
, cost
.complexity
);
5287 for (i
= 0; i
< n_iv_uses (data
); i
++)
5289 use
= iv_use (data
, i
);
5290 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5296 /* Creates a new induction variable corresponding to CAND. */
5299 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5301 gimple_stmt_iterator incr_pos
;
5311 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
5315 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
5323 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
5327 /* Mark that the iv is preserved. */
5328 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5329 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5331 /* Rewrite the increment so that it uses var_before directly. */
5332 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5337 gimple_add_tmp_var (cand
->var_before
);
5338 add_referenced_var (cand
->var_before
);
5340 base
= unshare_expr (cand
->iv
->base
);
5342 create_iv (base
, unshare_expr (cand
->iv
->step
),
5343 cand
->var_before
, data
->current_loop
,
5344 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5347 /* Creates new induction variables described in SET. */
5350 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5353 struct iv_cand
*cand
;
5356 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5358 cand
= iv_cand (data
, i
);
5359 create_new_iv (data
, cand
);
5364 /* Rewrites USE (definition of iv used in a nonlinear expression)
5365 using candidate CAND. */
5368 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5369 struct iv_use
*use
, struct iv_cand
*cand
)
5374 gimple_stmt_iterator bsi
;
5376 /* An important special case -- if we are asked to express value of
5377 the original iv by itself, just exit; there is no need to
5378 introduce a new computation (that might also need casting the
5379 variable to unsigned and back). */
5380 if (cand
->pos
== IP_ORIGINAL
5381 && cand
->incremented_at
== use
->stmt
)
5383 tree step
, ctype
, utype
;
5384 enum tree_code incr_code
= PLUS_EXPR
, old_code
;
5386 gcc_assert (is_gimple_assign (use
->stmt
));
5387 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
5389 step
= cand
->iv
->step
;
5390 ctype
= TREE_TYPE (step
);
5391 utype
= TREE_TYPE (cand
->var_after
);
5392 if (TREE_CODE (step
) == NEGATE_EXPR
)
5394 incr_code
= MINUS_EXPR
;
5395 step
= TREE_OPERAND (step
, 0);
5398 /* Check whether we may leave the computation unchanged.
5399 This is the case only if it does not rely on other
5400 computations in the loop -- otherwise, the computation
5401 we rely upon may be removed in remove_unused_ivs,
5402 thus leading to ICE. */
5403 old_code
= gimple_assign_rhs_code (use
->stmt
);
5404 if (old_code
== PLUS_EXPR
5405 || old_code
== MINUS_EXPR
5406 || old_code
== POINTER_PLUS_EXPR
)
5408 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
5409 op
= gimple_assign_rhs2 (use
->stmt
);
5410 else if (old_code
!= MINUS_EXPR
5411 && gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
5412 op
= gimple_assign_rhs1 (use
->stmt
);
5420 && (TREE_CODE (op
) == INTEGER_CST
5421 || operand_equal_p (op
, step
, 0)))
5424 /* Otherwise, add the necessary computations to express
5426 op
= fold_convert (ctype
, cand
->var_before
);
5427 comp
= fold_convert (utype
,
5428 build2 (incr_code
, ctype
, op
,
5429 unshare_expr (step
)));
5433 comp
= get_computation (data
->current_loop
, use
, cand
);
5434 gcc_assert (comp
!= NULL_TREE
);
5437 switch (gimple_code (use
->stmt
))
5440 tgt
= PHI_RESULT (use
->stmt
);
5442 /* If we should keep the biv, do not replace it. */
5443 if (name_info (data
, tgt
)->preserve_biv
)
5446 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
5450 tgt
= gimple_assign_lhs (use
->stmt
);
5451 bsi
= gsi_for_stmt (use
->stmt
);
5458 op
= force_gimple_operand_gsi (&bsi
, comp
, false, SSA_NAME_VAR (tgt
),
5459 true, GSI_SAME_STMT
);
5461 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
5463 ass
= gimple_build_assign (tgt
, op
);
5464 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
5466 bsi
= gsi_for_stmt (use
->stmt
);
5467 remove_phi_node (&bsi
, false);
5471 gimple_assign_set_rhs_from_tree (&bsi
, op
);
5472 use
->stmt
= gsi_stmt (bsi
);
5476 /* Replaces ssa name in index IDX by its basic variable. Callback for
5480 idx_remove_ssa_names (tree base
, tree
*idx
,
5481 void *data ATTRIBUTE_UNUSED
)
5485 if (TREE_CODE (*idx
) == SSA_NAME
)
5486 *idx
= SSA_NAME_VAR (*idx
);
5488 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
5490 op
= &TREE_OPERAND (base
, 2);
5492 && TREE_CODE (*op
) == SSA_NAME
)
5493 *op
= SSA_NAME_VAR (*op
);
5494 op
= &TREE_OPERAND (base
, 3);
5496 && TREE_CODE (*op
) == SSA_NAME
)
5497 *op
= SSA_NAME_VAR (*op
);
5503 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5506 unshare_and_remove_ssa_names (tree ref
)
5508 ref
= unshare_expr (ref
);
5509 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5514 /* Copies the reference information from OLD_REF to NEW_REF. */
5517 copy_ref_info (tree new_ref
, tree old_ref
)
5519 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5520 copy_mem_ref_info (new_ref
, old_ref
);
5523 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5524 TREE_SIDE_EFFECTS (new_ref
) = TREE_SIDE_EFFECTS (old_ref
);
5525 TREE_THIS_VOLATILE (new_ref
) = TREE_THIS_VOLATILE (old_ref
);
5529 /* Rewrites USE (address that is an iv) using candidate CAND. */
5532 rewrite_use_address (struct ivopts_data
*data
,
5533 struct iv_use
*use
, struct iv_cand
*cand
)
5536 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
5537 tree base_hint
= NULL_TREE
;
5541 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5543 unshare_aff_combination (&aff
);
5545 /* To avoid undefined overflow problems, all IV candidates use unsigned
5546 integer types. The drawback is that this makes it impossible for
5547 create_mem_ref to distinguish an IV that is based on a memory object
5548 from one that represents simply an offset.
5550 To work around this problem, we pass a hint to create_mem_ref that
5551 indicates which variable (if any) in aff is an IV based on a memory
5552 object. Note that we only consider the candidate. If this is not
5553 based on an object, the base of the reference is in some subexpression
5554 of the use -- but these will use pointer types, so they are recognized
5555 by the create_mem_ref heuristics anyway. */
5556 if (cand
->iv
->base_object
)
5557 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5559 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
, base_hint
,
5561 copy_ref_info (ref
, *use
->op_p
);
5565 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5569 rewrite_use_compare (struct ivopts_data
*data
,
5570 struct iv_use
*use
, struct iv_cand
*cand
)
5572 tree comp
, *var_p
, op
, bound
;
5573 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
5574 enum tree_code compare
;
5575 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5581 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5582 tree var_type
= TREE_TYPE (var
);
5585 compare
= iv_elimination_compare (data
, use
);
5586 bound
= unshare_expr (fold_convert (var_type
, bound
));
5587 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
5589 gsi_insert_seq_on_edge_immediate (
5590 loop_preheader_edge (data
->current_loop
),
5593 gimple_cond_set_lhs (use
->stmt
, var
);
5594 gimple_cond_set_code (use
->stmt
, compare
);
5595 gimple_cond_set_rhs (use
->stmt
, op
);
5599 /* The induction variable elimination failed; just express the original
5601 comp
= get_computation (data
->current_loop
, use
, cand
);
5602 gcc_assert (comp
!= NULL_TREE
);
5604 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
5607 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
5608 true, GSI_SAME_STMT
);
5611 /* Rewrites USE using candidate CAND. */
5614 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
5618 case USE_NONLINEAR_EXPR
:
5619 rewrite_use_nonlinear_expr (data
, use
, cand
);
5623 rewrite_use_address (data
, use
, cand
);
5627 rewrite_use_compare (data
, use
, cand
);
5634 update_stmt (use
->stmt
);
5637 /* Rewrite the uses using the selected induction variables. */
5640 rewrite_uses (struct ivopts_data
*data
)
5643 struct iv_cand
*cand
;
5646 for (i
= 0; i
< n_iv_uses (data
); i
++)
5648 use
= iv_use (data
, i
);
5649 cand
= use
->selected
;
5652 rewrite_use (data
, use
, cand
);
5656 /* Removes the ivs that are not used after rewriting. */
5659 remove_unused_ivs (struct ivopts_data
*data
)
5663 bitmap toremove
= BITMAP_ALLOC (NULL
);
5665 /* Figure out an order in which to release SSA DEFs so that we don't
5666 release something that we'd have to propagate into a debug stmt
5668 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5670 struct version_info
*info
;
5672 info
= ver_info (data
, j
);
5674 && !integer_zerop (info
->iv
->step
)
5676 && !info
->iv
->have_use_for
5677 && !info
->preserve_biv
)
5678 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
5681 release_defs_bitset (toremove
);
5683 BITMAP_FREE (toremove
);
5686 /* Frees data allocated by the optimization of a single loop. */
5689 free_loop_data (struct ivopts_data
*data
)
5697 pointer_map_destroy (data
->niters
);
5698 data
->niters
= NULL
;
5701 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5703 struct version_info
*info
;
5705 info
= ver_info (data
, i
);
5709 info
->has_nonlin_use
= false;
5710 info
->preserve_biv
= false;
5713 bitmap_clear (data
->relevant
);
5714 bitmap_clear (data
->important_candidates
);
5716 for (i
= 0; i
< n_iv_uses (data
); i
++)
5718 struct iv_use
*use
= iv_use (data
, i
);
5721 BITMAP_FREE (use
->related_cands
);
5722 for (j
= 0; j
< use
->n_map_members
; j
++)
5723 if (use
->cost_map
[j
].depends_on
)
5724 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5725 free (use
->cost_map
);
5728 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5730 for (i
= 0; i
< n_iv_cands (data
); i
++)
5732 struct iv_cand
*cand
= iv_cand (data
, i
);
5736 if (cand
->depends_on
)
5737 BITMAP_FREE (cand
->depends_on
);
5740 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5742 if (data
->version_info_size
< num_ssa_names
)
5744 data
->version_info_size
= 2 * num_ssa_names
;
5745 free (data
->version_info
);
5746 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5749 data
->max_inv_id
= 0;
5751 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5752 SET_DECL_RTL (obj
, NULL_RTX
);
5754 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5757 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5761 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5763 free_loop_data (data
);
5764 free (data
->version_info
);
5765 BITMAP_FREE (data
->relevant
);
5766 BITMAP_FREE (data
->important_candidates
);
5768 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5769 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5770 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5773 /* Optimizes the LOOP. Returns true if anything changed. */
5776 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5778 bool changed
= false;
5779 struct iv_ca
*iv_ca
;
5783 gcc_assert (!data
->niters
);
5784 data
->current_loop
= loop
;
5785 data
->speed
= optimize_loop_for_speed_p (loop
);
5787 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5789 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5791 exit
= single_dom_exit (loop
);
5794 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5795 exit
->src
->index
, exit
->dest
->index
);
5796 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
5797 fprintf (dump_file
, "\n");
5800 fprintf (dump_file
, "\n");
5803 body
= get_loop_body (loop
);
5804 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
5807 /* For each ssa name determines whether it behaves as an induction variable
5809 if (!find_induction_variables (data
))
5812 /* Finds interesting uses (item 1). */
5813 find_interesting_uses (data
);
5814 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5817 /* Finds candidates for the induction variables (item 2). */
5818 find_iv_candidates (data
);
5820 /* Calculates the costs (item 3, part 1). */
5821 determine_iv_costs (data
);
5822 determine_use_iv_costs (data
);
5823 determine_set_costs (data
);
5825 /* Find the optimal set of induction variables (item 3, part 2). */
5826 iv_ca
= find_optimal_iv_set (data
);
5831 /* Create the new induction variables (item 4, part 1). */
5832 create_new_ivs (data
, iv_ca
);
5833 iv_ca_free (&iv_ca
);
5835 /* Rewrite the uses (item 4, part 2). */
5836 rewrite_uses (data
);
5838 /* Remove the ivs that are unused after rewriting. */
5839 remove_unused_ivs (data
);
5841 /* We have changed the structure of induction variables; it might happen
5842 that definitions in the scev database refer to some of them that were
5847 free_loop_data (data
);
5852 /* Main entry point. Optimizes induction variables in loops. */
5855 tree_ssa_iv_optimize (void)
5858 struct ivopts_data data
;
5861 tree_ssa_iv_optimize_init (&data
);
5863 /* Optimize the loops starting with the innermost ones. */
5864 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
5866 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5867 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5869 tree_ssa_iv_optimize_loop (&data
, loop
);
5872 tree_ssa_iv_optimize_finalize (&data
);