/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variables value to
	 be usable for the given purpose (adding base and offset for arrays,
	 etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

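/* As an illustrative sketch (an example added for exposition, not taken from
   the original sources): given a loop

       for (i = 0; i < 100; i++)
	 a[i] = 0;

   step 1 finds the address use a[i] and the compare i < 100, step 2 considers
   candidates such as the original i and a pointer iterating from &a[0] with
   step sizeof (*a), and steps 3 and 4 may then rewrite the loop to

       for (p = &a[0]; p != &a[100]; p++)
	 *p = 0;

   combining strength reduction (the multiplication hidden in a[i] is gone)
   with elimination of the original induction variable.  */
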
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

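/* For illustration (example values added for exposition, not from the
   original sources): a computation taking roughly four cycles and involving
   two operations could be described as new_cost (4, 2) using the constructor
   defined later in this file, while infinite_cost marks computations that
   must never be selected.  */
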
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use_p;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

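/* Examples (added for exposition): for EXPR == &a[i] this returns the address
   of the underlying object a; for a pointer p, including casts such as
   (char *) p, it reduces to p itself via the conversion and POINTER_PLUS
   cases; and for any non-pointer expression it returns NULL_TREE, since a
   non-pointer cannot carry a base object.  */
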
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}

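/* Worked examples (added for exposition): with TOP = x * 8 and BOT = x the
   MULT_EXPR case recurses on x, which matches BOT, yielding *MUL = 8; with
   TOP = 12 and BOT = 4 the INTEGER_CST case divides exactly, yielding
   *MUL = 3; and with TOP = 13 and BOT = 4 the remainder is nonzero, so
   false is returned.  */
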
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

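/* For example (an illustrative case added for exposition): if EXPR is
   p + 24 for a pointer p, this stores 24 to *OFFSET and returns p; if EXPR
   is &a[5] for an array of 4-byte elements, it stores 20 to *OFFSET and
   returns the address of the start of a, which is how a candidate for
   &a[0] vs. (T *)&a can arise later in add_iv_value_candidates.  */
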
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

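/* E.g. (illustrative): for "int" this returns "unsigned int", whose overflow
   behavior is well defined, and for a pointer type it returns an unsigned
   integer type of the same precision; an already unsigned type is returned
   unchanged.  */
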
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}

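/* For illustration (an example added for exposition): on a target with
   post-increment addressing, a load from *p where p advances by the access
   size each iteration can be covered by an IP_AFTER_USE candidate, so that
   the load and the increment may combine into a single instruction of the
   form "load *p++".  */
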
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the nearest power of two, so that taking the
             modulo is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts cost COST2 from COST1.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}

/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}

/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += set_src_cost (SET_SRC (set), speed);
      else
        cost++;
    }

  return cost;
}

/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
        return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}

/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
         the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
                  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}

/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            bitmap_set_bit (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}

/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;


static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((unsigned HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_POST_DECREMENT (mem_mode)
          || USE_STORE_POST_DECREMENT (mem_mode))
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
          || USE_STORE_PRE_INCREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (USE_LOAD_POST_INCREMENT (mem_mode)
          || USE_STORE_POST_INCREMENT (mem_mode))
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }

          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}

/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;

  if (!(m >= 0 && m < maxm))
    return false;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost (speed, mode, m)
             : (mult == op1
                ? shiftsub1_cost (speed, mode, m)
                : shiftsub0_cost (speed, mode, m)));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, mult == op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}

/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = no_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = no_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
        cost0 = no_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = no_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost [speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
                                    speed, &sa_cost))
            return sa_cost;
        }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
                                             mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
                                             mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}

/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}

/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}

/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}

/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
                                                     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}

/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  enum machine_mode mem_mode = (address_p
                                ? TYPE_MODE (TREE_TYPE (*use->op_p))
                                : VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (rat.fits_shwi ())
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, mem_mode,
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
        get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        mem_mode,
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}

/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}

/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step -1,
     i.e., N*type_range/step - 1.  Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1

  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}

/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Strips useless type conversions from EXP that do not change whether the
   type wraps on overflow.  */

static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
         && (nowrap_type_p (TREE_TYPE (exp))
             == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);
  return exp;
}

/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
        return false;
      /* Fallthru.  */

    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
    default:
      return false;
    }
}

/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.

         Also, we only look for the first addition in the computation of BASE.
         More complex analysis would be better, but introducing it just for
         this optimization seems like an overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}

/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
      overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != double_int_one)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}

/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += double_int_one;
      period_value = tree_to_double_int (period);
      if (max_niter.ugt (period_value))
        {
          /* See if we can take advantage of inferred loop bound information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (max_niter.ugt (period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}

/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
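
/* To make the choice above concrete: for a counted loop walking an array
   through a pointer p, the elimination branch prices rewriting 'i < n' as
   'p < base + n' (the invariant bound is set up once, see
   adjust_setup_cost), while the expression branch prices recomputing the
   original comparison from the candidate plus keeping the original bound
   live; the cheaper pair is what set_use_iv_cost records.  The names here
   reuse the example from the comment in the function above.  */
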
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);
  bitmap_iterator bi;

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we only take it into account a little.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
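
/* A sketch of the formula above: with an add cost of COSTS_N_INSNS (1)
   and a simple invariant base forced to the same minimum, the candidate
   costs cost_step + adjust_setup_cost (data, COSTS_N_INSNS (1)); when
   optimizing for speed the setup part is divided by the average trip
   count (see avg_loop_niter), so for a loop that rolls the increment
   cost dominates.  The two cost++ adjustments in the function then
   break ties in favour of original, debuggable ivs and against placing
   code into an empty latch.  */
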
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
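
/* Roughly: as long as SIZE plus the other registers in use fits within
   the available hard registers, only the linear 'size' term grows;
   beyond that, estimate_reg_pressure_cost starts charging spill costs,
   which is what makes the set selection below reluctant to keep many
   ivs live at once.  */
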
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
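
/* The usual calling pattern is a trial commit: apply a delta, read the
   resulting cost, and revert, as iv_ca_extend and iv_ca_narrow below do:

     iv_ca_delta_commit (data, ivs, *delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, *delta, false);

   This keeps IVS unchanged while candidate changes are being costed.  */
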
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
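
/* As an illustration of the rationale above: in a loop touching a[i],
   b[i] and c[i], starting from one generic biv and letting the search
   add object-based pointer ivs only when that pays off tends to end
   with a single biv plus cheap address arithmetic, whereas seeding the
   set with three specific pointer ivs risks leaving the local search
   stuck with all of them.  (Hypothetical loop, for illustration only.)  */
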
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
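
/* Each call of try_improve_iv_set is thus one step of a greedy local
   search: cost every unused candidate as an extension (pruning the
   enlarged set when it is small enough), keep the single cheapest
   delta, and apply it.  find_optimal_iv_set_1 below iterates this
   until no delta lowers the cost.  */
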
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) will introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms
   it into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
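
/* E.g. an exit test 'if (i_1 != 100)' may become 'if (p_2 < &a + 400)'
   when a pointer candidate with step 4 was selected: BOUND computed by
   may_eliminate_iv is materialized in the preheader and the GIMPLE_COND
   is updated in place.  (SSA names and constants here are illustrative.)  */
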
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
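
/* The debug handling above means a bind like '# DEBUG i => i_3', where
   i_3 is a biv being removed, is rewritten to an equivalent expression
   in a surviving candidate, e.g. '# DEBUG i => p_7 - &a' (illustrative
   names), so variable locations under -g survive the iv selection.  */
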
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->inv_id = 0;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}