/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations assumed when no better
   estimate is available.  */
#define AVG_LOOP_NITER(LOOP) 5
102 /* Returns the expected number of loop iterations for LOOP.
103 The average trip count is computed from profile data if it
106 static inline HOST_WIDE_INT
107 avg_loop_niter (struct loop
*loop
)
109 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
111 return AVG_LOOP_NITER (loop
);
116 /* Representation of the induction variable. */
119 tree base
; /* Initial value of the iv. */
120 tree base_object
; /* A memory object to that the induction variable points. */
121 tree step
; /* Step of the iv (constant only). */
122 tree ssa_name
; /* The ssa name with the value. */
123 bool biv_p
; /* Is it a biv? */
124 bool have_use_for
; /* Do we already have a use for it? */
125 unsigned use_id
; /* The identifier in the use if it is the case. */
128 /* Per-ssa version information (induction variable descriptions, etc.). */
131 tree name
; /* The ssa name. */
132 struct iv
*iv
; /* Induction variable description. */
133 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
134 an expression that is not an induction variable. */
135 bool preserve_biv
; /* For the original biv, whether to preserve it. */
136 unsigned inv_id
; /* Id of an invariant. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
147 /* Cost of a computation. */
150 int cost
; /* The runtime cost. */
151 unsigned complexity
; /* The estimate of the complexity of the code for
152 the computation (in no concrete units --
153 complexity field should be larger for more
154 complex expressions and addressing modes). */
157 static const comp_cost no_cost
= {0, 0};
158 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
160 /* The candidate - cost pair. */
163 struct iv_cand
*cand
; /* The candidate. */
164 comp_cost cost
; /* The cost. */
165 bitmap depends_on
; /* The list of invariants that have to be
167 tree value
; /* For final value elimination, the expression for
168 the final value of the iv. For iv elimination,
169 the new bound to compare with. */
170 enum tree_code comp
; /* For iv elimination, the comparison. */
171 int inv_expr_id
; /* Loop invariant expression id. */
177 unsigned id
; /* The id of the use. */
178 enum use_type type
; /* Type of the use. */
179 struct iv
*iv
; /* The induction variable it is based on. */
180 gimple stmt
; /* Statement in that it occurs. */
181 tree
*op_p
; /* The place where it occurs. */
182 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
185 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
186 struct cost_pair
*cost_map
;
187 /* The costs wrto the iv candidates. */
189 struct iv_cand
*selected
;
190 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
203 /* The induction variable candidate. */
206 unsigned id
; /* The number of the candidate. */
207 bool important
; /* Whether this is an "important" candidate, i.e. such
208 that it should be considered by all uses. */
209 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
210 gimple incremented_at
;/* For original biv, the statement where it is
212 tree var_before
; /* The variable used for it before increment. */
213 tree var_after
; /* The variable used for it after increment. */
214 struct iv
*iv
; /* The value of the candidate. NULL for
215 "pseudocandidate" used to indicate the possibility
216 to replace the final value of an iv by direct
217 computation of the value. */
218 unsigned cost
; /* Cost of the candidate. */
219 unsigned cost_step
; /* Cost of the candidate's increment operation. */
220 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
221 where it is incremented. */
222 bitmap depends_on
; /* The list of invariants that are used in step of the
226 /* Loop invariant expression hashtable entry. */
227 struct iv_inv_expr_ent
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
240 /* Hashtable helpers. */
242 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
244 typedef iv_inv_expr_ent value_type
;
245 typedef iv_inv_expr_ent compare_type
;
246 static inline hashval_t
hash (const value_type
*);
247 static inline bool equal (const value_type
*, const compare_type
*);
250 /* Hash function for loop invariant expressions. */
253 iv_inv_expr_hasher::hash (const value_type
*expr
)
258 /* Hash table equality function for expressions. */
261 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
263 return expr1
->hash
== expr2
->hash
264 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
269 /* The currently optimized loop. */
270 struct loop
*current_loop
;
272 /* Numbers of iterations for all exits of the current loop. */
273 struct pointer_map_t
*niters
;
275 /* Number of registers used in it. */
278 /* The size of version_info array allocated. */
279 unsigned version_info_size
;
281 /* The array of information for the ssa names. */
282 struct version_info
*version_info
;
284 /* The hashtable of loop invariant expressions created
286 hash_table
<iv_inv_expr_hasher
> inv_expr_tab
;
288 /* Loop invariant expression id. */
291 /* The bitmap of indices in version_info whose value was changed. */
294 /* The uses of induction variables. */
295 vec
<iv_use_p
> iv_uses
;
297 /* The candidates. */
298 vec
<iv_cand_p
> iv_candidates
;
300 /* A bitmap of important candidates. */
301 bitmap important_candidates
;
303 /* The maximum invariant id. */
306 /* Whether to consider just related and important candidates when replacing a
308 bool consider_all_candidates
;
310 /* Are we optimizing for speed? */
313 /* Whether the loop body includes any function calls. */
314 bool body_includes_call
;
316 /* Whether the loop body can only be exited via single exit. */
317 bool loop_single_exit_p
;
320 /* An assignment of iv candidates to uses. */
324 /* The number of uses covered by the assignment. */
327 /* Number of uses that cannot be expressed by the candidates in the set. */
330 /* Candidate assigned to a use, together with the related costs. */
331 struct cost_pair
**cand_for_use
;
333 /* Number of times each candidate is used. */
334 unsigned *n_cand_uses
;
336 /* The candidates used. */
339 /* The number of candidates in the set. */
342 /* Total number of registers needed. */
345 /* Total cost of expressing uses. */
346 comp_cost cand_use_cost
;
348 /* Total cost of candidates. */
351 /* Number of times each invariant is used. */
352 unsigned *n_invariant_uses
;
354 /* The array holding the number of uses of each loop
355 invariant expressions created by ivopt. */
356 unsigned *used_inv_expr
;
358 /* The number of created loop invariants. */
359 unsigned num_used_inv_expr
;
361 /* Total cost of the assignment. */
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
399 /* The list of trees for that the decl_rtl field must be reset is stored
402 static vec
<tree
> decl_rtl_to_reset
;
404 static comp_cost
force_expr_to_var_cost (tree
, bool);
406 /* Number of uses recorded in DATA. */
408 static inline unsigned
409 n_iv_uses (struct ivopts_data
*data
)
411 return data
->iv_uses
.length ();
414 /* Ith use recorded in DATA. */
416 static inline struct iv_use
*
417 iv_use (struct ivopts_data
*data
, unsigned i
)
419 return data
->iv_uses
[i
];
422 /* Number of candidates recorded in DATA. */
424 static inline unsigned
425 n_iv_cands (struct ivopts_data
*data
)
427 return data
->iv_candidates
.length ();
430 /* Ith candidate recorded in DATA. */
432 static inline struct iv_cand
*
433 iv_cand (struct ivopts_data
*data
, unsigned i
)
435 return data
->iv_candidates
[i
];
438 /* The single loop exit if it dominates the latch, NULL otherwise. */
441 single_dom_exit (struct loop
*loop
)
443 edge exit
= single_exit (loop
);
448 if (!just_once_each_iteration_p (loop
, exit
->src
))
454 /* Dumps information about the induction variable IV to FILE. */
457 dump_iv (FILE *file
, struct iv
*iv
)
461 fprintf (file
, "ssa name ");
462 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
463 fprintf (file
, "\n");
466 fprintf (file
, " type ");
467 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
468 fprintf (file
, "\n");
472 fprintf (file
, " base ");
473 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
474 fprintf (file
, "\n");
476 fprintf (file
, " step ");
477 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
478 fprintf (file
, "\n");
482 fprintf (file
, " invariant ");
483 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
484 fprintf (file
, "\n");
489 fprintf (file
, " base object ");
490 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
491 fprintf (file
, "\n");
495 fprintf (file
, " is a biv\n");
498 /* Dumps information about the USE to FILE. */
501 dump_use (FILE *file
, struct iv_use
*use
)
503 fprintf (file
, "use %d\n", use
->id
);
507 case USE_NONLINEAR_EXPR
:
508 fprintf (file
, " generic\n");
512 fprintf (file
, " address\n");
516 fprintf (file
, " compare\n");
523 fprintf (file
, " in statement ");
524 print_gimple_stmt (file
, use
->stmt
, 0, 0);
525 fprintf (file
, "\n");
527 fprintf (file
, " at position ");
529 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
530 fprintf (file
, "\n");
532 dump_iv (file
, use
->iv
);
534 if (use
->related_cands
)
536 fprintf (file
, " related candidates ");
537 dump_bitmap (file
, use
->related_cands
);
541 /* Dumps information about the uses to FILE. */
544 dump_uses (FILE *file
, struct ivopts_data
*data
)
549 for (i
= 0; i
< n_iv_uses (data
); i
++)
551 use
= iv_use (data
, i
);
553 dump_use (file
, use
);
554 fprintf (file
, "\n");
558 /* Dumps information about induction variable candidate CAND to FILE. */
561 dump_cand (FILE *file
, struct iv_cand
*cand
)
563 struct iv
*iv
= cand
->iv
;
565 fprintf (file
, "candidate %d%s\n",
566 cand
->id
, cand
->important
? " (important)" : "");
568 if (cand
->depends_on
)
570 fprintf (file
, " depends on ");
571 dump_bitmap (file
, cand
->depends_on
);
576 fprintf (file
, " final value replacement\n");
580 if (cand
->var_before
)
582 fprintf (file
, " var_before ");
583 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
584 fprintf (file
, "\n");
588 fprintf (file
, " var_after ");
589 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
590 fprintf (file
, "\n");
596 fprintf (file
, " incremented before exit test\n");
600 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
604 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
608 fprintf (file
, " incremented at end\n");
612 fprintf (file
, " original biv\n");
619 /* Returns the info for ssa version VER. */
621 static inline struct version_info
*
622 ver_info (struct ivopts_data
*data
, unsigned ver
)
624 return data
->version_info
+ ver
;
627 /* Returns the info for ssa name NAME. */
629 static inline struct version_info
*
630 name_info (struct ivopts_data
*data
, tree name
)
632 return ver_info (data
, SSA_NAME_VERSION (name
));
635 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
639 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
641 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
645 if (sbb
== loop
->latch
)
651 return stmt
== last_stmt (bb
);
654 /* Returns true if STMT if after the place where the original induction
655 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
656 if the positions are identical. */
659 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
661 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
662 basic_block stmt_bb
= gimple_bb (stmt
);
664 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
667 if (stmt_bb
!= cand_bb
)
671 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
673 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
676 /* Returns true if STMT if after the place where the induction variable
677 CAND is incremented in LOOP. */
680 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
688 return stmt_after_ip_normal_pos (loop
, stmt
);
692 return stmt_after_inc_pos (cand
, stmt
, false);
695 return stmt_after_inc_pos (cand
, stmt
, true);
702 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
705 abnormal_ssa_name_p (tree exp
)
710 if (TREE_CODE (exp
) != SSA_NAME
)
713 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
716 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
717 abnormal phi node. Callback for for_each_index. */
720 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
721 void *data ATTRIBUTE_UNUSED
)
723 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
725 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
727 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
731 return !abnormal_ssa_name_p (*index
);
734 /* Returns true if EXPR contains a ssa name that occurs in an
735 abnormal phi node. */
738 contains_abnormal_ssa_name_p (tree expr
)
741 enum tree_code_class codeclass
;
746 code
= TREE_CODE (expr
);
747 codeclass
= TREE_CODE_CLASS (code
);
749 if (code
== SSA_NAME
)
750 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
752 if (code
== INTEGER_CST
753 || is_gimple_min_invariant (expr
))
756 if (code
== ADDR_EXPR
)
757 return !for_each_index (&TREE_OPERAND (expr
, 0),
758 idx_contains_abnormal_ssa_name_p
,
761 if (code
== COND_EXPR
)
762 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
763 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
764 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
770 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
775 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
787 /* Returns the structure describing number of iterations determined from
788 EXIT of DATA->current_loop, or NULL if something goes wrong. */
790 static struct tree_niter_desc
*
791 niter_for_exit (struct ivopts_data
*data
, edge exit
)
793 struct tree_niter_desc
*desc
;
798 data
->niters
= pointer_map_create ();
802 slot
= pointer_map_contains (data
->niters
, exit
);
806 /* Try to determine number of iterations. We cannot safely work with ssa
807 names that appear in phi nodes on abnormal edges, so that we do not
808 create overlapping life ranges for them (PR 27283). */
809 desc
= XNEW (struct tree_niter_desc
);
810 if (!number_of_iterations_exit (data
->current_loop
,
812 || contains_abnormal_ssa_name_p (desc
->niter
))
817 slot
= pointer_map_insert (data
->niters
, exit
);
821 desc
= (struct tree_niter_desc
*) *slot
;
826 /* Returns the structure describing number of iterations determined from
827 single dominating exit of DATA->current_loop, or NULL if something
830 static struct tree_niter_desc
*
831 niter_for_single_dom_exit (struct ivopts_data
*data
)
833 edge exit
= single_dom_exit (data
->current_loop
);
838 return niter_for_exit (data
, exit
);
841 /* Initializes data structures used by the iv optimization pass, stored
845 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
847 data
->version_info_size
= 2 * num_ssa_names
;
848 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
849 data
->relevant
= BITMAP_ALLOC (NULL
);
850 data
->important_candidates
= BITMAP_ALLOC (NULL
);
851 data
->max_inv_id
= 0;
853 data
->iv_uses
.create (20);
854 data
->iv_candidates
.create (20);
855 data
->inv_expr_tab
.create (10);
856 data
->inv_expr_id
= 0;
857 decl_rtl_to_reset
.create (20);
860 /* Returns a memory object to that EXPR points. In case we are able to
861 determine that it does not point to any such object, NULL is returned. */
864 determine_base_object (tree expr
)
866 enum tree_code code
= TREE_CODE (expr
);
869 /* If this is a pointer casted to any type, we need to determine
870 the base object for the pointer; so handle conversions before
871 throwing away non-pointer expressions. */
872 if (CONVERT_EXPR_P (expr
))
873 return determine_base_object (TREE_OPERAND (expr
, 0));
875 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
884 obj
= TREE_OPERAND (expr
, 0);
885 base
= get_base_address (obj
);
890 if (TREE_CODE (base
) == MEM_REF
)
891 return determine_base_object (TREE_OPERAND (base
, 0));
893 return fold_convert (ptr_type_node
,
894 build_fold_addr_expr (base
));
896 case POINTER_PLUS_EXPR
:
897 return determine_base_object (TREE_OPERAND (expr
, 0));
901 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
905 return fold_convert (ptr_type_node
, expr
);
909 /* Allocates an induction variable with given initial value BASE and step STEP
913 alloc_iv (tree base
, tree step
)
915 struct iv
*iv
= XCNEW (struct iv
);
916 gcc_assert (step
!= NULL_TREE
);
919 iv
->base_object
= determine_base_object (base
);
922 iv
->have_use_for
= false;
924 iv
->ssa_name
= NULL_TREE
;
929 /* Sets STEP and BASE for induction variable IV. */
932 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
934 struct version_info
*info
= name_info (data
, iv
);
936 gcc_assert (!info
->iv
);
938 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
939 info
->iv
= alloc_iv (base
, step
);
940 info
->iv
->ssa_name
= iv
;
943 /* Finds induction variable declaration for VAR. */
946 get_iv (struct ivopts_data
*data
, tree var
)
949 tree type
= TREE_TYPE (var
);
951 if (!POINTER_TYPE_P (type
)
952 && !INTEGRAL_TYPE_P (type
))
955 if (!name_info (data
, var
)->iv
)
957 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
960 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
961 set_iv (data
, var
, var
, build_int_cst (type
, 0));
964 return name_info (data
, var
)->iv
;
967 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
968 not define a simple affine biv with nonzero step. */
971 determine_biv_step (gimple phi
)
973 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
974 tree name
= PHI_RESULT (phi
);
977 if (virtual_operand_p (name
))
980 if (!simple_iv (loop
, loop
, name
, &iv
, true))
983 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
986 /* Finds basic ivs. */
989 find_bivs (struct ivopts_data
*data
)
992 tree step
, type
, base
;
994 struct loop
*loop
= data
->current_loop
;
995 gimple_stmt_iterator psi
;
997 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
999 phi
= gsi_stmt (psi
);
1001 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1004 step
= determine_biv_step (phi
);
1008 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1009 base
= expand_simple_operations (base
);
1010 if (contains_abnormal_ssa_name_p (base
)
1011 || contains_abnormal_ssa_name_p (step
))
1014 type
= TREE_TYPE (PHI_RESULT (phi
));
1015 base
= fold_convert (type
, base
);
1018 if (POINTER_TYPE_P (type
))
1019 step
= convert_to_ptrofftype (step
);
1021 step
= fold_convert (type
, step
);
1024 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1031 /* Marks basic ivs. */
1034 mark_bivs (struct ivopts_data
*data
)
1038 struct iv
*iv
, *incr_iv
;
1039 struct loop
*loop
= data
->current_loop
;
1040 basic_block incr_bb
;
1041 gimple_stmt_iterator psi
;
1043 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1045 phi
= gsi_stmt (psi
);
1047 iv
= get_iv (data
, PHI_RESULT (phi
));
1051 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1052 incr_iv
= get_iv (data
, var
);
1056 /* If the increment is in the subloop, ignore it. */
1057 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1058 if (incr_bb
->loop_father
!= data
->current_loop
1059 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1063 incr_iv
->biv_p
= true;
1067 /* Checks whether STMT defines a linear induction variable and stores its
1068 parameters to IV. */
1071 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1074 struct loop
*loop
= data
->current_loop
;
1076 iv
->base
= NULL_TREE
;
1077 iv
->step
= NULL_TREE
;
1079 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1082 lhs
= gimple_assign_lhs (stmt
);
1083 if (TREE_CODE (lhs
) != SSA_NAME
)
1086 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1088 iv
->base
= expand_simple_operations (iv
->base
);
1090 if (contains_abnormal_ssa_name_p (iv
->base
)
1091 || contains_abnormal_ssa_name_p (iv
->step
))
1094 /* If STMT could throw, then do not consider STMT as defining a GIV.
1095 While this will suppress optimizations, we can not safely delete this
1096 GIV and associated statements, even if it appears it is not used. */
1097 if (stmt_could_throw_p (stmt
))
1103 /* Finds general ivs in statement STMT. */
1106 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1110 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1113 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1116 /* Finds general ivs in basic block BB. */
1119 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1121 gimple_stmt_iterator bsi
;
1123 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1124 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1127 /* Finds general ivs. */
1130 find_givs (struct ivopts_data
*data
)
1132 struct loop
*loop
= data
->current_loop
;
1133 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1136 for (i
= 0; i
< loop
->num_nodes
; i
++)
1137 find_givs_in_bb (data
, body
[i
]);
1141 /* For each ssa name defined in LOOP determines whether it is an induction
1142 variable and if so, its initial value and step. */
1145 find_induction_variables (struct ivopts_data
*data
)
1150 if (!find_bivs (data
))
1156 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1158 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1162 fprintf (dump_file
, " number of iterations ");
1163 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1164 if (!integer_zerop (niter
->may_be_zero
))
1166 fprintf (dump_file
, "; zero if ");
1167 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1169 fprintf (dump_file
, "\n\n");
1172 fprintf (dump_file
, "Induction variables:\n\n");
1174 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1176 if (ver_info (data
, i
)->iv
)
1177 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1184 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1186 static struct iv_use
*
1187 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1188 gimple stmt
, enum use_type use_type
)
1190 struct iv_use
*use
= XCNEW (struct iv_use
);
1192 use
->id
= n_iv_uses (data
);
1193 use
->type
= use_type
;
1197 use
->related_cands
= BITMAP_ALLOC (NULL
);
1199 /* To avoid showing ssa name in the dumps, if it was not reset by the
1201 iv
->ssa_name
= NULL_TREE
;
1203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1204 dump_use (dump_file
, use
);
1206 data
->iv_uses
.safe_push (use
);
1211 /* Checks whether OP is a loop-level invariant and if so, records it.
1212 NONLINEAR_USE is true if the invariant is used in a way we do not
1213 handle specially. */
1216 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1219 struct version_info
*info
;
1221 if (TREE_CODE (op
) != SSA_NAME
1222 || virtual_operand_p (op
))
1225 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1227 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1230 info
= name_info (data
, op
);
1232 info
->has_nonlin_use
|= nonlinear_use
;
1234 info
->inv_id
= ++data
->max_inv_id
;
1235 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1238 /* Checks whether the use OP is interesting and if so, records it. */
1240 static struct iv_use
*
1241 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1248 if (TREE_CODE (op
) != SSA_NAME
)
1251 iv
= get_iv (data
, op
);
1255 if (iv
->have_use_for
)
1257 use
= iv_use (data
, iv
->use_id
);
1259 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1263 if (integer_zerop (iv
->step
))
1265 record_invariant (data
, op
, true);
1268 iv
->have_use_for
= true;
1270 civ
= XNEW (struct iv
);
1273 stmt
= SSA_NAME_DEF_STMT (op
);
1274 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1275 || is_gimple_assign (stmt
));
1277 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1278 iv
->use_id
= use
->id
;
1283 /* Given a condition in statement STMT, checks whether it is a compare
1284 of an induction variable and an invariant. If this is the case,
1285 CONTROL_VAR is set to location of the iv, BOUND to the location of
1286 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1287 induction variable descriptions, and true is returned. If this is not
1288 the case, CONTROL_VAR and BOUND are set to the arguments of the
1289 condition and false is returned. */
1292 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1293 tree
**control_var
, tree
**bound
,
1294 struct iv
**iv_var
, struct iv
**iv_bound
)
1296 /* The objects returned when COND has constant operands. */
1297 static struct iv const_iv
;
1299 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1300 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1303 if (gimple_code (stmt
) == GIMPLE_COND
)
1305 op0
= gimple_cond_lhs_ptr (stmt
);
1306 op1
= gimple_cond_rhs_ptr (stmt
);
1310 op0
= gimple_assign_rhs1_ptr (stmt
);
1311 op1
= gimple_assign_rhs2_ptr (stmt
);
1314 zero
= integer_zero_node
;
1315 const_iv
.step
= integer_zero_node
;
1317 if (TREE_CODE (*op0
) == SSA_NAME
)
1318 iv0
= get_iv (data
, *op0
);
1319 if (TREE_CODE (*op1
) == SSA_NAME
)
1320 iv1
= get_iv (data
, *op1
);
1322 /* Exactly one of the compared values must be an iv, and the other one must
1327 if (integer_zerop (iv0
->step
))
1329 /* Control variable may be on the other side. */
1330 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1331 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1333 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1337 *control_var
= op0
;;
1348 /* Checks whether the condition in STMT is interesting and if so,
1352 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1354 tree
*var_p
, *bound_p
;
1355 struct iv
*var_iv
, *civ
;
1357 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1359 find_interesting_uses_op (data
, *var_p
);
1360 find_interesting_uses_op (data
, *bound_p
);
1364 civ
= XNEW (struct iv
);
1366 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1369 /* Returns the outermost loop EXPR is obviously invariant in
1370 relative to the loop LOOP, i.e. if all its operands are defined
1371 outside of the returned loop. Returns NULL if EXPR is not
1372 even obviously invariant in LOOP. */
1375 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1380 if (is_gimple_min_invariant (expr
))
1381 return current_loops
->tree_root
;
1383 if (TREE_CODE (expr
) == SSA_NAME
)
1385 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1388 if (flow_bb_inside_loop_p (loop
, def_bb
))
1390 return superloop_at_depth (loop
,
1391 loop_depth (def_bb
->loop_father
) + 1);
1394 return current_loops
->tree_root
;
1400 unsigned maxdepth
= 0;
1401 len
= TREE_OPERAND_LENGTH (expr
);
1402 for (i
= 0; i
< len
; i
++)
1404 struct loop
*ivloop
;
1405 if (!TREE_OPERAND (expr
, i
))
1408 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1411 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1414 return superloop_at_depth (loop
, maxdepth
);
1417 /* Returns true if expression EXPR is obviously invariant in LOOP,
1418 i.e. if all its operands are defined outside of the LOOP. LOOP
1419 should not be the function body. */
1422 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1427 gcc_assert (loop_depth (loop
) > 0);
1429 if (is_gimple_min_invariant (expr
))
1432 if (TREE_CODE (expr
) == SSA_NAME
)
1434 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1436 && flow_bb_inside_loop_p (loop
, def_bb
))
1445 len
= TREE_OPERAND_LENGTH (expr
);
1446 for (i
= 0; i
< len
; i
++)
1447 if (TREE_OPERAND (expr
, i
)
1448 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1454 /* Cumulates the steps of indices into DATA and replaces their values with the
1455 initial ones. Returns false when the value of the index cannot be determined.
1456 Callback for for_each_index. */
1458 struct ifs_ivopts_data
1460 struct ivopts_data
*ivopts_data
;
1466 idx_find_step (tree base
, tree
*idx
, void *data
)
1468 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1470 tree step
, iv_base
, iv_step
, lbound
, off
;
1471 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1473 /* If base is a component ref, require that the offset of the reference
1475 if (TREE_CODE (base
) == COMPONENT_REF
)
1477 off
= component_ref_field_offset (base
);
1478 return expr_invariant_in_loop_p (loop
, off
);
1481 /* If base is array, first check whether we will be able to move the
1482 reference out of the loop (in order to take its address in strength
1483 reduction). In order for this to work we need both lower bound
1484 and step to be loop invariants. */
1485 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1487 /* Moreover, for a range, the size needs to be invariant as well. */
1488 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1489 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1492 step
= array_ref_element_size (base
);
1493 lbound
= array_ref_low_bound (base
);
1495 if (!expr_invariant_in_loop_p (loop
, step
)
1496 || !expr_invariant_in_loop_p (loop
, lbound
))
1500 if (TREE_CODE (*idx
) != SSA_NAME
)
1503 iv
= get_iv (dta
->ivopts_data
, *idx
);
1507 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1508 *&x[0], which is not folded and does not trigger the
1509 ARRAY_REF path below. */
1512 if (integer_zerop (iv
->step
))
1515 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1517 step
= array_ref_element_size (base
);
1519 /* We only handle addresses whose step is an integer constant. */
1520 if (TREE_CODE (step
) != INTEGER_CST
)
1524 /* The step for pointer arithmetics already is 1 byte. */
1525 step
= size_one_node
;
1529 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1530 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1533 /* The index might wrap. */
1537 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1538 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1543 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1544 object is passed to it in DATA. */
1547 idx_record_use (tree base
, tree
*idx
,
1550 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1551 find_interesting_uses_op (data
, *idx
);
1552 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1554 find_interesting_uses_op (data
, array_ref_element_size (base
));
1555 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1560 /* If we can prove that TOP = cst * BOT for some constant cst,
1561 store cst to MUL and return true. Otherwise return false.
1562 The returned value is always sign-extended, regardless of the
1563 signedness of TOP and BOT. */
1566 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1569 enum tree_code code
;
1570 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1571 widest_int res
, p0
, p1
;
1576 if (operand_equal_p (top
, bot
, 0))
1582 code
= TREE_CODE (top
);
1586 mby
= TREE_OPERAND (top
, 1);
1587 if (TREE_CODE (mby
) != INTEGER_CST
)
1590 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1593 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1598 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1599 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1602 if (code
== MINUS_EXPR
)
1604 *mul
= wi::sext (p0
+ p1
, precision
);
1608 if (TREE_CODE (bot
) != INTEGER_CST
)
1611 p0
= widest_int::from (top
, SIGNED
);
1612 p1
= widest_int::from (bot
, SIGNED
);
1615 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1623 /* Returns true if memory reference REF with step STEP may be unaligned. */
1626 may_be_unaligned_p (tree ref
, tree step
)
1630 HOST_WIDE_INT bitsize
;
1631 HOST_WIDE_INT bitpos
;
1633 enum machine_mode mode
;
1634 int unsignedp
, volatilep
;
1635 unsigned base_align
;
1637 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1638 thus they are not misaligned. */
1639 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1642 /* The test below is basically copy of what expr.c:normal_inner_ref
1643 does to check whether the object must be loaded by parts when
1644 STRICT_ALIGNMENT is true. */
1645 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1646 &unsignedp
, &volatilep
, true);
1647 base_type
= TREE_TYPE (base
);
1648 base_align
= get_object_alignment (base
);
1649 base_align
= MAX (base_align
, TYPE_ALIGN (base_type
));
1651 if (mode
!= BLKmode
)
1653 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1655 if (base_align
< mode_align
1656 || (bitpos
% mode_align
) != 0
1657 || (bitpos
% BITS_PER_UNIT
) != 0)
1661 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1664 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1671 /* Return true if EXPR may be non-addressable. */
1674 may_be_nonaddressable_p (tree expr
)
1676 switch (TREE_CODE (expr
))
1678 case TARGET_MEM_REF
:
1679 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1680 target, thus they are always addressable. */
1684 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1685 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1687 case VIEW_CONVERT_EXPR
:
1688 /* This kind of view-conversions may wrap non-addressable objects
1689 and make them look addressable. After some processing the
1690 non-addressability may be uncovered again, causing ADDR_EXPRs
1691 of inappropriate objects to be built. */
1692 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1693 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1696 /* ... fall through ... */
1699 case ARRAY_RANGE_REF
:
1700 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1712 /* Finds addresses in *OP_P inside STMT. */
1715 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1717 tree base
= *op_p
, step
= size_zero_node
;
1719 struct ifs_ivopts_data ifs_ivopts_data
;
1721 /* Do not play with volatile memory references. A bit too conservative,
1722 perhaps, but safe. */
1723 if (gimple_has_volatile_ops (stmt
))
1726 /* Ignore bitfields for now. Not really something terribly complicated
1728 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1731 base
= unshare_expr (base
);
1733 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1735 tree type
= build_pointer_type (TREE_TYPE (base
));
1739 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1741 civ
= get_iv (data
, TMR_BASE (base
));
1745 TMR_BASE (base
) = civ
->base
;
1748 if (TMR_INDEX2 (base
)
1749 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1751 civ
= get_iv (data
, TMR_INDEX2 (base
));
1755 TMR_INDEX2 (base
) = civ
->base
;
1758 if (TMR_INDEX (base
)
1759 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1761 civ
= get_iv (data
, TMR_INDEX (base
));
1765 TMR_INDEX (base
) = civ
->base
;
1770 if (TMR_STEP (base
))
1771 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1773 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1777 if (integer_zerop (step
))
1779 base
= tree_mem_ref_addr (type
, base
);
1783 ifs_ivopts_data
.ivopts_data
= data
;
1784 ifs_ivopts_data
.stmt
= stmt
;
1785 ifs_ivopts_data
.step
= size_zero_node
;
1786 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1787 || integer_zerop (ifs_ivopts_data
.step
))
1789 step
= ifs_ivopts_data
.step
;
1791 /* Check that the base expression is addressable. This needs
1792 to be done after substituting bases of IVs into it. */
1793 if (may_be_nonaddressable_p (base
))
1796 /* Moreover, on strict alignment platforms, check that it is
1797 sufficiently aligned. */
1798 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1801 base
= build_fold_addr_expr (base
);
1803 /* Substituting bases of IVs into the base expression might
1804 have caused folding opportunities. */
1805 if (TREE_CODE (base
) == ADDR_EXPR
)
1807 tree
*ref
= &TREE_OPERAND (base
, 0);
1808 while (handled_component_p (*ref
))
1809 ref
= &TREE_OPERAND (*ref
, 0);
1810 if (TREE_CODE (*ref
) == MEM_REF
)
1812 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1813 TREE_OPERAND (*ref
, 0),
1814 TREE_OPERAND (*ref
, 1));
1821 civ
= alloc_iv (base
, step
);
1822 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1826 for_each_index (op_p
, idx_record_use
, data
);
1829 /* Finds and records invariants used in STMT. */
1832 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1835 use_operand_p use_p
;
1838 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1840 op
= USE_FROM_PTR (use_p
);
1841 record_invariant (data
, op
, false);
1845 /* Finds interesting uses of induction variables in the statement STMT. */
1848 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1851 tree op
, *lhs
, *rhs
;
1853 use_operand_p use_p
;
1854 enum tree_code code
;
1856 find_invariants_stmt (data
, stmt
);
1858 if (gimple_code (stmt
) == GIMPLE_COND
)
1860 find_interesting_uses_cond (data
, stmt
);
1864 if (is_gimple_assign (stmt
))
1866 lhs
= gimple_assign_lhs_ptr (stmt
);
1867 rhs
= gimple_assign_rhs1_ptr (stmt
);
1869 if (TREE_CODE (*lhs
) == SSA_NAME
)
1871 /* If the statement defines an induction variable, the uses are not
1872 interesting by themselves. */
1874 iv
= get_iv (data
, *lhs
);
1876 if (iv
&& !integer_zerop (iv
->step
))
1880 code
= gimple_assign_rhs_code (stmt
);
1881 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1882 && (REFERENCE_CLASS_P (*rhs
)
1883 || is_gimple_val (*rhs
)))
1885 if (REFERENCE_CLASS_P (*rhs
))
1886 find_interesting_uses_address (data
, stmt
, rhs
);
1888 find_interesting_uses_op (data
, *rhs
);
1890 if (REFERENCE_CLASS_P (*lhs
))
1891 find_interesting_uses_address (data
, stmt
, lhs
);
1894 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1896 find_interesting_uses_cond (data
, stmt
);
1900 /* TODO -- we should also handle address uses of type
1902 memory = call (whatever);
1909 if (gimple_code (stmt
) == GIMPLE_PHI
1910 && gimple_bb (stmt
) == data
->current_loop
->header
)
1912 iv
= get_iv (data
, PHI_RESULT (stmt
));
1914 if (iv
&& !integer_zerop (iv
->step
))
1918 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1920 op
= USE_FROM_PTR (use_p
);
1922 if (TREE_CODE (op
) != SSA_NAME
)
1925 iv
= get_iv (data
, op
);
1929 find_interesting_uses_op (data
, op
);
1933 /* Finds interesting uses of induction variables outside of loops
1934 on loop exit edge EXIT. */
1937 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1940 gimple_stmt_iterator psi
;
1943 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1945 phi
= gsi_stmt (psi
);
1946 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1947 if (!virtual_operand_p (def
))
1948 find_interesting_uses_op (data
, def
);
1952 /* Finds uses of the induction variables that are interesting. */
1955 find_interesting_uses (struct ivopts_data
*data
)
1958 gimple_stmt_iterator bsi
;
1959 basic_block
*body
= get_loop_body (data
->current_loop
);
1961 struct version_info
*info
;
1964 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1965 fprintf (dump_file
, "Uses:\n\n");
1967 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1972 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1973 if (e
->dest
!= EXIT_BLOCK_PTR
1974 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1975 find_interesting_uses_outside (data
, e
);
1977 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1978 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1979 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1980 if (!is_gimple_debug (gsi_stmt (bsi
)))
1981 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1984 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1988 fprintf (dump_file
, "\n");
1990 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1992 info
= ver_info (data
, i
);
1995 fprintf (dump_file
, " ");
1996 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1997 fprintf (dump_file
, " is invariant (%d)%s\n",
1998 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2002 fprintf (dump_file
, "\n");
2008 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2009 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2010 we are at the top-level of the processed address. */
2013 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2014 unsigned HOST_WIDE_INT
*offset
)
2016 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2017 enum tree_code code
;
2018 tree type
, orig_type
= TREE_TYPE (expr
);
2019 unsigned HOST_WIDE_INT off0
, off1
, st
;
2020 tree orig_expr
= expr
;
2024 type
= TREE_TYPE (expr
);
2025 code
= TREE_CODE (expr
);
2031 if (!cst_fits_shwi_p (expr
)
2032 || integer_zerop (expr
))
2035 *offset
= int_cst_value (expr
);
2036 return build_int_cst (orig_type
, 0);
2038 case POINTER_PLUS_EXPR
:
2041 op0
= TREE_OPERAND (expr
, 0);
2042 op1
= TREE_OPERAND (expr
, 1);
2044 op0
= strip_offset_1 (op0
, false, false, &off0
);
2045 op1
= strip_offset_1 (op1
, false, false, &off1
);
2047 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2048 if (op0
== TREE_OPERAND (expr
, 0)
2049 && op1
== TREE_OPERAND (expr
, 1))
2052 if (integer_zerop (op1
))
2054 else if (integer_zerop (op0
))
2056 if (code
== MINUS_EXPR
)
2057 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2062 expr
= fold_build2 (code
, type
, op0
, op1
);
2064 return fold_convert (orig_type
, expr
);
2067 op1
= TREE_OPERAND (expr
, 1);
2068 if (!cst_fits_shwi_p (op1
))
2071 op0
= TREE_OPERAND (expr
, 0);
2072 op0
= strip_offset_1 (op0
, false, false, &off0
);
2073 if (op0
== TREE_OPERAND (expr
, 0))
2076 *offset
= off0
* int_cst_value (op1
);
2077 if (integer_zerop (op0
))
2080 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2082 return fold_convert (orig_type
, expr
);
2085 case ARRAY_RANGE_REF
:
2089 step
= array_ref_element_size (expr
);
2090 if (!cst_fits_shwi_p (step
))
2093 st
= int_cst_value (step
);
2094 op1
= TREE_OPERAND (expr
, 1);
2095 op1
= strip_offset_1 (op1
, false, false, &off1
);
2096 *offset
= off1
* st
;
2099 && integer_zerop (op1
))
2101 /* Strip the component reference completely. */
2102 op0
= TREE_OPERAND (expr
, 0);
2103 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2113 tmp
= component_ref_field_offset (expr
);
2115 && cst_fits_shwi_p (tmp
))
2117 /* Strip the component reference completely. */
2118 op0
= TREE_OPERAND (expr
, 0);
2119 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2120 *offset
= off0
+ int_cst_value (tmp
);
2126 op0
= TREE_OPERAND (expr
, 0);
2127 op0
= strip_offset_1 (op0
, true, true, &off0
);
2130 if (op0
== TREE_OPERAND (expr
, 0))
2133 expr
= build_fold_addr_expr (op0
);
2134 return fold_convert (orig_type
, expr
);
2137 /* ??? Offset operand? */
2138 inside_addr
= false;
2145 /* Default handling of expressions for that we want to recurse into
2146 the first operand. */
2147 op0
= TREE_OPERAND (expr
, 0);
2148 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2151 if (op0
== TREE_OPERAND (expr
, 0)
2152 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2155 expr
= copy_node (expr
);
2156 TREE_OPERAND (expr
, 0) = op0
;
2158 TREE_OPERAND (expr
, 1) = op1
;
2160 /* Inside address, we might strip the top level component references,
2161 thus changing type of the expression. Handling of ADDR_EXPR
2163 expr
= fold_convert (orig_type
, expr
);
2168 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2171 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2173 return strip_offset_1 (expr
, false, false, offset
);
2176 /* Returns variant of TYPE that can be used as base for different uses.
2177 We return unsigned type with the same precision, which avoids problems
2181 generic_type_for (tree type
)
2183 if (POINTER_TYPE_P (type
))
2184 return unsigned_type_for (type
);
2186 if (TYPE_UNSIGNED (type
))
2189 return unsigned_type_for (type
);
2192 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2193 the bitmap to that we should store it. */
2195 static struct ivopts_data
*fd_ivopts_data
;
2197 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2199 bitmap
*depends_on
= (bitmap
*) data
;
2200 struct version_info
*info
;
2202 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2204 info
= name_info (fd_ivopts_data
, *expr_p
);
2206 if (!info
->inv_id
|| info
->has_nonlin_use
)
2210 *depends_on
= BITMAP_ALLOC (NULL
);
2211 bitmap_set_bit (*depends_on
, info
->inv_id
);
2216 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2217 position to POS. If USE is not NULL, the candidate is set as related to
2218 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2219 replacement of the final value of the iv by a direct computation. */
2221 static struct iv_cand
*
2222 add_candidate_1 (struct ivopts_data
*data
,
2223 tree base
, tree step
, bool important
, enum iv_position pos
,
2224 struct iv_use
*use
, gimple incremented_at
)
2227 struct iv_cand
*cand
= NULL
;
2228 tree type
, orig_type
;
2230 /* For non-original variables, make sure their values are computed in a type
2231 that does not invoke undefined behavior on overflows (since in general,
2232 we cannot prove that these induction variables are non-wrapping). */
2233 if (pos
!= IP_ORIGINAL
)
2235 orig_type
= TREE_TYPE (base
);
2236 type
= generic_type_for (orig_type
);
2237 if (type
!= orig_type
)
2239 base
= fold_convert (type
, base
);
2240 step
= fold_convert (type
, step
);
2244 for (i
= 0; i
< n_iv_cands (data
); i
++)
2246 cand
= iv_cand (data
, i
);
2248 if (cand
->pos
!= pos
)
2251 if (cand
->incremented_at
!= incremented_at
2252 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2253 && cand
->ainc_use
!= use
))
2267 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2268 && operand_equal_p (step
, cand
->iv
->step
, 0)
2269 && (TYPE_PRECISION (TREE_TYPE (base
))
2270 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2274 if (i
== n_iv_cands (data
))
2276 cand
= XCNEW (struct iv_cand
);
2282 cand
->iv
= alloc_iv (base
, step
);
2285 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2287 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2288 cand
->var_after
= cand
->var_before
;
2290 cand
->important
= important
;
2291 cand
->incremented_at
= incremented_at
;
2292 data
->iv_candidates
.safe_push (cand
);
2295 && TREE_CODE (step
) != INTEGER_CST
)
2297 fd_ivopts_data
= data
;
2298 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2301 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2302 cand
->ainc_use
= use
;
2304 cand
->ainc_use
= NULL
;
2306 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2307 dump_cand (dump_file
, cand
);
2310 if (important
&& !cand
->important
)
2312 cand
->important
= true;
2313 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2314 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2319 bitmap_set_bit (use
->related_cands
, i
);
2320 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2321 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2328 /* Returns true if incrementing the induction variable at the end of the LOOP
2331 The purpose is to avoid splitting latch edge with a biv increment, thus
2332 creating a jump, possibly confusing other optimization passes and leaving
2333 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2334 is not available (so we do not have a better alternative), or if the latch
2335 edge is already nonempty. */
2338 allow_ip_end_pos_p (struct loop
*loop
)
2340 if (!ip_normal_pos (loop
))
2343 if (!empty_block_p (ip_end_pos (loop
)))
2349 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2350 Important field is set to IMPORTANT. */
2353 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2354 bool important
, struct iv_use
*use
)
2356 basic_block use_bb
= gimple_bb (use
->stmt
);
2357 enum machine_mode mem_mode
;
2358 unsigned HOST_WIDE_INT cstepi
;
2360 /* If we insert the increment in any position other than the standard
2361 ones, we must ensure that it is incremented once per iteration.
2362 It must not be in an inner nested loop, or one side of an if
2364 if (use_bb
->loop_father
!= data
->current_loop
2365 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2366 || stmt_could_throw_p (use
->stmt
)
2367 || !cst_fits_shwi_p (step
))
2370 cstepi
= int_cst_value (step
);
2372 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2373 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2374 || USE_STORE_PRE_INCREMENT (mem_mode
))
2375 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2376 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2377 || USE_STORE_PRE_DECREMENT (mem_mode
))
2378 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2380 enum tree_code code
= MINUS_EXPR
;
2382 tree new_step
= step
;
2384 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2386 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2387 code
= POINTER_PLUS_EXPR
;
2390 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2391 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2392 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2395 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2396 || USE_STORE_POST_INCREMENT (mem_mode
))
2397 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2398 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2399 || USE_STORE_POST_DECREMENT (mem_mode
))
2400 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2402 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2407 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2408 position to POS. If USE is not NULL, the candidate is set as related to
2409 it. The candidate computation is scheduled on all available positions. */
2412 add_candidate (struct ivopts_data
*data
,
2413 tree base
, tree step
, bool important
, struct iv_use
*use
)
2415 if (ip_normal_pos (data
->current_loop
))
2416 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2417 if (ip_end_pos (data
->current_loop
)
2418 && allow_ip_end_pos_p (data
->current_loop
))
2419 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2421 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2422 add_autoinc_candidates (data
, base
, step
, important
, use
);
2425 /* Adds standard iv candidates. */
2428 add_standard_iv_candidates (struct ivopts_data
*data
)
2430 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2432 /* The same for a double-integer type if it is still fast enough. */
2434 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2435 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2436 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2437 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2439 /* The same for a double-integer type if it is still fast enough. */
2441 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2442 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2443 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2444 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2448 /* Adds candidates bases on the old induction variable IV. */
2451 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2455 struct iv_cand
*cand
;
2457 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2459 /* The same, but with initial value zero. */
2460 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2461 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2463 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2464 iv
->step
, true, NULL
);
2466 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2467 if (gimple_code (phi
) == GIMPLE_PHI
)
2469 /* Additionally record the possibility of leaving the original iv
2471 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2472 cand
= add_candidate_1 (data
,
2473 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2474 SSA_NAME_DEF_STMT (def
));
2475 cand
->var_before
= iv
->ssa_name
;
2476 cand
->var_after
= def
;
2480 /* Adds candidates based on the old induction variables. */
2483 add_old_ivs_candidates (struct ivopts_data
*data
)
2489 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2491 iv
= ver_info (data
, i
)->iv
;
2492 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2493 add_old_iv_candidates (data
, iv
);
2497 /* Adds candidates based on the value of the induction variable IV and USE. */
2500 add_iv_value_candidates (struct ivopts_data
*data
,
2501 struct iv
*iv
, struct iv_use
*use
)
2503 unsigned HOST_WIDE_INT offset
;
2507 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2509 /* The same, but with initial value zero. Make such variable important,
2510 since it is generic enough so that possibly many uses may be based
2512 basetype
= TREE_TYPE (iv
->base
);
2513 if (POINTER_TYPE_P (basetype
))
2514 basetype
= sizetype
;
2515 add_candidate (data
, build_int_cst (basetype
, 0),
2516 iv
->step
, true, use
);
2518 /* Third, try removing the constant offset. Make sure to even
2519 add a candidate for &a[0] vs. (T *)&a. */
2520 base
= strip_offset (iv
->base
, &offset
);
2522 || base
!= iv
->base
)
2523 add_candidate (data
, base
, iv
->step
, false, use
);
2526 /* Adds candidates based on the uses. */
2529 add_derived_ivs_candidates (struct ivopts_data
*data
)
2533 for (i
= 0; i
< n_iv_uses (data
); i
++)
2535 struct iv_use
*use
= iv_use (data
, i
);
2542 case USE_NONLINEAR_EXPR
:
2545 /* Just add the ivs based on the value of the iv used here. */
2546 add_iv_value_candidates (data
, use
->iv
, use
);
2555 /* Record important candidates and add them to related_cands bitmaps
2559 record_important_candidates (struct ivopts_data
*data
)
2564 for (i
= 0; i
< n_iv_cands (data
); i
++)
2566 struct iv_cand
*cand
= iv_cand (data
, i
);
2568 if (cand
->important
)
2569 bitmap_set_bit (data
->important_candidates
, i
);
2572 data
->consider_all_candidates
= (n_iv_cands (data
)
2573 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2575 if (data
->consider_all_candidates
)
2577 /* We will not need "related_cands" bitmaps in this case,
2578 so release them to decrease peak memory consumption. */
2579 for (i
= 0; i
< n_iv_uses (data
); i
++)
2581 use
= iv_use (data
, i
);
2582 BITMAP_FREE (use
->related_cands
);
2587 /* Add important candidates to the related_cands bitmaps. */
2588 for (i
= 0; i
< n_iv_uses (data
); i
++)
2589 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2590 data
->important_candidates
);
2594 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2595 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2596 we allocate a simple list to every use. */
2599 alloc_use_cost_map (struct ivopts_data
*data
)
2601 unsigned i
, size
, s
;
2603 for (i
= 0; i
< n_iv_uses (data
); i
++)
2605 struct iv_use
*use
= iv_use (data
, i
);
2607 if (data
->consider_all_candidates
)
2608 size
= n_iv_cands (data
);
2611 s
= bitmap_count_bits (use
->related_cands
);
2613 /* Round up to the power of two, so that moduling by it is fast. */
2614 size
= s
? (1 << ceil_log2 (s
)) : 1;
2617 use
->n_map_members
= size
;
2618 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2622 /* Returns description of computation cost of expression whose runtime
2623 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2626 new_cost (unsigned runtime
, unsigned complexity
)
2633 cost
.cost
= runtime
;
2634 cost
.complexity
= complexity
;
2639 /* Adds costs COST1 and COST2. */
2642 add_costs (comp_cost cost1
, comp_cost cost2
)
2644 cost1
.cost
+= cost2
.cost
;
2645 cost1
.complexity
+= cost2
.complexity
;
2649 /* Subtracts costs COST1 and COST2. */
2652 sub_costs (comp_cost cost1
, comp_cost cost2
)
2654 cost1
.cost
-= cost2
.cost
;
2655 cost1
.complexity
-= cost2
.complexity
;
2660 /* Returns a negative number if COST1 < COST2, a positive number if
2661 COST1 > COST2, and 0 if COST1 = COST2. */
2664 compare_costs (comp_cost cost1
, comp_cost cost2
)
2666 if (cost1
.cost
== cost2
.cost
)
2667 return cost1
.complexity
- cost2
.complexity
;
2669 return cost1
.cost
- cost2
.cost
;
2672 /* Returns true if COST is infinite. */
2675 infinite_cost_p (comp_cost cost
)
2677 return cost
.cost
== INFTY
;
2680 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2681 on invariants DEPENDS_ON and that the value used in expressing it
2682 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2685 set_use_iv_cost (struct ivopts_data
*data
,
2686 struct iv_use
*use
, struct iv_cand
*cand
,
2687 comp_cost cost
, bitmap depends_on
, tree value
,
2688 enum tree_code comp
, int inv_expr_id
)
2692 if (infinite_cost_p (cost
))
2694 BITMAP_FREE (depends_on
);
2698 if (data
->consider_all_candidates
)
2700 use
->cost_map
[cand
->id
].cand
= cand
;
2701 use
->cost_map
[cand
->id
].cost
= cost
;
2702 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2703 use
->cost_map
[cand
->id
].value
= value
;
2704 use
->cost_map
[cand
->id
].comp
= comp
;
2705 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2709 /* n_map_members is a power of two, so this computes modulo. */
2710 s
= cand
->id
& (use
->n_map_members
- 1);
2711 for (i
= s
; i
< use
->n_map_members
; i
++)
2712 if (!use
->cost_map
[i
].cand
)
2714 for (i
= 0; i
< s
; i
++)
2715 if (!use
->cost_map
[i
].cand
)
2721 use
->cost_map
[i
].cand
= cand
;
2722 use
->cost_map
[i
].cost
= cost
;
2723 use
->cost_map
[i
].depends_on
= depends_on
;
2724 use
->cost_map
[i
].value
= value
;
2725 use
->cost_map
[i
].comp
= comp
;
2726 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2729 /* Gets cost of (USE, CANDIDATE) pair. */
2731 static struct cost_pair
*
2732 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2733 struct iv_cand
*cand
)
2736 struct cost_pair
*ret
;
2741 if (data
->consider_all_candidates
)
2743 ret
= use
->cost_map
+ cand
->id
;
2750 /* n_map_members is a power of two, so this computes modulo. */
2751 s
= cand
->id
& (use
->n_map_members
- 1);
2752 for (i
= s
; i
< use
->n_map_members
; i
++)
2753 if (use
->cost_map
[i
].cand
== cand
)
2754 return use
->cost_map
+ i
;
2755 else if (use
->cost_map
[i
].cand
== NULL
)
2757 for (i
= 0; i
< s
; i
++)
2758 if (use
->cost_map
[i
].cand
== cand
)
2759 return use
->cost_map
+ i
;
2760 else if (use
->cost_map
[i
].cand
== NULL
)
2766 /* Returns estimate on cost of computing SEQ. */
2769 seq_cost (rtx seq
, bool speed
)
2774 for (; seq
; seq
= NEXT_INSN (seq
))
2776 set
= single_set (seq
);
2778 cost
+= set_src_cost (SET_SRC (set
), speed
);
2786 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2788 produce_memory_decl_rtl (tree obj
, int *regno
)
2790 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2791 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2795 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2797 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2798 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2799 SET_SYMBOL_REF_DECL (x
, obj
);
2800 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2801 set_mem_addr_space (x
, as
);
2802 targetm
.encode_section_info (obj
, x
, true);
2806 x
= gen_raw_REG (address_mode
, (*regno
)++);
2807 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2808 set_mem_addr_space (x
, as
);
2814 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2815 walk_tree. DATA contains the actual fake register number. */
2818 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2820 tree obj
= NULL_TREE
;
2822 int *regno
= (int *) data
;
2824 switch (TREE_CODE (*expr_p
))
2827 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2828 handled_component_p (*expr_p
);
2829 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2832 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2833 x
= produce_memory_decl_rtl (obj
, regno
);
2838 obj
= SSA_NAME_VAR (*expr_p
);
2839 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2842 if (!DECL_RTL_SET_P (obj
))
2843 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2852 if (DECL_RTL_SET_P (obj
))
2855 if (DECL_MODE (obj
) == BLKmode
)
2856 x
= produce_memory_decl_rtl (obj
, regno
);
2858 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2868 decl_rtl_to_reset
.safe_push (obj
);
2869 SET_DECL_RTL (obj
, x
);
2875 /* Determines cost of the computation of EXPR. */
2878 computation_cost (tree expr
, bool speed
)
2881 tree type
= TREE_TYPE (expr
);
2883 /* Avoid using hard regs in ways which may be unsupported. */
2884 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2885 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2886 enum node_frequency real_frequency
= node
->frequency
;
2888 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2889 crtl
->maybe_hot_insn_p
= speed
;
2890 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2892 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2895 default_rtl_profile ();
2896 node
->frequency
= real_frequency
;
2898 cost
= seq_cost (seq
, speed
);
2900 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2901 TYPE_ADDR_SPACE (type
), speed
);
2902 else if (!REG_P (rslt
))
2903 cost
+= set_src_cost (rslt
, speed
);
2908 /* Returns variable containing the value of candidate CAND at statement AT. */
2911 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2913 if (stmt_after_increment (loop
, cand
, stmt
))
2914 return cand
->var_after
;
2916 return cand
->var_before
;
2919 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2920 same precision that is at least as wide as the precision of TYPE, stores
2921 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2925 determine_common_wider_type (tree
*a
, tree
*b
)
2927 tree wider_type
= NULL
;
2929 tree atype
= TREE_TYPE (*a
);
2931 if (CONVERT_EXPR_P (*a
))
2933 suba
= TREE_OPERAND (*a
, 0);
2934 wider_type
= TREE_TYPE (suba
);
2935 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2941 if (CONVERT_EXPR_P (*b
))
2943 subb
= TREE_OPERAND (*b
, 0);
2944 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2955 /* Determines the expression by that USE is expressed from induction variable
2956 CAND at statement AT in LOOP. The expression is stored in a decomposed
2957 form into AFF. Returns false if USE cannot be expressed using CAND. */
2960 get_computation_aff (struct loop
*loop
,
2961 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2962 struct affine_tree_combination
*aff
)
2964 tree ubase
= use
->iv
->base
;
2965 tree ustep
= use
->iv
->step
;
2966 tree cbase
= cand
->iv
->base
;
2967 tree cstep
= cand
->iv
->step
, cstep_common
;
2968 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2969 tree common_type
, var
;
2971 aff_tree cbase_aff
, var_aff
;
2974 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2976 /* We do not have a precision to express the values of use. */
2980 var
= var_at_stmt (loop
, cand
, at
);
2981 uutype
= unsigned_type_for (utype
);
2983 /* If the conversion is not noop, perform it. */
2984 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2986 cstep
= fold_convert (uutype
, cstep
);
2987 cbase
= fold_convert (uutype
, cbase
);
2988 var
= fold_convert (uutype
, var
);
2991 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2994 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2995 type, we achieve better folding by computing their difference in this
2996 wider type, and cast the result to UUTYPE. We do not need to worry about
2997 overflows, as all the arithmetics will in the end be performed in UUTYPE
2999 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3001 /* use = ubase - ratio * cbase + ratio * var. */
3002 tree_to_aff_combination (ubase
, common_type
, aff
);
3003 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3004 tree_to_aff_combination (var
, uutype
, &var_aff
);
3006 /* We need to shift the value if we are after the increment. */
3007 if (stmt_after_increment (loop
, cand
, at
))
3011 if (common_type
!= uutype
)
3012 cstep_common
= fold_convert (common_type
, cstep
);
3014 cstep_common
= cstep
;
3016 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3017 aff_combination_add (&cbase_aff
, &cstep_aff
);
3020 aff_combination_scale (&cbase_aff
, -rat
);
3021 aff_combination_add (aff
, &cbase_aff
);
3022 if (common_type
!= uutype
)
3023 aff_combination_convert (aff
, uutype
);
3025 aff_combination_scale (&var_aff
, rat
);
3026 aff_combination_add (aff
, &var_aff
);
3031 /* Return the type of USE. */
3034 get_use_type (struct iv_use
*use
)
3036 tree base_type
= TREE_TYPE (use
->iv
->base
);
3039 if (use
->type
== USE_ADDRESS
)
3041 /* The base_type may be a void pointer. Create a pointer type based on
3042 the mem_ref instead. */
3043 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3044 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3045 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3053 /* Determines the expression by that USE is expressed from induction variable
3054 CAND at statement AT in LOOP. The computation is unshared. */
3057 get_computation_at (struct loop
*loop
,
3058 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3061 tree type
= get_use_type (use
);
3063 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3065 unshare_aff_combination (&aff
);
3066 return fold_convert (type
, aff_combination_to_tree (&aff
));
3069 /* Determines the expression by that USE is expressed from induction variable
3070 CAND in LOOP. The computation is unshared. */
3073 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3075 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3078 /* Adjust the cost COST for being in loop setup rather than loop body.
3079 If we're optimizing for space, the loop setup overhead is constant;
3080 if we're optimizing for speed, amortize it over the per-iteration cost. */
3082 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3086 else if (optimize_loop_for_speed_p (data
->current_loop
))
3087 return cost
/ avg_loop_niter (data
->current_loop
);
3092 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3093 validity for a memory reference accessing memory of mode MODE in
3094 address space AS. */
3098 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3101 #define MAX_RATIO 128
3102 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3103 static vec
<sbitmap
> valid_mult_list
;
3106 if (data_index
>= valid_mult_list
.length ())
3107 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3109 valid_mult
= valid_mult_list
[data_index
];
3112 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3113 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3117 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3118 bitmap_clear (valid_mult
);
3119 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3120 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3122 XEXP (addr
, 1) = gen_int_mode (i
, address_mode
);
3123 if (memory_address_addr_space_p (mode
, addr
, as
))
3124 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3127 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3129 fprintf (dump_file
, " allowed multipliers:");
3130 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3131 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3132 fprintf (dump_file
, " %d", (int) i
);
3133 fprintf (dump_file
, "\n");
3134 fprintf (dump_file
, "\n");
3137 valid_mult_list
[data_index
] = valid_mult
;
3140 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3143 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3146 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3147 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3148 variable is omitted. Compute the cost for a memory reference that accesses
3149 a memory location of mode MEM_MODE in address space AS.
3151 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3152 size of MEM_MODE / RATIO) is available. To make this determination, we
3153 look at the size of the increment to be made, which is given in CSTEP.
3154 CSTEP may be zero if the step is unknown.
3155 STMT_AFTER_INC is true iff the statement we're looking at is after the
3156 increment of the original biv.
3158 TODO -- there must be some better way. This all is quite crude. */
3160 typedef struct address_cost_data_s
3162 HOST_WIDE_INT min_offset
, max_offset
;
3163 unsigned costs
[2][2][2][2];
3164 } *address_cost_data
;
3168 get_address_cost (bool symbol_present
, bool var_present
,
3169 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3170 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3171 addr_space_t as
, bool speed
,
3172 bool stmt_after_inc
, bool *may_autoinc
)
3174 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3175 static vec
<address_cost_data
> address_cost_data_list
;
3176 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3177 address_cost_data data
;
3178 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3179 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3180 unsigned cost
, acost
, complexity
;
3181 bool offset_p
, ratio_p
, autoinc
;
3182 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3183 unsigned HOST_WIDE_INT mask
;
3186 if (data_index
>= address_cost_data_list
.length ())
3187 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3189 data
= address_cost_data_list
[data_index
];
3193 HOST_WIDE_INT rat
, off
= 0;
3194 int old_cse_not_expected
, width
;
3195 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3196 rtx seq
, addr
, base
;
3199 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3201 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3203 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3204 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3205 width
= HOST_BITS_PER_WIDE_INT
- 1;
3206 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3208 for (i
= width
; i
>= 0; i
--)
3210 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3211 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3212 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3215 data
->min_offset
= (i
== -1? 0 : off
);
3217 for (i
= width
; i
>= 0; i
--)
3219 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3220 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3221 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3226 data
->max_offset
= off
;
3228 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3230 fprintf (dump_file
, "get_address_cost:\n");
3231 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3232 GET_MODE_NAME (mem_mode
),
3234 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3235 GET_MODE_NAME (mem_mode
),
3240 for (i
= 2; i
<= MAX_RATIO
; i
++)
3241 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3247 /* Compute the cost of various addressing modes. */
3249 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3250 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3252 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3253 || USE_STORE_PRE_DECREMENT (mem_mode
))
3255 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3256 has_predec
[mem_mode
]
3257 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3259 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3260 || USE_STORE_POST_DECREMENT (mem_mode
))
3262 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3263 has_postdec
[mem_mode
]
3264 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3266 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3267 || USE_STORE_PRE_DECREMENT (mem_mode
))
3269 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3270 has_preinc
[mem_mode
]
3271 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3273 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3274 || USE_STORE_POST_INCREMENT (mem_mode
))
3276 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3277 has_postinc
[mem_mode
]
3278 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3280 for (i
= 0; i
< 16; i
++)
3283 var_p
= (i
>> 1) & 1;
3284 off_p
= (i
>> 2) & 1;
3285 rat_p
= (i
>> 3) & 1;
3289 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3290 gen_int_mode (rat
, address_mode
));
3293 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3297 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3298 /* ??? We can run into trouble with some backends by presenting
3299 it with symbols which haven't been properly passed through
3300 targetm.encode_section_info. By setting the local bit, we
3301 enhance the probability of things working. */
3302 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3305 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3307 (PLUS
, address_mode
, base
,
3308 gen_int_mode (off
, address_mode
)));
3311 base
= gen_int_mode (off
, address_mode
);
3316 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3319 /* To avoid splitting addressing modes, pretend that no cse will
3321 old_cse_not_expected
= cse_not_expected
;
3322 cse_not_expected
= true;
3323 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3324 cse_not_expected
= old_cse_not_expected
;
3328 acost
= seq_cost (seq
, speed
);
3329 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3333 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3336 /* On some targets, it is quite expensive to load symbol to a register,
3337 which makes addresses that contain symbols look much more expensive.
3338 However, the symbol will have to be loaded in any case before the
3339 loop (and quite likely we have it in register already), so it does not
3340 make much sense to penalize them too heavily. So make some final
3341 tweaks for the SYMBOL_PRESENT modes:
3343 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3344 var is cheaper, use this mode with small penalty.
3345 If VAR_PRESENT is true, try whether the mode with
3346 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3347 if this is the case, use it. */
3348 add_c
= add_cost (speed
, address_mode
);
3349 for (i
= 0; i
< 8; i
++)
3352 off_p
= (i
>> 1) & 1;
3353 rat_p
= (i
>> 2) & 1;
3355 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3359 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3360 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3363 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3365 fprintf (dump_file
, "Address costs:\n");
3367 for (i
= 0; i
< 16; i
++)
3370 var_p
= (i
>> 1) & 1;
3371 off_p
= (i
>> 2) & 1;
3372 rat_p
= (i
>> 3) & 1;
3374 fprintf (dump_file
, " ");
3376 fprintf (dump_file
, "sym + ");
3378 fprintf (dump_file
, "var + ");
3380 fprintf (dump_file
, "cst + ");
3382 fprintf (dump_file
, "rat * ");
3384 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3385 fprintf (dump_file
, "index costs %d\n", acost
);
3387 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3388 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3389 fprintf (dump_file
, " May include autoinc/dec\n");
3390 fprintf (dump_file
, "\n");
3393 address_cost_data_list
[data_index
] = data
;
3396 bits
= GET_MODE_BITSIZE (address_mode
);
3397 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3399 if ((offset
>> (bits
- 1) & 1))
3404 msize
= GET_MODE_SIZE (mem_mode
);
3405 autoinc_offset
= offset
;
3407 autoinc_offset
+= ratio
* cstep
;
3408 if (symbol_present
|| var_present
|| ratio
!= 1)
3410 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3412 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3414 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3416 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3417 && msize
== -cstep
))
3421 offset_p
= (s_offset
!= 0
3422 && data
->min_offset
<= s_offset
3423 && s_offset
<= data
->max_offset
);
3424 ratio_p
= (ratio
!= 1
3425 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3427 if (ratio
!= 1 && !ratio_p
)
3428 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3430 if (s_offset
&& !offset_p
&& !symbol_present
)
3431 cost
+= add_cost (speed
, address_mode
);
3434 *may_autoinc
= autoinc
;
3435 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3436 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3437 return new_cost (cost
+ acost
, complexity
);
3440 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3441 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3442 calculating the operands of EXPR. Returns true if successful, and returns
3443 the cost in COST. */
3446 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3447 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3450 tree op1
= TREE_OPERAND (expr
, 1);
3451 tree cst
= TREE_OPERAND (mult
, 1);
3452 tree multop
= TREE_OPERAND (mult
, 0);
3453 int m
= exact_log2 (int_cst_value (cst
));
3454 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3457 if (!(m
>= 0 && m
< maxm
))
3460 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3461 ? shiftadd_cost (speed
, mode
, m
)
3463 ? shiftsub1_cost (speed
, mode
, m
)
3464 : shiftsub0_cost (speed
, mode
, m
)));
3465 res
= new_cost (sa_cost
, 0);
3466 res
= add_costs (res
, mult
== op1
? cost0
: cost1
);
3468 STRIP_NOPS (multop
);
3469 if (!is_gimple_val (multop
))
3470 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3477 /* Estimates cost of forcing expression EXPR into a variable. */
3480 force_expr_to_var_cost (tree expr
, bool speed
)
3482 static bool costs_initialized
= false;
3483 static unsigned integer_cost
[2];
3484 static unsigned symbol_cost
[2];
3485 static unsigned address_cost
[2];
3487 comp_cost cost0
, cost1
, cost
;
3488 enum machine_mode mode
;
3490 if (!costs_initialized
)
3492 tree type
= build_pointer_type (integer_type_node
);
3497 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3498 TREE_STATIC (var
) = 1;
3499 x
= produce_memory_decl_rtl (var
, NULL
);
3500 SET_DECL_RTL (var
, x
);
3502 addr
= build1 (ADDR_EXPR
, type
, var
);
3505 for (i
= 0; i
< 2; i
++)
3507 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3510 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3513 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3514 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3516 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3517 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3518 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3519 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3520 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3521 fprintf (dump_file
, "\n");
3525 costs_initialized
= true;
3530 if (SSA_VAR_P (expr
))
3533 if (is_gimple_min_invariant (expr
))
3535 if (TREE_CODE (expr
) == INTEGER_CST
)
3536 return new_cost (integer_cost
[speed
], 0);
3538 if (TREE_CODE (expr
) == ADDR_EXPR
)
3540 tree obj
= TREE_OPERAND (expr
, 0);
3542 if (TREE_CODE (obj
) == VAR_DECL
3543 || TREE_CODE (obj
) == PARM_DECL
3544 || TREE_CODE (obj
) == RESULT_DECL
)
3545 return new_cost (symbol_cost
[speed
], 0);
3548 return new_cost (address_cost
[speed
], 0);
3551 switch (TREE_CODE (expr
))
3553 case POINTER_PLUS_EXPR
:
3557 op0
= TREE_OPERAND (expr
, 0);
3558 op1
= TREE_OPERAND (expr
, 1);
3562 if (is_gimple_val (op0
))
3565 cost0
= force_expr_to_var_cost (op0
, speed
);
3567 if (is_gimple_val (op1
))
3570 cost1
= force_expr_to_var_cost (op1
, speed
);
3575 op0
= TREE_OPERAND (expr
, 0);
3579 if (is_gimple_val (op0
))
3582 cost0
= force_expr_to_var_cost (op0
, speed
);
3589 /* Just an arbitrary value, FIXME. */
3590 return new_cost (target_spill_cost
[speed
], 0);
3593 mode
= TYPE_MODE (TREE_TYPE (expr
));
3594 switch (TREE_CODE (expr
))
3596 case POINTER_PLUS_EXPR
:
3600 cost
= new_cost (add_cost (speed
, mode
), 0);
3601 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3603 tree mult
= NULL_TREE
;
3605 if (TREE_CODE (op1
) == MULT_EXPR
)
3607 else if (TREE_CODE (op0
) == MULT_EXPR
)
3610 if (mult
!= NULL_TREE
3611 && cst_fits_shwi_p (TREE_OPERAND (mult
, 1))
3612 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3619 if (cst_fits_shwi_p (op0
))
3620 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3622 else if (cst_fits_shwi_p (op1
))
3623 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3626 return new_cost (target_spill_cost
[speed
], 0);
3633 cost
= add_costs (cost
, cost0
);
3634 cost
= add_costs (cost
, cost1
);
3636 /* Bound the cost by target_spill_cost. The parts of complicated
3637 computations often are either loop invariant or at least can
3638 be shared between several iv uses, so letting this grow without
3639 limits would not give reasonable results. */
3640 if (cost
.cost
> (int) target_spill_cost
[speed
])
3641 cost
.cost
= target_spill_cost
[speed
];
3646 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3647 invariants the computation depends on. */
3650 force_var_cost (struct ivopts_data
*data
,
3651 tree expr
, bitmap
*depends_on
)
3655 fd_ivopts_data
= data
;
3656 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3659 return force_expr_to_var_cost (expr
, data
->speed
);
3662 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3663 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3664 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3665 invariants the computation depends on. */
3668 split_address_cost (struct ivopts_data
*data
,
3669 tree addr
, bool *symbol_present
, bool *var_present
,
3670 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3673 HOST_WIDE_INT bitsize
;
3674 HOST_WIDE_INT bitpos
;
3676 enum machine_mode mode
;
3677 int unsignedp
, volatilep
;
3679 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3680 &unsignedp
, &volatilep
, false);
3683 || bitpos
% BITS_PER_UNIT
!= 0
3684 || TREE_CODE (core
) != VAR_DECL
)
3686 *symbol_present
= false;
3687 *var_present
= true;
3688 fd_ivopts_data
= data
;
3689 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3690 return new_cost (target_spill_cost
[data
->speed
], 0);
3693 *offset
+= bitpos
/ BITS_PER_UNIT
;
3694 if (TREE_STATIC (core
)
3695 || DECL_EXTERNAL (core
))
3697 *symbol_present
= true;
3698 *var_present
= false;
3702 *symbol_present
= false;
3703 *var_present
= true;
3707 /* Estimates cost of expressing difference of addresses E1 - E2 as
3708 var + symbol + offset. The value of offset is added to OFFSET,
3709 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3710 part is missing. DEPENDS_ON is a set of the invariants the computation
3714 ptr_difference_cost (struct ivopts_data
*data
,
3715 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3716 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3718 HOST_WIDE_INT diff
= 0;
3719 aff_tree aff_e1
, aff_e2
;
3722 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3724 if (ptr_difference_const (e1
, e2
, &diff
))
3727 *symbol_present
= false;
3728 *var_present
= false;
3732 if (integer_zerop (e2
))
3733 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3734 symbol_present
, var_present
, offset
, depends_on
);
3736 *symbol_present
= false;
3737 *var_present
= true;
3739 type
= signed_type_for (TREE_TYPE (e1
));
3740 tree_to_aff_combination (e1
, type
, &aff_e1
);
3741 tree_to_aff_combination (e2
, type
, &aff_e2
);
3742 aff_combination_scale (&aff_e2
, -1);
3743 aff_combination_add (&aff_e1
, &aff_e2
);
3745 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3748 /* Estimates cost of expressing difference E1 - E2 as
3749 var + symbol + offset. The value of offset is added to OFFSET,
3750 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3751 part is missing. DEPENDS_ON is a set of the invariants the computation
3755 difference_cost (struct ivopts_data
*data
,
3756 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3757 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3759 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3760 unsigned HOST_WIDE_INT off1
, off2
;
3761 aff_tree aff_e1
, aff_e2
;
3764 e1
= strip_offset (e1
, &off1
);
3765 e2
= strip_offset (e2
, &off2
);
3766 *offset
+= off1
- off2
;
3771 if (TREE_CODE (e1
) == ADDR_EXPR
)
3772 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3773 offset
, depends_on
);
3774 *symbol_present
= false;
3776 if (operand_equal_p (e1
, e2
, 0))
3778 *var_present
= false;
3782 *var_present
= true;
3784 if (integer_zerop (e2
))
3785 return force_var_cost (data
, e1
, depends_on
);
3787 if (integer_zerop (e1
))
3789 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3790 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3794 type
= signed_type_for (TREE_TYPE (e1
));
3795 tree_to_aff_combination (e1
, type
, &aff_e1
);
3796 tree_to_aff_combination (e2
, type
, &aff_e2
);
3797 aff_combination_scale (&aff_e2
, -1);
3798 aff_combination_add (&aff_e1
, &aff_e2
);
3800 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3803 /* Returns true if AFF1 and AFF2 are identical. */
3806 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3810 if (aff1
->n
!= aff2
->n
)
3813 for (i
= 0; i
< aff1
->n
; i
++)
3815 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3818 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3824 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3827 get_expr_id (struct ivopts_data
*data
, tree expr
)
3829 struct iv_inv_expr_ent ent
;
3830 struct iv_inv_expr_ent
**slot
;
3833 ent
.hash
= iterative_hash_expr (expr
, 0);
3834 slot
= data
->inv_expr_tab
.find_slot (&ent
, INSERT
);
3838 *slot
= XNEW (struct iv_inv_expr_ent
);
3839 (*slot
)->expr
= expr
;
3840 (*slot
)->hash
= ent
.hash
;
3841 (*slot
)->id
= data
->inv_expr_id
++;
3845 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3846 requires a new compiler generated temporary. Returns -1 otherwise.
3847 ADDRESS_P is a flag indicating if the expression is for address
3851 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3852 tree cbase
, HOST_WIDE_INT ratio
,
3855 aff_tree ubase_aff
, cbase_aff
;
3863 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3864 && (TREE_CODE (cbase
) == INTEGER_CST
))
3867 /* Strips the constant part. */
3868 if (TREE_CODE (ubase
) == PLUS_EXPR
3869 || TREE_CODE (ubase
) == MINUS_EXPR
3870 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3872 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3873 ubase
= TREE_OPERAND (ubase
, 0);
3876 /* Strips the constant part. */
3877 if (TREE_CODE (cbase
) == PLUS_EXPR
3878 || TREE_CODE (cbase
) == MINUS_EXPR
3879 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3881 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3882 cbase
= TREE_OPERAND (cbase
, 0);
3887 if (((TREE_CODE (ubase
) == SSA_NAME
)
3888 || (TREE_CODE (ubase
) == ADDR_EXPR
3889 && is_gimple_min_invariant (ubase
)))
3890 && (TREE_CODE (cbase
) == INTEGER_CST
))
3893 if (((TREE_CODE (cbase
) == SSA_NAME
)
3894 || (TREE_CODE (cbase
) == ADDR_EXPR
3895 && is_gimple_min_invariant (cbase
)))
3896 && (TREE_CODE (ubase
) == INTEGER_CST
))
3902 if (operand_equal_p (ubase
, cbase
, 0))
3905 if (TREE_CODE (ubase
) == ADDR_EXPR
3906 && TREE_CODE (cbase
) == ADDR_EXPR
)
3910 usym
= TREE_OPERAND (ubase
, 0);
3911 csym
= TREE_OPERAND (cbase
, 0);
3912 if (TREE_CODE (usym
) == ARRAY_REF
)
3914 tree ind
= TREE_OPERAND (usym
, 1);
3915 if (TREE_CODE (ind
) == INTEGER_CST
3916 && tree_fits_shwi_p (ind
)
3917 && tree_to_shwi (ind
) == 0)
3918 usym
= TREE_OPERAND (usym
, 0);
3920 if (TREE_CODE (csym
) == ARRAY_REF
)
3922 tree ind
= TREE_OPERAND (csym
, 1);
3923 if (TREE_CODE (ind
) == INTEGER_CST
3924 && tree_fits_shwi_p (ind
)
3925 && tree_to_shwi (ind
) == 0)
3926 csym
= TREE_OPERAND (csym
, 0);
3928 if (operand_equal_p (usym
, csym
, 0))
3931 /* Now do more complex comparison */
3932 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
3933 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
3934 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
3938 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
3939 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
3941 aff_combination_scale (&cbase_aff
, -1 * ratio
);
3942 aff_combination_add (&ubase_aff
, &cbase_aff
);
3943 expr
= aff_combination_to_tree (&ubase_aff
);
3944 return get_expr_id (data
, expr
);
3949 /* Determines the cost of the computation by that USE is expressed
3950 from induction variable CAND. If ADDRESS_P is true, we just need
3951 to create an address from it, otherwise we want to get it into
3952 register. A set of invariants we depend on is stored in
3953 DEPENDS_ON. AT is the statement at that the value is computed.
3954 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3955 addressing is likely. */
3958 get_computation_cost_at (struct ivopts_data
*data
,
3959 struct iv_use
*use
, struct iv_cand
*cand
,
3960 bool address_p
, bitmap
*depends_on
, gimple at
,
3964 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3966 tree utype
= TREE_TYPE (ubase
), ctype
;
3967 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3968 HOST_WIDE_INT ratio
, aratio
;
3969 bool var_present
, symbol_present
, stmt_is_after_inc
;
3972 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
3973 enum machine_mode mem_mode
= (address_p
3974 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
3979 /* Only consider real candidates. */
3981 return infinite_cost
;
3983 cbase
= cand
->iv
->base
;
3984 cstep
= cand
->iv
->step
;
3985 ctype
= TREE_TYPE (cbase
);
3987 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3989 /* We do not have a precision to express the values of use. */
3990 return infinite_cost
;
3994 || (use
->iv
->base_object
3995 && cand
->iv
->base_object
3996 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
3997 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
3999 /* Do not try to express address of an object with computation based
4000 on address of a different object. This may cause problems in rtl
4001 level alias analysis (that does not expect this to be happening,
4002 as this is illegal in C), and would be unlikely to be useful
4004 if (use
->iv
->base_object
4005 && cand
->iv
->base_object
4006 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4007 return infinite_cost
;
4010 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4012 /* TODO -- add direct handling of this case. */
4016 /* CSTEPI is removed from the offset in case statement is after the
4017 increment. If the step is not constant, we use zero instead.
4018 This is a bit imprecise (there is the extra addition), but
4019 redundancy elimination is likely to transform the code so that
4020 it uses value of the variable before increment anyway,
4021 so it is not that much unrealistic. */
4022 if (cst_fits_shwi_p (cstep
))
4023 cstepi
= int_cst_value (cstep
);
4027 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4028 return infinite_cost
;
4030 if (wi::fits_shwi_p (rat
))
4031 ratio
= rat
.to_shwi ();
4033 return infinite_cost
;
4036 ctype
= TREE_TYPE (cbase
);
4038 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4040 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4041 or ratio == 1, it is better to handle this like
4043 ubase - ratio * cbase + ratio * var
4045 (also holds in the case ratio == -1, TODO. */
4047 if (cst_fits_shwi_p (cbase
))
4049 offset
= - ratio
* int_cst_value (cbase
);
4050 cost
= difference_cost (data
,
4051 ubase
, build_int_cst (utype
, 0),
4052 &symbol_present
, &var_present
, &offset
,
4054 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4056 else if (ratio
== 1)
4058 tree real_cbase
= cbase
;
4060 /* Check to see if any adjustment is needed. */
4061 if (cstepi
== 0 && stmt_is_after_inc
)
4063 aff_tree real_cbase_aff
;
4066 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4068 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4070 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4071 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4074 cost
= difference_cost (data
,
4076 &symbol_present
, &var_present
, &offset
,
4078 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4081 && !POINTER_TYPE_P (ctype
)
4082 && multiplier_allowed_in_address_p
4084 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4087 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4088 cost
= difference_cost (data
,
4090 &symbol_present
, &var_present
, &offset
,
4092 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4096 cost
= force_var_cost (data
, cbase
, depends_on
);
4097 cost
= add_costs (cost
,
4098 difference_cost (data
,
4099 ubase
, build_int_cst (utype
, 0),
4100 &symbol_present
, &var_present
,
4101 &offset
, depends_on
));
4102 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4103 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4109 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4110 /* Clear depends on. */
4111 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4112 bitmap_clear (*depends_on
);
4115 /* If we are after the increment, the value of the candidate is higher by
4117 if (stmt_is_after_inc
)
4118 offset
-= ratio
* cstepi
;
4120 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4121 (symbol/var1/const parts may be omitted). If we are looking for an
4122 address, find the cost of addressing this. */
4124 return add_costs (cost
,
4125 get_address_cost (symbol_present
, var_present
,
4126 offset
, ratio
, cstepi
,
4128 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4129 speed
, stmt_is_after_inc
,
4132 /* Otherwise estimate the costs for computing the expression. */
4133 if (!symbol_present
&& !var_present
&& !offset
)
4136 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4140 /* Symbol + offset should be compile-time computable so consider that they
4141 are added once to the variable, if present. */
4142 if (var_present
&& (symbol_present
|| offset
))
4143 cost
.cost
+= adjust_setup_cost (data
,
4144 add_cost (speed
, TYPE_MODE (ctype
)));
4146 /* Having offset does not affect runtime cost in case it is added to
4147 symbol, but it increases complexity. */
4151 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4153 aratio
= ratio
> 0 ? ratio
: -ratio
;
4155 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4160 *can_autoinc
= false;
4163 /* Just get the expression, expand it and measure the cost. */
4164 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4167 return infinite_cost
;
4170 comp
= build_simple_mem_ref (comp
);
4172 return new_cost (computation_cost (comp
, speed
), 0);
4176 /* Determines the cost of the computation by that USE is expressed
4177 from induction variable CAND. If ADDRESS_P is true, we just need
4178 to create an address from it, otherwise we want to get it into
4179 register. A set of invariants we depend on is stored in
4180 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4181 autoinc addressing is likely. */
4184 get_computation_cost (struct ivopts_data
*data
,
4185 struct iv_use
*use
, struct iv_cand
*cand
,
4186 bool address_p
, bitmap
*depends_on
,
4187 bool *can_autoinc
, int *inv_expr_id
)
4189 return get_computation_cost_at (data
,
4190 use
, cand
, address_p
, depends_on
, use
->stmt
,
4191 can_autoinc
, inv_expr_id
);
4194 /* Determines cost of basing replacement of USE on CAND in a generic
4198 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4199 struct iv_use
*use
, struct iv_cand
*cand
)
4203 int inv_expr_id
= -1;
4205 /* The simple case first -- if we need to express value of the preserved
4206 original biv, the cost is 0. This also prevents us from counting the
4207 cost of increment twice -- once at this use and once in the cost of
4209 if (cand
->pos
== IP_ORIGINAL
4210 && cand
->incremented_at
== use
->stmt
)
4212 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4217 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4218 NULL
, &inv_expr_id
);
4220 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4223 return !infinite_cost_p (cost
);
4226 /* Determines cost of basing replacement of USE on CAND in an address. */
4229 determine_use_iv_cost_address (struct ivopts_data
*data
,
4230 struct iv_use
*use
, struct iv_cand
*cand
)
4234 int inv_expr_id
= -1;
4235 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4236 &can_autoinc
, &inv_expr_id
);
4238 if (cand
->ainc_use
== use
)
4241 cost
.cost
-= cand
->cost_step
;
4242 /* If we generated the candidate solely for exploiting autoincrement
4243 opportunities, and it turns out it can't be used, set the cost to
4244 infinity to make sure we ignore it. */
4245 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4246 cost
= infinite_cost
;
4248 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4251 return !infinite_cost_p (cost
);
4254 /* Computes value of candidate CAND at position AT in iteration NITER, and
4255 stores it to VAL. */
4258 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4261 aff_tree step
, delta
, nit
;
4262 struct iv
*iv
= cand
->iv
;
4263 tree type
= TREE_TYPE (iv
->base
);
4264 tree steptype
= type
;
4265 if (POINTER_TYPE_P (type
))
4266 steptype
= sizetype
;
4268 tree_to_aff_combination (iv
->step
, steptype
, &step
);
4269 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4270 aff_combination_convert (&nit
, steptype
);
4271 aff_combination_mult (&nit
, &step
, &delta
);
4272 if (stmt_after_increment (loop
, cand
, at
))
4273 aff_combination_add (&delta
, &step
);
4275 tree_to_aff_combination (iv
->base
, type
, val
);
4276 aff_combination_add (val
, &delta
);
4279 /* Returns period of induction variable iv. */
4282 iv_period (struct iv
*iv
)
4284 tree step
= iv
->step
, period
, type
;
4287 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4289 type
= unsigned_type_for (TREE_TYPE (step
));
4290 /* Period of the iv is lcm (step, type_range)/step -1,
4291 i.e., N*type_range/step - 1. Since type range is power
4292 of two, N == (step >> num_of_ending_zeros_binary (step),
4293 so the final result is
4295 (type_range >> num_of_ending_zeros_binary (step)) - 1
4298 pow2div
= num_ending_zeros (step
);
4300 period
= build_low_bits_mask (type
,
4301 (TYPE_PRECISION (type
)
4302 - tree_to_uhwi (pow2div
)));
4307 /* Returns the comparison operator used when eliminating the iv USE. */
4309 static enum tree_code
4310 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4312 struct loop
*loop
= data
->current_loop
;
4316 ex_bb
= gimple_bb (use
->stmt
);
4317 exit
= EDGE_SUCC (ex_bb
, 0);
4318 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4319 exit
= EDGE_SUCC (ex_bb
, 1);
4321 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4325 strip_wrap_conserving_type_conversions (tree exp
)
4327 while (tree_ssa_useless_type_conversion (exp
)
4328 && (nowrap_type_p (TREE_TYPE (exp
))
4329 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4330 exp
= TREE_OPERAND (exp
, 0);
4334 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4335 check for an exact match. */
4338 expr_equal_p (tree e
, tree what
)
4341 enum tree_code code
;
4343 e
= strip_wrap_conserving_type_conversions (e
);
4344 what
= strip_wrap_conserving_type_conversions (what
);
4346 code
= TREE_CODE (what
);
4347 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4350 if (operand_equal_p (e
, what
, 0))
4353 if (TREE_CODE (e
) != SSA_NAME
)
4356 stmt
= SSA_NAME_DEF_STMT (e
);
4357 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4358 || gimple_assign_rhs_code (stmt
) != code
)
4361 switch (get_gimple_rhs_class (code
))
4363 case GIMPLE_BINARY_RHS
:
4364 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4368 case GIMPLE_UNARY_RHS
:
4369 case GIMPLE_SINGLE_RHS
:
4370 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4376 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4377 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4378 calculation is performed in non-wrapping type.
4380 TODO: More generally, we could test for the situation that
4381 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4382 This would require knowing the sign of OFFSET.
4384 Also, we only look for the first addition in the computation of BASE.
4385 More complex analysis would be better, but introducing it just for
4386 this optimization seems like an overkill. */
4389 difference_cannot_overflow_p (tree base
, tree offset
)
4391 enum tree_code code
;
4394 if (!nowrap_type_p (TREE_TYPE (base
)))
4397 base
= expand_simple_operations (base
);
4399 if (TREE_CODE (base
) == SSA_NAME
)
4401 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4403 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4406 code
= gimple_assign_rhs_code (stmt
);
4407 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4410 e1
= gimple_assign_rhs1 (stmt
);
4411 e2
= gimple_assign_rhs2 (stmt
);
4415 code
= TREE_CODE (base
);
4416 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4418 e1
= TREE_OPERAND (base
, 0);
4419 e2
= TREE_OPERAND (base
, 1);
4422 /* TODO: deeper inspection may be necessary to prove the equality. */
4426 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4427 case POINTER_PLUS_EXPR
:
4428 return expr_equal_p (e2
, offset
);
4435 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4436 comparison with CAND. NITER describes the number of iterations of
4437 the loops. If successful, the comparison in COMP_P is altered accordingly.
4439 We aim to handle the following situation:
4455 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4456 We aim to optimize this to
4464 while (p < p_0 - a + b);
4466 This preserves the correctness, since the pointer arithmetics does not
4467 overflow. More precisely:
4469 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4470 overflow in computing it or the values of p.
4471 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4472 overflow. To prove this, we use the fact that p_0 = base + a. */
4475 iv_elimination_compare_lt (struct ivopts_data
*data
,
4476 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4477 struct tree_niter_desc
*niter
)
4479 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4480 struct affine_tree_combination nit
, tmpa
, tmpb
;
4481 enum tree_code comp
;
4484 /* We need to know that the candidate induction variable does not overflow.
4485 While more complex analysis may be used to prove this, for now just
4486 check that the variable appears in the original program and that it
4487 is computed in a type that guarantees no overflows. */
4488 cand_type
= TREE_TYPE (cand
->iv
->base
);
4489 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4492 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4493 the calculation of the BOUND could overflow, making the comparison
4495 if (!data
->loop_single_exit_p
)
4498 /* We need to be able to decide whether candidate is increasing or decreasing
4499 in order to choose the right comparison operator. */
4500 if (!cst_fits_shwi_p (cand
->iv
->step
))
4502 step
= int_cst_value (cand
->iv
->step
);
4504 /* Check that the number of iterations matches the expected pattern:
4505 a + 1 > b ? 0 : b - a - 1. */
4506 mbz
= niter
->may_be_zero
;
4507 if (TREE_CODE (mbz
) == GT_EXPR
)
4509 /* Handle a + 1 > b. */
4510 tree op0
= TREE_OPERAND (mbz
, 0);
4511 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4513 a
= TREE_OPERAND (op0
, 0);
4514 b
= TREE_OPERAND (mbz
, 1);
4519 else if (TREE_CODE (mbz
) == LT_EXPR
)
4521 tree op1
= TREE_OPERAND (mbz
, 1);
4523 /* Handle b < a + 1. */
4524 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4526 a
= TREE_OPERAND (op1
, 0);
4527 b
= TREE_OPERAND (mbz
, 0);
4535 /* Expected number of iterations is B - A - 1. Check that it matches
4536 the actual number, i.e., that B - A - NITER = 1. */
4537 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4538 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4539 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4540 aff_combination_scale (&nit
, -1);
4541 aff_combination_scale (&tmpa
, -1);
4542 aff_combination_add (&tmpb
, &tmpa
);
4543 aff_combination_add (&tmpb
, &nit
);
4544 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4547 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4549 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4551 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4552 if (!difference_cannot_overflow_p (cand
->iv
->base
, offset
))
4555 /* Determine the new comparison operator. */
4556 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4557 if (*comp_p
== NE_EXPR
)
4559 else if (*comp_p
== EQ_EXPR
)
4560 *comp_p
= invert_tree_comparison (comp
, false);
4567 /* Check whether it is possible to express the condition in USE by comparison
4568 of candidate CAND. If so, store the value compared with to BOUND, and the
4569 comparison operator to COMP. */
4572 may_eliminate_iv (struct ivopts_data
*data
,
4573 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4574 enum tree_code
*comp
)
4579 struct loop
*loop
= data
->current_loop
;
4581 struct tree_niter_desc
*desc
= NULL
;
4583 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4586 /* For now works only for exits that dominate the loop latch.
4587 TODO: extend to other conditions inside loop body. */
4588 ex_bb
= gimple_bb (use
->stmt
);
4589 if (use
->stmt
!= last_stmt (ex_bb
)
4590 || gimple_code (use
->stmt
) != GIMPLE_COND
4591 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4594 exit
= EDGE_SUCC (ex_bb
, 0);
4595 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4596 exit
= EDGE_SUCC (ex_bb
, 1);
4597 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4600 desc
= niter_for_exit (data
, exit
);
4604 /* Determine whether we can use the variable to test the exit condition.
4605 This is the case iff the period of the induction variable is greater
4606 than the number of iterations for which the exit condition is true. */
4607 period
= iv_period (cand
->iv
);
4609 /* If the number of iterations is constant, compare against it directly. */
4610 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4612 /* See cand_value_at. */
4613 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4615 if (!tree_int_cst_lt (desc
->niter
, period
))
4620 if (tree_int_cst_lt (period
, desc
->niter
))
4625 /* If not, and if this is the only possible exit of the loop, see whether
4626 we can get a conservative estimate on the number of iterations of the
4627 entire loop and compare against that instead. */
4630 widest_int period_value
, max_niter
;
4632 max_niter
= desc
->max
;
4633 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4635 period_value
= wi::to_widest (period
);
4636 if (wi::gtu_p (max_niter
, period_value
))
4638 /* See if we can take advantage of inferred loop bound information. */
4639 if (data
->loop_single_exit_p
)
4641 if (!max_loop_iterations (loop
, &max_niter
))
4643 /* The loop bound is already adjusted by adding 1. */
4644 if (wi::gtu_p (max_niter
, period_value
))
4652 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4654 *bound
= aff_combination_to_tree (&bnd
);
4655 *comp
= iv_elimination_compare (data
, use
);
4657 /* It is unlikely that computing the number of iterations using division
4658 would be more profitable than keeping the original induction variable. */
4659 if (expression_expensive_p (*bound
))
4662 /* Sometimes, it is possible to handle the situation that the number of
4663 iterations may be zero unless additional assumtions by using <
4664 instead of != in the exit condition.
4666 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4667 base the exit condition on it. However, that is often too
4669 if (!integer_zerop (desc
->may_be_zero
))
4670 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4675 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4676 be copied, if is is used in the loop body and DATA->body_includes_call. */
4679 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4681 tree sbound
= bound
;
4682 STRIP_NOPS (sbound
);
4684 if (TREE_CODE (sbound
) == SSA_NAME
4685 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4686 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4687 && data
->body_includes_call
)
4688 return COSTS_N_INSNS (1);
4693 /* Determines cost of basing replacement of USE on CAND in a condition. */
4696 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4697 struct iv_use
*use
, struct iv_cand
*cand
)
4699 tree bound
= NULL_TREE
;
4701 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4702 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4704 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4705 tree
*control_var
, *bound_cst
;
4706 enum tree_code comp
= ERROR_MARK
;
4708 /* Only consider real candidates. */
4711 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4716 /* Try iv elimination. */
4717 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4719 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4720 if (elim_cost
.cost
== 0)
4721 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4722 else if (TREE_CODE (bound
) == INTEGER_CST
)
4724 /* If we replace a loop condition 'i < n' with 'p < base + n',
4725 depends_on_elim will have 'base' and 'n' set, which implies
4726 that both 'base' and 'n' will be live during the loop. More likely,
4727 'base + n' will be loop invariant, resulting in only one live value
4728 during the loop. So in that case we clear depends_on_elim and set
4729 elim_inv_expr_id instead. */
4730 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4732 elim_inv_expr_id
= get_expr_id (data
, bound
);
4733 bitmap_clear (depends_on_elim
);
4735 /* The bound is a loop invariant, so it will be only computed
4737 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4740 elim_cost
= infinite_cost
;
4742 /* Try expressing the original giv. If it is compared with an invariant,
4743 note that we cannot get rid of it. */
4744 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4748 /* When the condition is a comparison of the candidate IV against
4749 zero, prefer this IV.
4751 TODO: The constant that we're subtracting from the cost should
4752 be target-dependent. This information should be added to the
4753 target costs for each backend. */
4754 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4755 && integer_zerop (*bound_cst
)
4756 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4757 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4758 elim_cost
.cost
-= 1;
4760 express_cost
= get_computation_cost (data
, use
, cand
, false,
4761 &depends_on_express
, NULL
,
4762 &express_inv_expr_id
);
4763 fd_ivopts_data
= data
;
4764 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4766 /* Count the cost of the original bound as well. */
4767 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4768 if (bound_cost
.cost
== 0)
4769 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4770 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4771 bound_cost
.cost
= 0;
4772 express_cost
.cost
+= bound_cost
.cost
;
4774 /* Choose the better approach, preferring the eliminated IV. */
4775 if (compare_costs (elim_cost
, express_cost
) <= 0)
4778 depends_on
= depends_on_elim
;
4779 depends_on_elim
= NULL
;
4780 inv_expr_id
= elim_inv_expr_id
;
4784 cost
= express_cost
;
4785 depends_on
= depends_on_express
;
4786 depends_on_express
= NULL
;
4789 inv_expr_id
= express_inv_expr_id
;
4792 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4794 if (depends_on_elim
)
4795 BITMAP_FREE (depends_on_elim
);
4796 if (depends_on_express
)
4797 BITMAP_FREE (depends_on_express
);
4799 return !infinite_cost_p (cost
);
4802 /* Determines cost of basing replacement of USE on CAND. Returns false
4803 if USE cannot be based on CAND. */
4806 determine_use_iv_cost (struct ivopts_data
*data
,
4807 struct iv_use
*use
, struct iv_cand
*cand
)
4811 case USE_NONLINEAR_EXPR
:
4812 return determine_use_iv_cost_generic (data
, use
, cand
);
4815 return determine_use_iv_cost_address (data
, use
, cand
);
4818 return determine_use_iv_cost_condition (data
, use
, cand
);
4825 /* Return true if get_computation_cost indicates that autoincrement is
4826 a possibility for the pair of USE and CAND, false otherwise. */
4829 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4830 struct iv_cand
*cand
)
4836 if (use
->type
!= USE_ADDRESS
)
4839 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4840 &can_autoinc
, NULL
);
4842 BITMAP_FREE (depends_on
);
4844 return !infinite_cost_p (cost
) && can_autoinc
;
4847 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4848 use that allows autoincrement, and set their AINC_USE if possible. */
4851 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4855 for (i
= 0; i
< n_iv_cands (data
); i
++)
4857 struct iv_cand
*cand
= iv_cand (data
, i
);
4858 struct iv_use
*closest_before
= NULL
;
4859 struct iv_use
*closest_after
= NULL
;
4860 if (cand
->pos
!= IP_ORIGINAL
)
4863 for (j
= 0; j
< n_iv_uses (data
); j
++)
4865 struct iv_use
*use
= iv_use (data
, j
);
4866 unsigned uid
= gimple_uid (use
->stmt
);
4868 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
4871 if (uid
< gimple_uid (cand
->incremented_at
)
4872 && (closest_before
== NULL
4873 || uid
> gimple_uid (closest_before
->stmt
)))
4874 closest_before
= use
;
4876 if (uid
> gimple_uid (cand
->incremented_at
)
4877 && (closest_after
== NULL
4878 || uid
< gimple_uid (closest_after
->stmt
)))
4879 closest_after
= use
;
4882 if (closest_before
!= NULL
4883 && autoinc_possible_for_pair (data
, closest_before
, cand
))
4884 cand
->ainc_use
= closest_before
;
4885 else if (closest_after
!= NULL
4886 && autoinc_possible_for_pair (data
, closest_after
, cand
))
4887 cand
->ainc_use
= closest_after
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4911 /* Determines costs of basing the use of the iv on an iv candidate. */
4914 determine_use_iv_costs (struct ivopts_data
*data
)
4918 struct iv_cand
*cand
;
4919 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4921 alloc_use_cost_map (data
);
4923 for (i
= 0; i
< n_iv_uses (data
); i
++)
4925 use
= iv_use (data
, i
);
4927 if (data
->consider_all_candidates
)
4929 for (j
= 0; j
< n_iv_cands (data
); j
++)
4931 cand
= iv_cand (data
, j
);
4932 determine_use_iv_cost (data
, use
, cand
);
4939 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4941 cand
= iv_cand (data
, j
);
4942 if (!determine_use_iv_cost (data
, use
, cand
))
4943 bitmap_set_bit (to_clear
, j
);
4946 /* Remove the candidates for that the cost is infinite from
4947 the list of related candidates. */
4948 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4949 bitmap_clear (to_clear
);
4953 BITMAP_FREE (to_clear
);
4955 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4957 fprintf (dump_file
, "Use-candidate costs:\n");
4959 for (i
= 0; i
< n_iv_uses (data
); i
++)
4961 use
= iv_use (data
, i
);
4963 fprintf (dump_file
, "Use %d:\n", i
);
4964 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4965 for (j
= 0; j
< use
->n_map_members
; j
++)
4967 if (!use
->cost_map
[j
].cand
4968 || infinite_cost_p (use
->cost_map
[j
].cost
))
4971 fprintf (dump_file
, " %d\t%d\t%d\t",
4972 use
->cost_map
[j
].cand
->id
,
4973 use
->cost_map
[j
].cost
.cost
,
4974 use
->cost_map
[j
].cost
.complexity
);
4975 if (use
->cost_map
[j
].depends_on
)
4976 bitmap_print (dump_file
,
4977 use
->cost_map
[j
].depends_on
, "","");
4978 if (use
->cost_map
[j
].inv_expr_id
!= -1)
4979 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
4980 fprintf (dump_file
, "\n");
4983 fprintf (dump_file
, "\n");
4985 fprintf (dump_file
, "\n");
4989 /* Determines cost of the candidate CAND. */
4992 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4994 comp_cost cost_base
;
4995 unsigned cost
, cost_step
;
5004 /* There are two costs associated with the candidate -- its increment
5005 and its initialization. The second is almost negligible for any loop
5006 that rolls enough, so we take it just very little into account. */
5008 base
= cand
->iv
->base
;
5009 cost_base
= force_var_cost (data
, base
, NULL
);
5010 /* It will be exceptional that the iv register happens to be initialized with
5011 the proper value at no cost. In general, there will at least be a regcopy
5013 if (cost_base
.cost
== 0)
5014 cost_base
.cost
= COSTS_N_INSNS (1);
5015 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5017 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5019 /* Prefer the original ivs unless we may gain something by replacing it.
5020 The reason is to make debugging simpler; so this is not relevant for
5021 artificial ivs created by other optimization passes. */
5022 if (cand
->pos
!= IP_ORIGINAL
5023 || !SSA_NAME_VAR (cand
->var_before
)
5024 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5027 /* Prefer not to insert statements into latch unless there are some
5028 already (so that we do not create unnecessary jumps). */
5029 if (cand
->pos
== IP_END
5030 && empty_block_p (ip_end_pos (data
->current_loop
)))
5034 cand
->cost_step
= cost_step
;
5037 /* Determines costs of computation of the candidates. */
5040 determine_iv_costs (struct ivopts_data
*data
)
5044 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5046 fprintf (dump_file
, "Candidate costs:\n");
5047 fprintf (dump_file
, " cand\tcost\n");
5050 for (i
= 0; i
< n_iv_cands (data
); i
++)
5052 struct iv_cand
*cand
= iv_cand (data
, i
);
5054 determine_iv_cost (data
, cand
);
5056 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5057 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5060 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5061 fprintf (dump_file
, "\n");
5064 /* Calculates cost for having SIZE induction variables. */
5067 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5069 /* We add size to the cost, so that we prefer eliminating ivs
5071 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5072 data
->body_includes_call
);
5075 /* For each size of the induction variable set determine the penalty. */
5078 determine_set_costs (struct ivopts_data
*data
)
5082 gimple_stmt_iterator psi
;
5084 struct loop
*loop
= data
->current_loop
;
5087 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5089 fprintf (dump_file
, "Global costs:\n");
5090 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5091 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5092 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5093 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5097 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5099 phi
= gsi_stmt (psi
);
5100 op
= PHI_RESULT (phi
);
5102 if (virtual_operand_p (op
))
5105 if (get_iv (data
, op
))
5111 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5113 struct version_info
*info
= ver_info (data
, j
);
5115 if (info
->inv_id
&& info
->has_nonlin_use
)
5119 data
->regs_used
= n
;
5120 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5121 fprintf (dump_file
, " regs_used %d\n", n
);
5123 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5125 fprintf (dump_file
, " cost for size:\n");
5126 fprintf (dump_file
, " ivs\tcost\n");
5127 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5128 fprintf (dump_file
, " %d\t%d\n", j
,
5129 ivopts_global_cost_for_size (data
, j
));
5130 fprintf (dump_file
, "\n");
5134 /* Returns true if A is a cheaper cost pair than B. */
5137 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5147 cmp
= compare_costs (a
->cost
, b
->cost
);
5154 /* In case the costs are the same, prefer the cheaper candidate. */
5155 if (a
->cand
->cost
< b
->cand
->cost
)
5162 /* Returns candidate by that USE is expressed in IVS. */
5164 static struct cost_pair
*
5165 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5167 return ivs
->cand_for_use
[use
->id
];
5170 /* Computes the cost field of IVS structure. */
5173 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5175 comp_cost cost
= ivs
->cand_use_cost
;
5177 cost
.cost
+= ivs
->cand_cost
;
5179 cost
.cost
+= ivopts_global_cost_for_size (data
,
5180 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5185 /* Remove invariants in set INVS to set IVS. */
5188 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5196 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5198 ivs
->n_invariant_uses
[iid
]--;
5199 if (ivs
->n_invariant_uses
[iid
] == 0)
5204 /* Set USE not to be expressed by any candidate in IVS. */
5207 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5210 unsigned uid
= use
->id
, cid
;
5211 struct cost_pair
*cp
;
5213 cp
= ivs
->cand_for_use
[uid
];
5219 ivs
->cand_for_use
[uid
] = NULL
;
5220 ivs
->n_cand_uses
[cid
]--;
5222 if (ivs
->n_cand_uses
[cid
] == 0)
5224 bitmap_clear_bit (ivs
->cands
, cid
);
5225 /* Do not count the pseudocandidates. */
5229 ivs
->cand_cost
-= cp
->cand
->cost
;
5231 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5234 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5236 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5238 if (cp
->inv_expr_id
!= -1)
5240 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5241 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5242 ivs
->num_used_inv_expr
--;
5244 iv_ca_recount_cost (data
, ivs
);
5247 /* Add invariants in set INVS to set IVS. */
5250 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5258 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5260 ivs
->n_invariant_uses
[iid
]++;
5261 if (ivs
->n_invariant_uses
[iid
] == 1)
5266 /* Set cost pair for USE in set IVS to CP. */
5269 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5270 struct iv_use
*use
, struct cost_pair
*cp
)
5272 unsigned uid
= use
->id
, cid
;
5274 if (ivs
->cand_for_use
[uid
] == cp
)
5277 if (ivs
->cand_for_use
[uid
])
5278 iv_ca_set_no_cp (data
, ivs
, use
);
5285 ivs
->cand_for_use
[uid
] = cp
;
5286 ivs
->n_cand_uses
[cid
]++;
5287 if (ivs
->n_cand_uses
[cid
] == 1)
5289 bitmap_set_bit (ivs
->cands
, cid
);
5290 /* Do not count the pseudocandidates. */
5294 ivs
->cand_cost
+= cp
->cand
->cost
;
5296 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5299 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5300 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5302 if (cp
->inv_expr_id
!= -1)
5304 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5305 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5306 ivs
->num_used_inv_expr
++;
5308 iv_ca_recount_cost (data
, ivs
);
5312 /* Extend set IVS by expressing USE by some of the candidates in it
5313 if possible. All important candidates will be considered
5314 if IMPORTANT_CANDIDATES is true. */
5317 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5318 struct iv_use
*use
, bool important_candidates
)
5320 struct cost_pair
*best_cp
= NULL
, *cp
;
5325 gcc_assert (ivs
->upto
>= use
->id
);
5327 if (ivs
->upto
== use
->id
)
5333 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5334 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5336 struct iv_cand
*cand
= iv_cand (data
, i
);
5338 cp
= get_use_iv_cost (data
, use
, cand
);
5340 if (cheaper_cost_pair (cp
, best_cp
))
5344 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5347 /* Get cost for assignment IVS. */
5350 iv_ca_cost (struct iv_ca
*ivs
)
5352 /* This was a conditional expression but it triggered a bug in
5355 return infinite_cost
;
5360 /* Returns true if all dependences of CP are among invariants in IVS. */
5363 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5368 if (!cp
->depends_on
)
5371 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5373 if (ivs
->n_invariant_uses
[i
] == 0)
5380 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5381 it before NEXT_CHANGE. */
5383 static struct iv_ca_delta
*
5384 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5385 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5387 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5390 change
->old_cp
= old_cp
;
5391 change
->new_cp
= new_cp
;
5392 change
->next_change
= next_change
;
5397 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5400 static struct iv_ca_delta
*
5401 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5403 struct iv_ca_delta
*last
;
5411 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5413 last
->next_change
= l2
;
5418 /* Reverse the list of changes DELTA, forming the inverse to it. */
5420 static struct iv_ca_delta
*
5421 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5423 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5424 struct cost_pair
*tmp
;
5426 for (act
= delta
; act
; act
= next
)
5428 next
= act
->next_change
;
5429 act
->next_change
= prev
;
5433 act
->old_cp
= act
->new_cp
;
5440 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5441 reverted instead. */
5444 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5445 struct iv_ca_delta
*delta
, bool forward
)
5447 struct cost_pair
*from
, *to
;
5448 struct iv_ca_delta
*act
;
5451 delta
= iv_ca_delta_reverse (delta
);
5453 for (act
= delta
; act
; act
= act
->next_change
)
5457 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5458 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5462 iv_ca_delta_reverse (delta
);
5465 /* Returns true if CAND is used in IVS. */
5468 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5470 return ivs
->n_cand_uses
[cand
->id
] > 0;
5473 /* Returns number of induction variable candidates in the set IVS. */
5476 iv_ca_n_cands (struct iv_ca
*ivs
)
5478 return ivs
->n_cands
;
5481 /* Free the list of changes DELTA. */
5484 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5486 struct iv_ca_delta
*act
, *next
;
5488 for (act
= *delta
; act
; act
= next
)
5490 next
= act
->next_change
;
5497 /* Allocates new iv candidates assignment. */
5499 static struct iv_ca
*
5500 iv_ca_new (struct ivopts_data
*data
)
5502 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5506 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5507 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5508 nw
->cands
= BITMAP_ALLOC (NULL
);
5511 nw
->cand_use_cost
= no_cost
;
5513 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5515 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5516 nw
->num_used_inv_expr
= 0;
5521 /* Free memory occupied by the set IVS. */
5524 iv_ca_free (struct iv_ca
**ivs
)
5526 free ((*ivs
)->cand_for_use
);
5527 free ((*ivs
)->n_cand_uses
);
5528 BITMAP_FREE ((*ivs
)->cands
);
5529 free ((*ivs
)->n_invariant_uses
);
5530 free ((*ivs
)->used_inv_expr
);
5535 /* Dumps IVS to FILE. */
5538 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5540 const char *pref
= " invariants ";
5542 comp_cost cost
= iv_ca_cost (ivs
);
5544 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5545 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5546 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5547 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5549 for (i
= 0; i
< ivs
->upto
; i
++)
5551 struct iv_use
*use
= iv_use (data
, i
);
5552 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5554 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5555 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5557 fprintf (file
, " use:%d --> ??\n", use
->id
);
5560 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5561 if (ivs
->n_invariant_uses
[i
])
5563 fprintf (file
, "%s%d", pref
, i
);
5566 fprintf (file
, "\n\n");
5569 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5570 new set, and store differences in DELTA. Number of induction variables
5571 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5572 the function will try to find a solution with mimimal iv candidates. */
5575 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5576 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5577 unsigned *n_ivs
, bool min_ncand
)
5582 struct cost_pair
*old_cp
, *new_cp
;
5585 for (i
= 0; i
< ivs
->upto
; i
++)
5587 use
= iv_use (data
, i
);
5588 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5591 && old_cp
->cand
== cand
)
5594 new_cp
= get_use_iv_cost (data
, use
, cand
);
5598 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5601 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5604 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5607 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5608 cost
= iv_ca_cost (ivs
);
5610 *n_ivs
= iv_ca_n_cands (ivs
);
5611 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5616 /* Try narrowing set IVS by removing CAND. Return the cost of
5617 the new set and store the differences in DELTA. */
5620 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5621 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
5625 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5627 struct iv_cand
*cnd
;
5631 for (i
= 0; i
< n_iv_uses (data
); i
++)
5633 use
= iv_use (data
, i
);
5635 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5636 if (old_cp
->cand
!= cand
)
5641 if (data
->consider_all_candidates
)
5643 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5648 cnd
= iv_cand (data
, ci
);
5650 cp
= get_use_iv_cost (data
, use
, cnd
);
5654 if (!iv_ca_has_deps (ivs
, cp
))
5657 if (!cheaper_cost_pair (cp
, new_cp
))
5665 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5670 cnd
= iv_cand (data
, ci
);
5672 cp
= get_use_iv_cost (data
, use
, cnd
);
5675 if (!iv_ca_has_deps (ivs
, cp
))
5678 if (!cheaper_cost_pair (cp
, new_cp
))
5687 iv_ca_delta_free (delta
);
5688 return infinite_cost
;
5691 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5694 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5695 cost
= iv_ca_cost (ivs
);
5696 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5701 /* Try optimizing the set of candidates IVS by removing candidates different
5702 from to EXCEPT_CAND from it. Return cost of the new set, and store
5703 differences in DELTA. */
5706 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5707 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5710 struct iv_ca_delta
*act_delta
, *best_delta
;
5712 comp_cost best_cost
, acost
;
5713 struct iv_cand
*cand
;
5716 best_cost
= iv_ca_cost (ivs
);
5718 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5720 cand
= iv_cand (data
, i
);
5722 if (cand
== except_cand
)
5725 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5727 if (compare_costs (acost
, best_cost
) < 0)
5730 iv_ca_delta_free (&best_delta
);
5731 best_delta
= act_delta
;
5734 iv_ca_delta_free (&act_delta
);
5743 /* Recurse to possibly remove other unnecessary ivs. */
5744 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5745 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5746 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5747 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5751 /* Tries to extend the sets IVS in the best possible way in order
5752 to express the USE. If ORIGINALP is true, prefer candidates from
5753 the original set of IVs, otherwise favor important candidates not
5754 based on any memory object. */
5757 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5758 struct iv_use
*use
, bool originalp
)
5760 comp_cost best_cost
, act_cost
;
5763 struct iv_cand
*cand
;
5764 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5765 struct cost_pair
*cp
;
5767 iv_ca_add_use (data
, ivs
, use
, false);
5768 best_cost
= iv_ca_cost (ivs
);
5770 cp
= iv_ca_cand_for_use (ivs
, use
);
5775 iv_ca_add_use (data
, ivs
, use
, true);
5776 best_cost
= iv_ca_cost (ivs
);
5777 cp
= iv_ca_cand_for_use (ivs
, use
);
5781 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5782 iv_ca_set_no_cp (data
, ivs
, use
);
5785 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5786 first try important candidates not based on any memory object. Only if
5787 this fails, try the specific ones. Rationale -- in loops with many
5788 variables the best choice often is to use just one generic biv. If we
5789 added here many ivs specific to the uses, the optimization algorithm later
5790 would be likely to get stuck in a local minimum, thus causing us to create
5791 too many ivs. The approach from few ivs to more seems more likely to be
5792 successful -- starting from few ivs, replacing an expensive use by a
5793 specific iv should always be a win. */
5794 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5796 cand
= iv_cand (data
, i
);
5798 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5801 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5804 if (iv_ca_cand_used_p (ivs
, cand
))
5807 cp
= get_use_iv_cost (data
, use
, cand
);
5811 iv_ca_set_cp (data
, ivs
, use
, cp
);
5812 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5814 iv_ca_set_no_cp (data
, ivs
, use
);
5815 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5817 if (compare_costs (act_cost
, best_cost
) < 0)
5819 best_cost
= act_cost
;
5821 iv_ca_delta_free (&best_delta
);
5822 best_delta
= act_delta
;
5825 iv_ca_delta_free (&act_delta
);
5828 if (infinite_cost_p (best_cost
))
5830 for (i
= 0; i
< use
->n_map_members
; i
++)
5832 cp
= use
->cost_map
+ i
;
5837 /* Already tried this. */
5838 if (cand
->important
)
5840 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5842 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5846 if (iv_ca_cand_used_p (ivs
, cand
))
5850 iv_ca_set_cp (data
, ivs
, use
, cp
);
5851 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5852 iv_ca_set_no_cp (data
, ivs
, use
);
5853 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5856 if (compare_costs (act_cost
, best_cost
) < 0)
5858 best_cost
= act_cost
;
5861 iv_ca_delta_free (&best_delta
);
5862 best_delta
= act_delta
;
5865 iv_ca_delta_free (&act_delta
);
5869 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5870 iv_ca_delta_free (&best_delta
);
5872 return !infinite_cost_p (best_cost
);
5875 /* Finds an initial assignment of candidates to uses. */
5877 static struct iv_ca
*
5878 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
5880 struct iv_ca
*ivs
= iv_ca_new (data
);
5883 for (i
= 0; i
< n_iv_uses (data
); i
++)
5884 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
5893 /* Tries to improve set of induction variables IVS. */
5896 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5899 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5900 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5901 struct iv_cand
*cand
;
5903 /* Try extending the set of induction variables by one. */
5904 for (i
= 0; i
< n_iv_cands (data
); i
++)
5906 cand
= iv_cand (data
, i
);
5908 if (iv_ca_cand_used_p (ivs
, cand
))
5911 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
5915 /* If we successfully added the candidate and the set is small enough,
5916 try optimizing it by removing other candidates. */
5917 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5919 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5920 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5921 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5922 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5925 if (compare_costs (acost
, best_cost
) < 0)
5928 iv_ca_delta_free (&best_delta
);
5929 best_delta
= act_delta
;
5932 iv_ca_delta_free (&act_delta
);
5937 /* Try removing the candidates from the set instead. */
5938 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5940 /* Nothing more we can do. */
5945 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5946 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5947 iv_ca_delta_free (&best_delta
);
5951 /* Attempts to find the optimal set of induction variables. We do simple
5952 greedy heuristic -- we try to replace at most one candidate in the selected
5953 solution and remove the unused ivs while this improves the cost. */
5955 static struct iv_ca
*
5956 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
5960 /* Get the initial solution. */
5961 set
= get_initial_solution (data
, originalp
);
5964 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5965 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5969 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5971 fprintf (dump_file
, "Initial set of candidates:\n");
5972 iv_ca_dump (data
, dump_file
, set
);
5975 while (try_improve_iv_set (data
, set
))
5977 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5979 fprintf (dump_file
, "Improved to:\n");
5980 iv_ca_dump (data
, dump_file
, set
);
5987 static struct iv_ca
*
5988 find_optimal_iv_set (struct ivopts_data
*data
)
5991 struct iv_ca
*set
, *origset
;
5993 comp_cost cost
, origcost
;
5995 /* Determine the cost based on a strategy that starts with original IVs,
5996 and try again using a strategy that prefers candidates not based
5998 origset
= find_optimal_iv_set_1 (data
, true);
5999 set
= find_optimal_iv_set_1 (data
, false);
6001 if (!origset
&& !set
)
6004 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6005 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6007 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6009 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6010 origcost
.cost
, origcost
.complexity
);
6011 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6012 cost
.cost
, cost
.complexity
);
6015 /* Choose the one with the best cost. */
6016 if (compare_costs (origcost
, cost
) <= 0)
6023 iv_ca_free (&origset
);
6025 for (i
= 0; i
< n_iv_uses (data
); i
++)
6027 use
= iv_use (data
, i
);
6028 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6034 /* Creates a new induction variable corresponding to CAND. */
6037 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6039 gimple_stmt_iterator incr_pos
;
6049 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6053 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6061 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6065 /* Mark that the iv is preserved. */
6066 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6067 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6069 /* Rewrite the increment so that it uses var_before directly. */
6070 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6074 gimple_add_tmp_var (cand
->var_before
);
6076 base
= unshare_expr (cand
->iv
->base
);
6078 create_iv (base
, unshare_expr (cand
->iv
->step
),
6079 cand
->var_before
, data
->current_loop
,
6080 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6083 /* Creates new induction variables described in SET. */
6086 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6089 struct iv_cand
*cand
;
6092 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6094 cand
= iv_cand (data
, i
);
6095 create_new_iv (data
, cand
);
6098 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6100 fprintf (dump_file
, "\nSelected IV set: \n");
6101 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6103 cand
= iv_cand (data
, i
);
6104 dump_cand (dump_file
, cand
);
6106 fprintf (dump_file
, "\n");
6110 /* Rewrites USE (definition of iv used in a nonlinear expression)
6111 using candidate CAND. */
6114 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6115 struct iv_use
*use
, struct iv_cand
*cand
)
6120 gimple_stmt_iterator bsi
;
6122 /* An important special case -- if we are asked to express value of
6123 the original iv by itself, just exit; there is no need to
6124 introduce a new computation (that might also need casting the
6125 variable to unsigned and back). */
6126 if (cand
->pos
== IP_ORIGINAL
6127 && cand
->incremented_at
== use
->stmt
)
6129 enum tree_code stmt_code
;
6131 gcc_assert (is_gimple_assign (use
->stmt
));
6132 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6134 /* Check whether we may leave the computation unchanged.
6135 This is the case only if it does not rely on other
6136 computations in the loop -- otherwise, the computation
6137 we rely upon may be removed in remove_unused_ivs,
6138 thus leading to ICE. */
6139 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6140 if (stmt_code
== PLUS_EXPR
6141 || stmt_code
== MINUS_EXPR
6142 || stmt_code
== POINTER_PLUS_EXPR
)
6144 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6145 op
= gimple_assign_rhs2 (use
->stmt
);
6146 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6147 op
= gimple_assign_rhs1 (use
->stmt
);
6154 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6158 comp
= get_computation (data
->current_loop
, use
, cand
);
6159 gcc_assert (comp
!= NULL_TREE
);
6161 switch (gimple_code (use
->stmt
))
6164 tgt
= PHI_RESULT (use
->stmt
);
6166 /* If we should keep the biv, do not replace it. */
6167 if (name_info (data
, tgt
)->preserve_biv
)
6170 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6174 tgt
= gimple_assign_lhs (use
->stmt
);
6175 bsi
= gsi_for_stmt (use
->stmt
);
6182 if (!valid_gimple_rhs_p (comp
)
6183 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6184 /* We can't allow re-allocating the stmt as it might be pointed
6186 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6187 >= gimple_num_ops (gsi_stmt (bsi
)))))
6189 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6190 true, GSI_SAME_STMT
);
6191 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6193 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6194 /* As this isn't a plain copy we have to reset alignment
6196 if (SSA_NAME_PTR_INFO (comp
))
6197 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6201 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6203 ass
= gimple_build_assign (tgt
, comp
);
6204 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6206 bsi
= gsi_for_stmt (use
->stmt
);
6207 remove_phi_node (&bsi
, false);
6211 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6212 use
->stmt
= gsi_stmt (bsi
);
6216 /* Performs a peephole optimization to reorder the iv update statement with
6217 a mem ref to enable instruction combining in later phases. The mem ref uses
6218 the iv value before the update, so the reordering transformation requires
6219 adjustment of the offset. CAND is the selected IV_CAND.
6223 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6231 directly propagating t over to (1) will introduce overlapping live range
6232 thus increase register pressure. This peephole transform it into:
6236 t = MEM_REF (base, iv2, 8, 8);
6243 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6246 gimple iv_update
, stmt
;
6248 gimple_stmt_iterator gsi
, gsi_iv
;
6250 if (cand
->pos
!= IP_NORMAL
)
6253 var_after
= cand
->var_after
;
6254 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6256 bb
= gimple_bb (iv_update
);
6257 gsi
= gsi_last_nondebug_bb (bb
);
6258 stmt
= gsi_stmt (gsi
);
6260 /* Only handle conditional statement for now. */
6261 if (gimple_code (stmt
) != GIMPLE_COND
)
6264 gsi_prev_nondebug (&gsi
);
6265 stmt
= gsi_stmt (gsi
);
6266 if (stmt
!= iv_update
)
6269 gsi_prev_nondebug (&gsi
);
6270 if (gsi_end_p (gsi
))
6273 stmt
= gsi_stmt (gsi
);
6274 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6277 if (stmt
!= use
->stmt
)
6280 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6283 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6285 fprintf (dump_file
, "Reordering \n");
6286 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6287 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6288 fprintf (dump_file
, "\n");
6291 gsi
= gsi_for_stmt (use
->stmt
);
6292 gsi_iv
= gsi_for_stmt (iv_update
);
6293 gsi_move_before (&gsi_iv
, &gsi
);
6295 cand
->pos
= IP_BEFORE_USE
;
6296 cand
->incremented_at
= use
->stmt
;
6299 /* Rewrites USE (address that is an iv) using candidate CAND. */
6302 rewrite_use_address (struct ivopts_data
*data
,
6303 struct iv_use
*use
, struct iv_cand
*cand
)
6306 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6307 tree base_hint
= NULL_TREE
;
6311 adjust_iv_update_pos (cand
, use
);
6312 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6314 unshare_aff_combination (&aff
);
6316 /* To avoid undefined overflow problems, all IV candidates use unsigned
6317 integer types. The drawback is that this makes it impossible for
6318 create_mem_ref to distinguish an IV that is based on a memory object
6319 from one that represents simply an offset.
6321 To work around this problem, we pass a hint to create_mem_ref that
6322 indicates which variable (if any) in aff is an IV based on a memory
6323 object. Note that we only consider the candidate. If this is not
6324 based on an object, the base of the reference is in some subexpression
6325 of the use -- but these will use pointer types, so they are recognized
6326 by the create_mem_ref heuristics anyway. */
6327 if (cand
->iv
->base_object
)
6328 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6330 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6331 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6332 reference_alias_ptr_type (*use
->op_p
),
6333 iv
, base_hint
, data
->speed
);
6334 copy_ref_info (ref
, *use
->op_p
);
6338 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6342 rewrite_use_compare (struct ivopts_data
*data
,
6343 struct iv_use
*use
, struct iv_cand
*cand
)
6345 tree comp
, *var_p
, op
, bound
;
6346 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6347 enum tree_code compare
;
6348 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6354 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6355 tree var_type
= TREE_TYPE (var
);
6358 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6360 fprintf (dump_file
, "Replacing exit test: ");
6361 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6364 bound
= unshare_expr (fold_convert (var_type
, bound
));
6365 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6367 gsi_insert_seq_on_edge_immediate (
6368 loop_preheader_edge (data
->current_loop
),
6371 gimple_cond_set_lhs (use
->stmt
, var
);
6372 gimple_cond_set_code (use
->stmt
, compare
);
6373 gimple_cond_set_rhs (use
->stmt
, op
);
6377 /* The induction variable elimination failed; just express the original
6379 comp
= get_computation (data
->current_loop
, use
, cand
);
6380 gcc_assert (comp
!= NULL_TREE
);
6382 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6385 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6386 true, GSI_SAME_STMT
);
6389 /* Rewrites USE using candidate CAND. */
6392 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6396 case USE_NONLINEAR_EXPR
:
6397 rewrite_use_nonlinear_expr (data
, use
, cand
);
6401 rewrite_use_address (data
, use
, cand
);
6405 rewrite_use_compare (data
, use
, cand
);
6412 update_stmt (use
->stmt
);
6415 /* Rewrite the uses using the selected induction variables. */
6418 rewrite_uses (struct ivopts_data
*data
)
6421 struct iv_cand
*cand
;
6424 for (i
= 0; i
< n_iv_uses (data
); i
++)
6426 use
= iv_use (data
, i
);
6427 cand
= use
->selected
;
6430 rewrite_use (data
, use
, cand
);
6434 /* Removes the ivs that are not used after rewriting. */
6437 remove_unused_ivs (struct ivopts_data
*data
)
6441 bitmap toremove
= BITMAP_ALLOC (NULL
);
6443 /* Figure out an order in which to release SSA DEFs so that we don't
6444 release something that we'd have to propagate into a debug stmt
6446 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6448 struct version_info
*info
;
6450 info
= ver_info (data
, j
);
6452 && !integer_zerop (info
->iv
->step
)
6454 && !info
->iv
->have_use_for
6455 && !info
->preserve_biv
)
6457 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6459 tree def
= info
->iv
->ssa_name
;
6461 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6463 imm_use_iterator imm_iter
;
6464 use_operand_p use_p
;
6468 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6470 if (!gimple_debug_bind_p (stmt
))
6473 /* We just want to determine whether to do nothing
6474 (count == 0), to substitute the computed
6475 expression into a single use of the SSA DEF by
6476 itself (count == 1), or to use a debug temp
6477 because the SSA DEF is used multiple times or as
6478 part of a larger expression (count > 1). */
6480 if (gimple_debug_bind_get_value (stmt
) != def
)
6484 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6490 struct iv_use dummy_use
;
6491 struct iv_cand
*best_cand
= NULL
, *cand
;
6492 unsigned i
, best_pref
= 0, cand_pref
;
6494 memset (&dummy_use
, 0, sizeof (dummy_use
));
6495 dummy_use
.iv
= info
->iv
;
6496 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6498 cand
= iv_use (data
, i
)->selected
;
6499 if (cand
== best_cand
)
6501 cand_pref
= operand_equal_p (cand
->iv
->step
,
6505 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6506 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6509 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6511 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6514 best_pref
= cand_pref
;
6521 tree comp
= get_computation_at (data
->current_loop
,
6522 &dummy_use
, best_cand
,
6523 SSA_NAME_DEF_STMT (def
));
6529 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6530 DECL_ARTIFICIAL (vexpr
) = 1;
6531 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6532 if (SSA_NAME_VAR (def
))
6533 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6535 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6536 gimple def_temp
= gimple_build_debug_bind (vexpr
, comp
, NULL
);
6537 gimple_stmt_iterator gsi
;
6539 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6540 gsi
= gsi_after_labels (gimple_bb
6541 (SSA_NAME_DEF_STMT (def
)));
6543 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6545 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6549 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6551 if (!gimple_debug_bind_p (stmt
))
6554 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6555 SET_USE (use_p
, comp
);
6563 release_defs_bitset (toremove
);
6565 BITMAP_FREE (toremove
);
6568 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6569 for pointer_map_traverse. */
6572 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6573 void *data ATTRIBUTE_UNUSED
)
6575 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6581 /* Frees data allocated by the optimization of a single loop. */
6584 free_loop_data (struct ivopts_data
*data
)
6592 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6593 pointer_map_destroy (data
->niters
);
6594 data
->niters
= NULL
;
6597 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6599 struct version_info
*info
;
6601 info
= ver_info (data
, i
);
6604 info
->has_nonlin_use
= false;
6605 info
->preserve_biv
= false;
6608 bitmap_clear (data
->relevant
);
6609 bitmap_clear (data
->important_candidates
);
6611 for (i
= 0; i
< n_iv_uses (data
); i
++)
6613 struct iv_use
*use
= iv_use (data
, i
);
6616 BITMAP_FREE (use
->related_cands
);
6617 for (j
= 0; j
< use
->n_map_members
; j
++)
6618 if (use
->cost_map
[j
].depends_on
)
6619 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6620 free (use
->cost_map
);
6623 data
->iv_uses
.truncate (0);
6625 for (i
= 0; i
< n_iv_cands (data
); i
++)
6627 struct iv_cand
*cand
= iv_cand (data
, i
);
6630 if (cand
->depends_on
)
6631 BITMAP_FREE (cand
->depends_on
);
6634 data
->iv_candidates
.truncate (0);
6636 if (data
->version_info_size
< num_ssa_names
)
6638 data
->version_info_size
= 2 * num_ssa_names
;
6639 free (data
->version_info
);
6640 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6643 data
->max_inv_id
= 0;
6645 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6646 SET_DECL_RTL (obj
, NULL_RTX
);
6648 decl_rtl_to_reset
.truncate (0);
6650 data
->inv_expr_tab
.empty ();
6651 data
->inv_expr_id
= 0;
6654 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6658 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6660 free_loop_data (data
);
6661 free (data
->version_info
);
6662 BITMAP_FREE (data
->relevant
);
6663 BITMAP_FREE (data
->important_candidates
);
6665 decl_rtl_to_reset
.release ();
6666 data
->iv_uses
.release ();
6667 data
->iv_candidates
.release ();
6668 data
->inv_expr_tab
.dispose ();
6671 /* Returns true if the loop body BODY includes any function calls. */
6674 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6676 gimple_stmt_iterator gsi
;
6679 for (i
= 0; i
< num_nodes
; i
++)
6680 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6682 gimple stmt
= gsi_stmt (gsi
);
6683 if (is_gimple_call (stmt
)
6684 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
/* Optimizes the LOOP.  Returns true if anything changed.  DATA carries
   the pass-wide state; its per-loop fields are (re)initialized here and
   released again before returning.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  /* NOTE(review): assumes the niters cache of the previous loop was
     already cleared — presumably by free_loop_data; confirm.  */
  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* Record whether the body contains calls and renumber statement uids
     so the uid order matches the block order.  */
  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  Give up if there are too many —
     the optimal-set search below would become too expensive.  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* Reached on both the success and the early-bailout paths: release all
     per-loop state so DATA can be reused for the next loop.  */
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  A single
   ivopts_data instance is shared by all loops; per-loop state is reset
   inside tree_ssa_iv_optimize_loop.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}