1 /* Straight-line strength reduction.
2 Copyright (C) 2012 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction will be implemented in four stages, gradually
28 adding more complex candidates:
30 1) Explicit multiplies, known constant multipliers, no
31 conditional increments. (complete)
32 2) Explicit multiplies, unknown constant multipliers,
33 no conditional increments. (complete)
34 3) Implicit multiplies in addressing expressions. (complete)
35 4) Explicit multiplies, conditional increments. (pending)
37 It would also be possible to apply strength reduction to divisions
38 and modulos, but such opportunities are relatively uncommon.
40 Strength reduction is also currently restricted to integer operations.
41 If desired, it could be extended to floating-point operations under
42 control of something like -funsafe-math-optimizations. */
46 #include "coretypes.h"
49 #include "basic-block.h"
50 #include "tree-pass.h"
52 #include "gimple-pretty-print.h"
53 #include "tree-flow.h"
55 #include "pointer-set.h"
58 /* Information about a strength reduction candidate. Each statement
59 in the candidate table represents an expression of one of the
60 following forms (the special case of CAND_REF will be described
63 (CAND_MULT) S1: X = (B + i) * S
64 (CAND_ADD) S1: X = B + (i * S)
66 Here X and B are SSA names, i is an integer constant, and S is
67 either an SSA name or a constant. We call B the "base," i the
68 "index", and S the "stride."
70 Any statement S0 that dominates S1 and is of the form:
72 (CAND_MULT) S0: Y = (B + i') * S
73 (CAND_ADD) S0: Y = B + (i' * S)
75 is called a "basis" for S1. In both cases, S1 may be replaced by
77 S1': X = Y + (i - i') * S,
79 where (i - i') * S is folded to the extent possible.
81 All gimple statements are visited in dominator order, and each
82 statement that may contribute to one of the forms of S1 above is
83 given at least one entry in the candidate table. Such statements
84 include addition, pointer addition, subtraction, multiplication,
85 negation, copies, and nontrivial type casts. If a statement may
86 represent more than one expression of the forms of S1 above,
87 multiple "interpretations" are stored in the table and chained
90 * An add of two SSA names may treat either operand as the base.
91 * A multiply of two SSA names, likewise.
92 * A copy or cast may be thought of as either a CAND_MULT with
93 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
95 Candidate records are allocated from an obstack. They are addressed
96 both from a hash table keyed on S1, and from a vector of candidate
97 pointers arranged in predominator order.
101 Currently we don't recognize:
106 as a strength reduction opportunity, even though this S1 would
107 also be replaceable by the S1' above. This can be added if it
108 comes up in practice.
110 Strength reduction in addressing
111 --------------------------------
112 There is another kind of candidate known as CAND_REF. A CAND_REF
113 describes a statement containing a memory reference having
114 complex addressing that might benefit from strength reduction.
115 Specifically, we are interested in references for which
116 get_inner_reference returns a base address, offset, and bitpos as
119 base: MEM_REF (T1, C1)
120 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
121 bitpos: C4 * BITS_PER_UNIT
123 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
124 arbitrary integer constants. Note that C2 may be zero, in which
125 case the offset will be MULT_EXPR (T2, C3).
127 When this pattern is recognized, the original memory reference
128 can be replaced with:
130 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
133 which distributes the multiply to allow constant folding. When
134 two or more addressing expressions can be represented by MEM_REFs
135 of this form, differing only in the constants C1, C2, and C4,
136 making this substitution produces more efficient addressing during
137 the RTL phases. When there are not at least two expressions with
138 the same values of T1, T2, and C3, there is nothing to be gained
141 Strength reduction of CAND_REFs uses the same infrastructure as
142 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
143 field, MULT_EXPR (T2, C3) in the stride (S) field, and
144 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
145 is thus another CAND_REF with the same B and S values. When at
146 least two CAND_REFs are chained together using the basis relation,
147 each of them is replaced as above, resulting in improved code
148 generation for addressing. */
151 /* Index into the candidate vector, offset by 1. VECs are zero-based,
152 while cand_idx's are one-based, with zero indicating null. */
153 typedef unsigned cand_idx
;
155 /* The kind of candidate. */
165 /* The candidate statement S1. */
168 /* The base expression B: often an SSA name, but not always. */
174 /* The index constant i. */
177 /* The type of the candidate. This is normally the type of base_expr,
178 but casts may have occurred when combining feeding instructions.
179 A candidate can only be a basis for candidates of the same final type.
180 (For CAND_REFs, this is the type to be used for operand 1 of the
181 replacement MEM_REF.) */
184 /* The kind of candidate (CAND_MULT, etc.). */
187 /* Index of this candidate in the candidate vector. */
190 /* Index of the next candidate record for the same statement.
191 A statement may be useful in more than one way (e.g., due to
192 commutativity). So we can have multiple "interpretations"
194 cand_idx next_interp
;
196 /* Index of the basis statement S0, if any, in the candidate vector. */
199 /* First candidate for which this candidate is a basis, if one exists. */
202 /* Next candidate having the same basis as this one. */
205 /* If this is a conditional candidate, the defining PHI statement
206 for the base SSA name B. For future use; always NULL for now. */
209 /* Savings that can be expected from eliminating dead code if this
210 candidate is replaced. */
214 typedef struct slsr_cand_d slsr_cand
, *slsr_cand_t
;
215 typedef const struct slsr_cand_d
*const_slsr_cand_t
;
217 /* Pointers to candidates are chained together as part of a mapping
218 from base expressions to the candidates that use them. */
222 /* Base expression for the chain of candidates: often, but not
223 always, an SSA name. */
226 /* Pointer to a candidate. */
230 struct cand_chain_d
*next
;
234 typedef struct cand_chain_d cand_chain
, *cand_chain_t
;
235 typedef const struct cand_chain_d
*const_cand_chain_t
;
237 /* Information about a unique "increment" associated with candidates
238 having an SSA name for a stride. An increment is the difference
239 between the index of the candidate and the index of its basis,
240 i.e., (i - i') as discussed in the module commentary.
242 When we are not going to generate address arithmetic we treat
243 increments that differ only in sign as the same, allowing sharing
244 of the cost of initializers. The absolute value of the increment
245 is stored in the incr_info. */
249 /* The increment that relates a candidate to its basis. */
252 /* How many times the increment occurs in the candidate tree. */
255 /* Cost of replacing candidates using this increment. Negative and
256 zero costs indicate replacement should be performed. */
259 /* If this increment is profitable but is not -1, 0, or 1, it requires
260 an initializer T_0 = stride * incr to be found or introduced in the
261 nearest common dominator of all candidates. This field holds T_0
262 for subsequent use. */
265 /* If the initializer was found to already exist, this is the block
266 where it was found. */
270 typedef struct incr_info_d incr_info
, *incr_info_t
;
272 /* Candidates are maintained in a vector. If candidate X dominates
273 candidate Y, then X appears before Y in the vector; but the
274 converse does not necessarily hold. */
275 DEF_VEC_P (slsr_cand_t
);
276 DEF_VEC_ALLOC_P (slsr_cand_t
, heap
);
277 static VEC (slsr_cand_t
, heap
) *cand_vec
;
285 /* Pointer map embodying a mapping from statements to candidates. */
286 static struct pointer_map_t
*stmt_cand_map
;
288 /* Obstack for candidates. */
289 static struct obstack cand_obstack
;
291 /* Hash table embodying a mapping from base exprs to chains of candidates. */
292 static htab_t base_cand_map
;
294 /* Obstack for candidate chains. */
295 static struct obstack chain_obstack
;
297 /* An array INCR_VEC of incr_infos is used during analysis of related
298 candidates having an SSA name for a stride. INCR_VEC_LEN describes
299 its current length. */
300 static incr_info_t incr_vec
;
301 static unsigned incr_vec_len
;
303 /* For a chain of candidates with unknown stride, indicates whether or not
304 we must generate pointer arithmetic when replacing statements. */
305 static bool address_arithmetic_p
;
307 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
310 lookup_cand (cand_idx idx
)
312 return VEC_index (slsr_cand_t
, cand_vec
, idx
- 1);
315 /* Callback to produce a hash value for a candidate chain header. */
318 base_cand_hash (const void *p
)
320 tree base_expr
= ((const_cand_chain_t
) p
)->base_expr
;
321 return iterative_hash_expr (base_expr
, 0);
324 /* Callback when an element is removed from the hash table.
325 We never remove entries until the entire table is released. */
328 base_cand_free (void *p ATTRIBUTE_UNUSED
)
332 /* Callback to return true if two candidate chain headers are equal. */
335 base_cand_eq (const void *p1
, const void *p2
)
337 const_cand_chain_t
const chain1
= (const_cand_chain_t
) p1
;
338 const_cand_chain_t
const chain2
= (const_cand_chain_t
) p2
;
339 return operand_equal_p (chain1
->base_expr
, chain2
->base_expr
, 0);
342 /* Use the base expr from candidate C to look for possible candidates
343 that can serve as a basis for C. Each potential basis must also
344 appear in a block that dominates the candidate statement and have
345 the same stride and type. If more than one possible basis exists,
346 the one with highest index in the vector is chosen; this will be
347 the most immediately dominating basis. */
350 find_basis_for_candidate (slsr_cand_t c
)
352 cand_chain mapping_key
;
354 slsr_cand_t basis
= NULL
;
356 mapping_key
.base_expr
= c
->base_expr
;
357 chain
= (cand_chain_t
) htab_find (base_cand_map
, &mapping_key
);
359 for (; chain
; chain
= chain
->next
)
361 slsr_cand_t one_basis
= chain
->cand
;
363 if (one_basis
->kind
!= c
->kind
364 || !operand_equal_p (one_basis
->stride
, c
->stride
, 0)
365 || !types_compatible_p (one_basis
->cand_type
, c
->cand_type
)
366 || !dominated_by_p (CDI_DOMINATORS
,
367 gimple_bb (c
->cand_stmt
),
368 gimple_bb (one_basis
->cand_stmt
)))
371 if (!basis
|| basis
->cand_num
< one_basis
->cand_num
)
377 c
->sibling
= basis
->dependent
;
378 basis
->dependent
= c
->cand_num
;
379 return basis
->cand_num
;
385 /* Record a mapping from the base expression of C to C itself, indicating that
386 C may potentially serve as a basis using that base expression. */
389 record_potential_basis (slsr_cand_t c
)
394 node
= (cand_chain_t
) obstack_alloc (&chain_obstack
, sizeof (cand_chain
));
395 node
->base_expr
= c
->base_expr
;
398 slot
= htab_find_slot (base_cand_map
, node
, INSERT
);
402 cand_chain_t head
= (cand_chain_t
) (*slot
);
403 node
->next
= head
->next
;
410 /* Allocate storage for a new candidate and initialize its fields.
411 Attempt to find a basis for the candidate. */
414 alloc_cand_and_find_basis (enum cand_kind kind
, gimple gs
, tree base
,
415 double_int index
, tree stride
, tree ctype
,
418 slsr_cand_t c
= (slsr_cand_t
) obstack_alloc (&cand_obstack
,
424 c
->cand_type
= ctype
;
426 c
->cand_num
= VEC_length (slsr_cand_t
, cand_vec
) + 1;
431 c
->dead_savings
= savings
;
433 VEC_safe_push (slsr_cand_t
, heap
, cand_vec
, c
);
434 c
->basis
= find_basis_for_candidate (c
);
435 record_potential_basis (c
);
440 /* Determine the target cost of statement GS when compiling according
444 stmt_cost (gimple gs
, bool speed
)
446 tree lhs
, rhs1
, rhs2
;
447 enum machine_mode lhs_mode
;
449 gcc_assert (is_gimple_assign (gs
));
450 lhs
= gimple_assign_lhs (gs
);
451 rhs1
= gimple_assign_rhs1 (gs
);
452 lhs_mode
= TYPE_MODE (TREE_TYPE (lhs
));
454 switch (gimple_assign_rhs_code (gs
))
457 rhs2
= gimple_assign_rhs2 (gs
);
459 if (host_integerp (rhs2
, 0))
460 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2
), lhs_mode
, speed
);
462 gcc_assert (TREE_CODE (rhs1
) != INTEGER_CST
);
463 return mul_cost (speed
, lhs_mode
);
466 case POINTER_PLUS_EXPR
:
468 return add_cost (speed
, lhs_mode
);
471 return neg_cost (speed
, lhs_mode
);
474 return convert_cost (lhs_mode
, TYPE_MODE (TREE_TYPE (rhs1
)), speed
);
476 /* Note that we don't assign costs to copies that in most cases
486 /* Look up the defining statement for BASE_IN and return a pointer
487 to its candidate in the candidate table, if any; otherwise NULL.
488 Only CAND_ADD and CAND_MULT candidates are returned. */
491 base_cand_from_table (tree base_in
)
495 gimple def
= SSA_NAME_DEF_STMT (base_in
);
497 return (slsr_cand_t
) NULL
;
499 result
= (slsr_cand_t
*) pointer_map_contains (stmt_cand_map
, def
);
501 if (result
&& (*result
)->kind
!= CAND_REF
)
504 return (slsr_cand_t
) NULL
;
507 /* Add an entry to the statement-to-candidate mapping. */
510 add_cand_for_stmt (gimple gs
, slsr_cand_t c
)
512 void **slot
= pointer_map_insert (stmt_cand_map
, gs
);
517 /* Look for the following pattern:
519 *PBASE: MEM_REF (T1, C1)
521 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
523 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
525 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
527 *PINDEX: C4 * BITS_PER_UNIT
529 If not present, leave the input values unchanged and return FALSE.
530 Otherwise, modify the input values as follows and return TRUE:
533 *POFFSET: MULT_EXPR (T2, C3)
534 *PINDEX: C1 + (C2 * C3) + C4 */
537 restructure_reference (tree
*pbase
, tree
*poffset
, double_int
*pindex
,
540 tree base
= *pbase
, offset
= *poffset
;
541 double_int index
= *pindex
;
542 double_int bpu
= uhwi_to_double_int (BITS_PER_UNIT
);
543 tree mult_op0
, mult_op1
, t1
, t2
, type
;
544 double_int c1
, c2
, c3
, c4
;
548 || TREE_CODE (base
) != MEM_REF
549 || TREE_CODE (offset
) != MULT_EXPR
550 || TREE_CODE (TREE_OPERAND (offset
, 1)) != INTEGER_CST
551 || !double_int_zero_p (double_int_umod (index
, bpu
, FLOOR_MOD_EXPR
)))
554 t1
= TREE_OPERAND (base
, 0);
555 c1
= mem_ref_offset (base
);
556 type
= TREE_TYPE (TREE_OPERAND (base
, 1));
558 mult_op0
= TREE_OPERAND (offset
, 0);
559 mult_op1
= TREE_OPERAND (offset
, 1);
561 c3
= tree_to_double_int (mult_op1
);
563 if (TREE_CODE (mult_op0
) == PLUS_EXPR
)
565 if (TREE_CODE (TREE_OPERAND (mult_op0
, 1)) == INTEGER_CST
)
567 t2
= TREE_OPERAND (mult_op0
, 0);
568 c2
= tree_to_double_int (TREE_OPERAND (mult_op0
, 1));
573 else if (TREE_CODE (mult_op0
) == MINUS_EXPR
)
575 if (TREE_CODE (TREE_OPERAND (mult_op0
, 1)) == INTEGER_CST
)
577 t2
= TREE_OPERAND (mult_op0
, 0);
578 c2
= double_int_neg (tree_to_double_int (TREE_OPERAND (mult_op0
, 1)));
586 c2
= double_int_zero
;
589 c4
= double_int_udiv (index
, bpu
, FLOOR_DIV_EXPR
);
592 *poffset
= fold_build2 (MULT_EXPR
, sizetype
, t2
,
593 double_int_to_tree (sizetype
, c3
));
594 *pindex
= double_int_add (double_int_add (c1
, double_int_mul (c2
, c3
)), c4
);
600 /* Given GS which contains a data reference, create a CAND_REF entry in
601 the candidate table and attempt to find a basis. */
604 slsr_process_ref (gimple gs
)
606 tree ref_expr
, base
, offset
, type
;
607 HOST_WIDE_INT bitsize
, bitpos
;
608 enum machine_mode mode
;
609 int unsignedp
, volatilep
;
613 if (gimple_vdef (gs
))
614 ref_expr
= gimple_assign_lhs (gs
);
616 ref_expr
= gimple_assign_rhs1 (gs
);
618 if (!handled_component_p (ref_expr
)
619 || TREE_CODE (ref_expr
) == BIT_FIELD_REF
620 || (TREE_CODE (ref_expr
) == COMPONENT_REF
621 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr
, 1))))
624 base
= get_inner_reference (ref_expr
, &bitsize
, &bitpos
, &offset
, &mode
,
625 &unsignedp
, &volatilep
, false);
626 index
= uhwi_to_double_int (bitpos
);
628 if (!restructure_reference (&base
, &offset
, &index
, &type
))
631 c
= alloc_cand_and_find_basis (CAND_REF
, gs
, base
, index
, offset
,
634 /* Add the candidate to the statement-candidate mapping. */
635 add_cand_for_stmt (gs
, c
);
638 /* Create a candidate entry for a statement GS, where GS multiplies
639 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
640 about the two SSA names into the new candidate. Return the new
644 create_mul_ssa_cand (gimple gs
, tree base_in
, tree stride_in
, bool speed
)
646 tree base
= NULL_TREE
, stride
= NULL_TREE
, ctype
= NULL_TREE
;
648 unsigned savings
= 0;
650 slsr_cand_t base_cand
= base_cand_from_table (base_in
);
652 /* Look at all interpretations of the base candidate, if necessary,
653 to find information to propagate into this candidate. */
654 while (base_cand
&& !base
)
657 if (base_cand
->kind
== CAND_MULT
658 && operand_equal_p (base_cand
->stride
, integer_one_node
, 0))
664 base
= base_cand
->base_expr
;
665 index
= base_cand
->index
;
667 ctype
= base_cand
->cand_type
;
668 if (has_single_use (base_in
))
669 savings
= (base_cand
->dead_savings
670 + stmt_cost (base_cand
->cand_stmt
, speed
));
672 else if (base_cand
->kind
== CAND_ADD
673 && TREE_CODE (base_cand
->stride
) == INTEGER_CST
)
675 /* Y = B + (i' * S), S constant
677 ============================
678 X = B + ((i' * S) * Z) */
679 base
= base_cand
->base_expr
;
680 index
= double_int_mul (base_cand
->index
,
681 tree_to_double_int (base_cand
->stride
));
683 ctype
= base_cand
->cand_type
;
684 if (has_single_use (base_in
))
685 savings
= (base_cand
->dead_savings
686 + stmt_cost (base_cand
->cand_stmt
, speed
));
689 if (base_cand
->next_interp
)
690 base_cand
= lookup_cand (base_cand
->next_interp
);
697 /* No interpretations had anything useful to propagate, so
698 produce X = (Y + 0) * Z. */
700 index
= double_int_zero
;
702 ctype
= TREE_TYPE (base_in
);
705 c
= alloc_cand_and_find_basis (CAND_MULT
, gs
, base
, index
, stride
,
710 /* Create a candidate entry for a statement GS, where GS multiplies
711 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
712 information about BASE_IN into the new candidate. Return the new
716 create_mul_imm_cand (gimple gs
, tree base_in
, tree stride_in
, bool speed
)
718 tree base
= NULL_TREE
, stride
= NULL_TREE
, ctype
= NULL_TREE
;
719 double_int index
, temp
;
720 unsigned savings
= 0;
722 slsr_cand_t base_cand
= base_cand_from_table (base_in
);
724 /* Look at all interpretations of the base candidate, if necessary,
725 to find information to propagate into this candidate. */
726 while (base_cand
&& !base
)
728 if (base_cand
->kind
== CAND_MULT
729 && TREE_CODE (base_cand
->stride
) == INTEGER_CST
)
731 /* Y = (B + i') * S, S constant
733 ============================
734 X = (B + i') * (S * c) */
735 base
= base_cand
->base_expr
;
736 index
= base_cand
->index
;
737 temp
= double_int_mul (tree_to_double_int (base_cand
->stride
),
738 tree_to_double_int (stride_in
));
739 stride
= double_int_to_tree (TREE_TYPE (stride_in
), temp
);
740 ctype
= base_cand
->cand_type
;
741 if (has_single_use (base_in
))
742 savings
= (base_cand
->dead_savings
743 + stmt_cost (base_cand
->cand_stmt
, speed
));
745 else if (base_cand
->kind
== CAND_ADD
746 && operand_equal_p (base_cand
->stride
, integer_one_node
, 0))
750 ===========================
752 base
= base_cand
->base_expr
;
753 index
= base_cand
->index
;
755 ctype
= base_cand
->cand_type
;
756 if (has_single_use (base_in
))
757 savings
= (base_cand
->dead_savings
758 + stmt_cost (base_cand
->cand_stmt
, speed
));
760 else if (base_cand
->kind
== CAND_ADD
761 && double_int_one_p (base_cand
->index
)
762 && TREE_CODE (base_cand
->stride
) == INTEGER_CST
)
764 /* Y = B + (1 * S), S constant
766 ===========================
768 base
= base_cand
->base_expr
;
769 index
= tree_to_double_int (base_cand
->stride
);
771 ctype
= base_cand
->cand_type
;
772 if (has_single_use (base_in
))
773 savings
= (base_cand
->dead_savings
774 + stmt_cost (base_cand
->cand_stmt
, speed
));
777 if (base_cand
->next_interp
)
778 base_cand
= lookup_cand (base_cand
->next_interp
);
785 /* No interpretations had anything useful to propagate, so
786 produce X = (Y + 0) * c. */
788 index
= double_int_zero
;
790 ctype
= TREE_TYPE (base_in
);
793 c
= alloc_cand_and_find_basis (CAND_MULT
, gs
, base
, index
, stride
,
798 /* Given GS which is a multiply of scalar integers, make an appropriate
799 entry in the candidate table. If this is a multiply of two SSA names,
800 create two CAND_MULT interpretations and attempt to find a basis for
801 each of them. Otherwise, create a single CAND_MULT and attempt to
805 slsr_process_mul (gimple gs
, tree rhs1
, tree rhs2
, bool speed
)
809 /* If this is a multiply of an SSA name with itself, it is highly
810 unlikely that we will get a strength reduction opportunity, so
811 don't record it as a candidate. This simplifies the logic for
812 finding a basis, so if this is removed that must be considered. */
816 if (TREE_CODE (rhs2
) == SSA_NAME
)
818 /* Record an interpretation of this statement in the candidate table
819 assuming RHS1 is the base expression and RHS2 is the stride. */
820 c
= create_mul_ssa_cand (gs
, rhs1
, rhs2
, speed
);
822 /* Add the first interpretation to the statement-candidate mapping. */
823 add_cand_for_stmt (gs
, c
);
825 /* Record another interpretation of this statement assuming RHS1
826 is the stride and RHS2 is the base expression. */
827 c2
= create_mul_ssa_cand (gs
, rhs2
, rhs1
, speed
);
828 c
->next_interp
= c2
->cand_num
;
832 /* Record an interpretation for the multiply-immediate. */
833 c
= create_mul_imm_cand (gs
, rhs1
, rhs2
, speed
);
835 /* Add the interpretation to the statement-candidate mapping. */
836 add_cand_for_stmt (gs
, c
);
840 /* Create a candidate entry for a statement GS, where GS adds two
841 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
842 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
843 information about the two SSA names into the new candidate.
844 Return the new candidate. */
847 create_add_ssa_cand (gimple gs
, tree base_in
, tree addend_in
,
848 bool subtract_p
, bool speed
)
850 tree base
= NULL_TREE
, stride
= NULL_TREE
, ctype
= NULL
;
852 unsigned savings
= 0;
854 slsr_cand_t base_cand
= base_cand_from_table (base_in
);
855 slsr_cand_t addend_cand
= base_cand_from_table (addend_in
);
857 /* The most useful transformation is a multiply-immediate feeding
858 an add or subtract. Look for that first. */
859 while (addend_cand
&& !base
)
861 if (addend_cand
->kind
== CAND_MULT
862 && double_int_zero_p (addend_cand
->index
)
863 && TREE_CODE (addend_cand
->stride
) == INTEGER_CST
)
865 /* Z = (B + 0) * S, S constant
867 ===========================
868 X = Y + ((+/-1 * S) * B) */
870 index
= tree_to_double_int (addend_cand
->stride
);
872 index
= double_int_neg (index
);
873 stride
= addend_cand
->base_expr
;
874 ctype
= TREE_TYPE (base_in
);
875 if (has_single_use (addend_in
))
876 savings
= (addend_cand
->dead_savings
877 + stmt_cost (addend_cand
->cand_stmt
, speed
));
880 if (addend_cand
->next_interp
)
881 addend_cand
= lookup_cand (addend_cand
->next_interp
);
886 while (base_cand
&& !base
)
888 if (base_cand
->kind
== CAND_ADD
889 && (double_int_zero_p (base_cand
->index
)
890 || operand_equal_p (base_cand
->stride
,
891 integer_zero_node
, 0)))
893 /* Y = B + (i' * S), i' * S = 0
895 ============================
896 X = B + (+/-1 * Z) */
897 base
= base_cand
->base_expr
;
898 index
= subtract_p
? double_int_minus_one
: double_int_one
;
900 ctype
= base_cand
->cand_type
;
901 if (has_single_use (base_in
))
902 savings
= (base_cand
->dead_savings
903 + stmt_cost (base_cand
->cand_stmt
, speed
));
907 slsr_cand_t subtrahend_cand
= base_cand_from_table (addend_in
);
909 while (subtrahend_cand
&& !base
)
911 if (subtrahend_cand
->kind
== CAND_MULT
912 && double_int_zero_p (subtrahend_cand
->index
)
913 && TREE_CODE (subtrahend_cand
->stride
) == INTEGER_CST
)
915 /* Z = (B + 0) * S, S constant
917 ===========================
918 Value: X = Y + ((-1 * S) * B) */
920 index
= tree_to_double_int (subtrahend_cand
->stride
);
921 index
= double_int_neg (index
);
922 stride
= subtrahend_cand
->base_expr
;
923 ctype
= TREE_TYPE (base_in
);
924 if (has_single_use (addend_in
))
925 savings
= (subtrahend_cand
->dead_savings
926 + stmt_cost (subtrahend_cand
->cand_stmt
, speed
));
929 if (subtrahend_cand
->next_interp
)
930 subtrahend_cand
= lookup_cand (subtrahend_cand
->next_interp
);
932 subtrahend_cand
= NULL
;
936 if (base_cand
->next_interp
)
937 base_cand
= lookup_cand (base_cand
->next_interp
);
944 /* No interpretations had anything useful to propagate, so
945 produce X = Y + (1 * Z). */
947 index
= subtract_p
? double_int_minus_one
: double_int_one
;
949 ctype
= TREE_TYPE (base_in
);
952 c
= alloc_cand_and_find_basis (CAND_ADD
, gs
, base
, index
, stride
,
957 /* Create a candidate entry for a statement GS, where GS adds SSA
958 name BASE_IN to constant INDEX_IN. Propagate any known information
959 about BASE_IN into the new candidate. Return the new candidate. */
962 create_add_imm_cand (gimple gs
, tree base_in
, double_int index_in
, bool speed
)
964 enum cand_kind kind
= CAND_ADD
;
965 tree base
= NULL_TREE
, stride
= NULL_TREE
, ctype
= NULL_TREE
;
966 double_int index
, multiple
;
967 unsigned savings
= 0;
969 slsr_cand_t base_cand
= base_cand_from_table (base_in
);
971 while (base_cand
&& !base
)
973 bool unsigned_p
= TYPE_UNSIGNED (TREE_TYPE (base_cand
->stride
));
975 if (TREE_CODE (base_cand
->stride
) == INTEGER_CST
976 && double_int_multiple_of (index_in
,
977 tree_to_double_int (base_cand
->stride
),
981 /* Y = (B + i') * S, S constant, c = kS for some integer k
983 ============================
984 X = (B + (i'+ k)) * S
986 Y = B + (i' * S), S constant, c = kS for some integer k
988 ============================
989 X = (B + (i'+ k)) * S */
990 kind
= base_cand
->kind
;
991 base
= base_cand
->base_expr
;
992 index
= double_int_add (base_cand
->index
, multiple
);
993 stride
= base_cand
->stride
;
994 ctype
= base_cand
->cand_type
;
995 if (has_single_use (base_in
))
996 savings
= (base_cand
->dead_savings
997 + stmt_cost (base_cand
->cand_stmt
, speed
));
1000 if (base_cand
->next_interp
)
1001 base_cand
= lookup_cand (base_cand
->next_interp
);
1008 /* No interpretations had anything useful to propagate, so
1009 produce X = Y + (c * 1). */
1013 stride
= integer_one_node
;
1014 ctype
= TREE_TYPE (base_in
);
1017 c
= alloc_cand_and_find_basis (kind
, gs
, base
, index
, stride
,
1022 /* Given GS which is an add or subtract of scalar integers or pointers,
1023 make at least one appropriate entry in the candidate table. */
1026 slsr_process_add (gimple gs
, tree rhs1
, tree rhs2
, bool speed
)
1028 bool subtract_p
= gimple_assign_rhs_code (gs
) == MINUS_EXPR
;
1029 slsr_cand_t c
= NULL
, c2
;
1031 if (TREE_CODE (rhs2
) == SSA_NAME
)
1033 /* First record an interpretation assuming RHS1 is the base expression
1034 and RHS2 is the stride. But it doesn't make sense for the
1035 stride to be a pointer, so don't record a candidate in that case. */
1036 if (!POINTER_TYPE_P (TREE_TYPE (rhs2
)))
1038 c
= create_add_ssa_cand (gs
, rhs1
, rhs2
, subtract_p
, speed
);
1040 /* Add the first interpretation to the statement-candidate
1042 add_cand_for_stmt (gs
, c
);
1045 /* If the two RHS operands are identical, or this is a subtract,
1047 if (operand_equal_p (rhs1
, rhs2
, 0) || subtract_p
)
1050 /* Otherwise, record another interpretation assuming RHS2 is the
1051 base expression and RHS1 is the stride, again provided that the
1052 stride is not a pointer. */
1053 if (!POINTER_TYPE_P (TREE_TYPE (rhs1
)))
1055 c2
= create_add_ssa_cand (gs
, rhs2
, rhs1
, false, speed
);
1057 c
->next_interp
= c2
->cand_num
;
1059 add_cand_for_stmt (gs
, c2
);
1066 /* Record an interpretation for the add-immediate. */
1067 index
= tree_to_double_int (rhs2
);
1069 index
= double_int_neg (index
);
1071 c
= create_add_imm_cand (gs
, rhs1
, index
, speed
);
1073 /* Add the interpretation to the statement-candidate mapping. */
1074 add_cand_for_stmt (gs
, c
);
1078 /* Given GS which is a negate of a scalar integer, make an appropriate
1079 entry in the candidate table. A negate is equivalent to a multiply
1083 slsr_process_neg (gimple gs
, tree rhs1
, bool speed
)
1085 /* Record a CAND_MULT interpretation for the multiply by -1. */
1086 slsr_cand_t c
= create_mul_imm_cand (gs
, rhs1
, integer_minus_one_node
, speed
);
1088 /* Add the interpretation to the statement-candidate mapping. */
1089 add_cand_for_stmt (gs
, c
);
1092 /* Help function for legal_cast_p, operating on two trees. Checks
1093 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1094 for more details. */
1097 legal_cast_p_1 (tree lhs
, tree rhs
)
1099 tree lhs_type
, rhs_type
;
1100 unsigned lhs_size
, rhs_size
;
1101 bool lhs_wraps
, rhs_wraps
;
1103 lhs_type
= TREE_TYPE (lhs
);
1104 rhs_type
= TREE_TYPE (rhs
);
1105 lhs_size
= TYPE_PRECISION (lhs_type
);
1106 rhs_size
= TYPE_PRECISION (rhs_type
);
1107 lhs_wraps
= TYPE_OVERFLOW_WRAPS (lhs_type
);
1108 rhs_wraps
= TYPE_OVERFLOW_WRAPS (rhs_type
);
1110 if (lhs_size
< rhs_size
1111 || (rhs_wraps
&& !lhs_wraps
)
1112 || (rhs_wraps
&& lhs_wraps
&& rhs_size
!= lhs_size
))
1118 /* Return TRUE if GS is a statement that defines an SSA name from
1119 a conversion and is legal for us to combine with an add and multiply
1120 in the candidate table. For example, suppose we have:
1126 Without the type-cast, we would create a CAND_MULT for D with base B,
1127 index i, and stride S. We want to record this candidate only if it
1128 is equivalent to apply the type cast following the multiply:
1134 We will record the type with the candidate for D. This allows us
1135 to use a similar previous candidate as a basis. If we have earlier seen
1141 we can replace D with
1143 D = D' + (i - i') * S;
1145 But if moving the type-cast would change semantics, we mustn't do this.
1147 This is legitimate for casts from a non-wrapping integral type to
1148 any integral type of the same or larger size. It is not legitimate
1149 to convert a wrapping type to a non-wrapping type, or to a wrapping
1150 type of a different size. I.e., with a wrapping type, we must
1151 assume that the addition B + i could wrap, in which case performing
1152 the multiply before or after one of the "illegal" type casts will
1153 have different semantics. */
1156 legal_cast_p (gimple gs
, tree rhs
)
1158 if (!is_gimple_assign (gs
)
1159 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs
)))
1162 return legal_cast_p_1 (gimple_assign_lhs (gs
), rhs
);
1165 /* Given GS which is a cast to a scalar integer type, determine whether
1166 the cast is legal for strength reduction. If so, make at least one
1167 appropriate entry in the candidate table. */
1170 slsr_process_cast (gimple gs
, tree rhs1
, bool speed
)
1173 slsr_cand_t base_cand
, c
, c2
;
1174 unsigned savings
= 0;
1176 if (!legal_cast_p (gs
, rhs1
))
1179 lhs
= gimple_assign_lhs (gs
);
1180 base_cand
= base_cand_from_table (rhs1
);
1181 ctype
= TREE_TYPE (lhs
);
1187 /* Propagate all data from the base candidate except the type,
1188 which comes from the cast, and the base candidate's cast,
1189 which is no longer applicable. */
1190 if (has_single_use (rhs1
))
1191 savings
= (base_cand
->dead_savings
1192 + stmt_cost (base_cand
->cand_stmt
, speed
));
1194 c
= alloc_cand_and_find_basis (base_cand
->kind
, gs
,
1195 base_cand
->base_expr
,
1196 base_cand
->index
, base_cand
->stride
,
1198 if (base_cand
->next_interp
)
1199 base_cand
= lookup_cand (base_cand
->next_interp
);
1206 /* If nothing is known about the RHS, create fresh CAND_ADD and
1207 CAND_MULT interpretations:
1212 The first of these is somewhat arbitrary, but the choice of
1213 1 for the stride simplifies the logic for propagating casts
1215 c
= alloc_cand_and_find_basis (CAND_ADD
, gs
, rhs1
, double_int_zero
,
1216 integer_one_node
, ctype
, 0);
1217 c2
= alloc_cand_and_find_basis (CAND_MULT
, gs
, rhs1
, double_int_zero
,
1218 integer_one_node
, ctype
, 0);
1219 c
->next_interp
= c2
->cand_num
;
1222 /* Add the first (or only) interpretation to the statement-candidate
1224 add_cand_for_stmt (gs
, c
);
1227 /* Given GS which is a copy of a scalar integer type, make at least one
1228 appropriate entry in the candidate table.
1230 This interface is included for completeness, but is unnecessary
1231 if this pass immediately follows a pass that performs copy
1232 propagation, such as DOM. */
1235 slsr_process_copy (gimple gs
, tree rhs1
, bool speed
)
1237 slsr_cand_t base_cand
, c
, c2
;
1238 unsigned savings
= 0;
1240 base_cand
= base_cand_from_table (rhs1
);
1246 /* Propagate all data from the base candidate. */
1247 if (has_single_use (rhs1
))
1248 savings
= (base_cand
->dead_savings
1249 + stmt_cost (base_cand
->cand_stmt
, speed
));
1251 c
= alloc_cand_and_find_basis (base_cand
->kind
, gs
,
1252 base_cand
->base_expr
,
1253 base_cand
->index
, base_cand
->stride
,
1254 base_cand
->cand_type
, savings
);
1255 if (base_cand
->next_interp
)
1256 base_cand
= lookup_cand (base_cand
->next_interp
);
1263 /* If nothing is known about the RHS, create fresh CAND_ADD and
1264 CAND_MULT interpretations:
1269 The first of these is somewhat arbitrary, but the choice of
1270 1 for the stride simplifies the logic for propagating casts
1272 c
= alloc_cand_and_find_basis (CAND_ADD
, gs
, rhs1
, double_int_zero
,
1273 integer_one_node
, TREE_TYPE (rhs1
), 0);
1274 c2
= alloc_cand_and_find_basis (CAND_MULT
, gs
, rhs1
, double_int_zero
,
1275 integer_one_node
, TREE_TYPE (rhs1
), 0);
1276 c
->next_interp
= c2
->cand_num
;
1279 /* Add the first (or only) interpretation to the statement-candidate
1281 add_cand_for_stmt (gs
, c
);
1284 /* Find strength-reduction candidates in block BB. */
1287 find_candidates_in_block (struct dom_walk_data
*walk_data ATTRIBUTE_UNUSED
,
1290 bool speed
= optimize_bb_for_speed_p (bb
);
1291 gimple_stmt_iterator gsi
;
1293 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1295 gimple gs
= gsi_stmt (gsi
);
1297 if (gimple_vuse (gs
) && gimple_assign_single_p (gs
))
1298 slsr_process_ref (gs
);
1300 else if (is_gimple_assign (gs
)
1301 && SCALAR_INT_MODE_P
1302 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs
)))))
1304 tree rhs1
= NULL_TREE
, rhs2
= NULL_TREE
;
1306 switch (gimple_assign_rhs_code (gs
))
1310 rhs1
= gimple_assign_rhs1 (gs
);
1311 rhs2
= gimple_assign_rhs2 (gs
);
1312 /* Should never happen, but currently some buggy situations
1313 in earlier phases put constants in rhs1. */
1314 if (TREE_CODE (rhs1
) != SSA_NAME
)
1318 /* Possible future opportunity: rhs1 of a ptr+ can be
1320 case POINTER_PLUS_EXPR
:
1322 rhs2
= gimple_assign_rhs2 (gs
);
1328 rhs1
= gimple_assign_rhs1 (gs
);
1329 if (TREE_CODE (rhs1
) != SSA_NAME
)
1337 switch (gimple_assign_rhs_code (gs
))
1340 slsr_process_mul (gs
, rhs1
, rhs2
, speed
);
1344 case POINTER_PLUS_EXPR
:
1346 slsr_process_add (gs
, rhs1
, rhs2
, speed
);
1350 slsr_process_neg (gs
, rhs1
, speed
);
1354 slsr_process_cast (gs
, rhs1
, speed
);
1358 slsr_process_copy (gs
, rhs1
, speed
);
1368 /* Dump a candidate for debug. */
1371 dump_candidate (slsr_cand_t c
)
1373 fprintf (dump_file
, "%3d [%d] ", c
->cand_num
,
1374 gimple_bb (c
->cand_stmt
)->index
);
1375 print_gimple_stmt (dump_file
, c
->cand_stmt
, 0, 0);
1379 fputs (" MULT : (", dump_file
);
1380 print_generic_expr (dump_file
, c
->base_expr
, 0);
1381 fputs (" + ", dump_file
);
1382 dump_double_int (dump_file
, c
->index
, false);
1383 fputs (") * ", dump_file
);
1384 print_generic_expr (dump_file
, c
->stride
, 0);
1385 fputs (" : ", dump_file
);
1388 fputs (" ADD : ", dump_file
);
1389 print_generic_expr (dump_file
, c
->base_expr
, 0);
1390 fputs (" + (", dump_file
);
1391 dump_double_int (dump_file
, c
->index
, false);
1392 fputs (" * ", dump_file
);
1393 print_generic_expr (dump_file
, c
->stride
, 0);
1394 fputs (") : ", dump_file
);
1397 fputs (" REF : ", dump_file
);
1398 print_generic_expr (dump_file
, c
->base_expr
, 0);
1399 fputs (" + (", dump_file
);
1400 print_generic_expr (dump_file
, c
->stride
, 0);
1401 fputs (") + ", dump_file
);
1402 dump_double_int (dump_file
, c
->index
, false);
1403 fputs (" : ", dump_file
);
1408 print_generic_expr (dump_file
, c
->cand_type
, 0);
1409 fprintf (dump_file
, "\n basis: %d dependent: %d sibling: %d\n",
1410 c
->basis
, c
->dependent
, c
->sibling
);
1411 fprintf (dump_file
, " next-interp: %d dead-savings: %d\n",
1412 c
->next_interp
, c
->dead_savings
);
1415 fputs (" phi: ", dump_file
);
1416 print_gimple_stmt (dump_file
, c
->def_phi
, 0, 0);
1418 fputs ("\n", dump_file
);
1421 /* Dump the candidate vector for debug. */
1424 dump_cand_vec (void)
1429 fprintf (dump_file
, "\nStrength reduction candidate vector:\n\n");
1431 FOR_EACH_VEC_ELT (slsr_cand_t
, cand_vec
, i
, c
)
1435 /* Callback used to dump the candidate chains hash table. */
1438 base_cand_dump_callback (void **slot
, void *ignored ATTRIBUTE_UNUSED
)
1440 const_cand_chain_t chain
= *((const_cand_chain_t
*) slot
);
1443 print_generic_expr (dump_file
, chain
->base_expr
, 0);
1444 fprintf (dump_file
, " -> %d", chain
->cand
->cand_num
);
1446 for (p
= chain
->next
; p
; p
= p
->next
)
1447 fprintf (dump_file
, " -> %d", p
->cand
->cand_num
);
1449 fputs ("\n", dump_file
);
1453 /* Dump the candidate chains. */
1456 dump_cand_chains (void)
1458 fprintf (dump_file
, "\nStrength reduction candidate chains:\n\n");
1459 htab_traverse_noresize (base_cand_map
, base_cand_dump_callback
, NULL
);
1460 fputs ("\n", dump_file
);
1463 /* Dump the increment vector for debug. */
1466 dump_incr_vec (void)
1468 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1472 fprintf (dump_file
, "\nIncrement vector:\n\n");
1474 for (i
= 0; i
< incr_vec_len
; i
++)
1476 fprintf (dump_file
, "%3d increment: ", i
);
1477 dump_double_int (dump_file
, incr_vec
[i
].incr
, false);
1478 fprintf (dump_file
, "\n count: %d", incr_vec
[i
].count
);
1479 fprintf (dump_file
, "\n cost: %d", incr_vec
[i
].cost
);
1480 fputs ("\n initializer: ", dump_file
);
1481 print_generic_expr (dump_file
, incr_vec
[i
].initializer
, 0);
1482 fputs ("\n\n", dump_file
);
1487 /* Recursive helper for unconditional_cands_with_known_stride_p.
1488 Returns TRUE iff C, its siblings, and its dependents are all
1489 unconditional candidates. */
1492 unconditional_cands (slsr_cand_t c
)
1497 if (c
->sibling
&& !unconditional_cands (lookup_cand (c
->sibling
)))
1500 if (c
->dependent
&& !unconditional_cands (lookup_cand (c
->dependent
)))
1506 /* Determine whether or not the tree of candidates rooted at
1507 ROOT consists entirely of unconditional increments with
1508 an INTEGER_CST stride. */
1511 unconditional_cands_with_known_stride_p (slsr_cand_t root
)
1513 /* The stride is identical for all related candidates, so
1515 if (TREE_CODE (root
->stride
) != INTEGER_CST
)
1518 return unconditional_cands (lookup_cand (root
->dependent
));
1521 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1525 replace_ref (tree
*expr
, slsr_cand_t c
)
1527 tree add_expr
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (c
->base_expr
),
1528 c
->base_expr
, c
->stride
);
1529 tree mem_ref
= fold_build2 (MEM_REF
, TREE_TYPE (*expr
), add_expr
,
1530 double_int_to_tree (c
->cand_type
, c
->index
));
1532 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1533 gimple_stmt_iterator gsi
= gsi_for_stmt (c
->cand_stmt
);
1534 TREE_OPERAND (mem_ref
, 0)
1535 = force_gimple_operand_gsi (&gsi
, TREE_OPERAND (mem_ref
, 0),
1536 /*simple_p=*/true, NULL
,
1537 /*before=*/true, GSI_SAME_STMT
);
1538 copy_ref_info (mem_ref
, *expr
);
1540 update_stmt (c
->cand_stmt
);
1543 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1544 dependent of candidate C with an equivalent strength-reduced data
1548 replace_refs (slsr_cand_t c
)
1550 if (gimple_vdef (c
->cand_stmt
))
1552 tree
*lhs
= gimple_assign_lhs_ptr (c
->cand_stmt
);
1553 replace_ref (lhs
, c
);
1557 tree
*rhs
= gimple_assign_rhs1_ptr (c
->cand_stmt
);
1558 replace_ref (rhs
, c
);
1562 replace_refs (lookup_cand (c
->sibling
));
1565 replace_refs (lookup_cand (c
->dependent
));
1568 /* Calculate the increment required for candidate C relative to
1572 cand_increment (slsr_cand_t c
)
1576 /* If the candidate doesn't have a basis, just return its own
1577 index. This is useful in record_increments to help us find
1578 an existing initializer. */
1582 basis
= lookup_cand (c
->basis
);
1583 gcc_assert (operand_equal_p (c
->base_expr
, basis
->base_expr
, 0));
1584 return double_int_sub (c
->index
, basis
->index
);
1587 /* Calculate the increment required for candidate C relative to
1588 its basis. If we aren't going to generate pointer arithmetic
1589 for this candidate, return the absolute value of that increment
1592 static inline double_int
1593 cand_abs_increment (slsr_cand_t c
)
1595 double_int increment
= cand_increment (c
);
1597 if (!address_arithmetic_p
&& double_int_negative_p (increment
))
1598 increment
= double_int_neg (increment
);
1603 /* If *VAR is NULL or is not of a compatible type with TYPE, create a
1604 new temporary reg of type TYPE and store it in *VAR. */
1607 lazy_create_slsr_reg (tree
*var
, tree type
)
1609 if (!*var
|| !types_compatible_p (TREE_TYPE (*var
), type
))
1610 *var
= create_tmp_reg (type
, "slsr");
1613 /* Return TRUE iff candidate C has already been replaced under
1614 another interpretation. */
1617 cand_already_replaced (slsr_cand_t c
)
1619 return (gimple_bb (c
->cand_stmt
) == 0);
1622 /* Helper routine for replace_dependents, doing the work for a
1623 single candidate C. */
1626 replace_dependent (slsr_cand_t c
, enum tree_code cand_code
)
1628 double_int stride
= tree_to_double_int (c
->stride
);
1629 double_int bump
= double_int_mul (cand_increment (c
), stride
);
1630 gimple stmt_to_print
= NULL
;
1632 tree basis_name
, incr_type
, bump_tree
;
1633 enum tree_code code
;
1635 /* It is highly unlikely, but possible, that the resulting
1636 bump doesn't fit in a HWI. Abandon the replacement
1637 in this case. Restriction to signed HWI is conservative
1638 for unsigned types but allows for safe negation without
1640 if (!double_int_fits_in_shwi_p (bump
))
1643 basis
= lookup_cand (c
->basis
);
1644 basis_name
= gimple_assign_lhs (basis
->cand_stmt
);
1645 incr_type
= TREE_TYPE (gimple_assign_rhs1 (c
->cand_stmt
));
1648 if (double_int_negative_p (bump
))
1651 bump
= double_int_neg (bump
);
1654 bump_tree
= double_int_to_tree (incr_type
, bump
);
1656 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1658 fputs ("Replacing: ", dump_file
);
1659 print_gimple_stmt (dump_file
, c
->cand_stmt
, 0, 0);
1662 if (double_int_zero_p (bump
))
1664 tree lhs
= gimple_assign_lhs (c
->cand_stmt
);
1665 gimple copy_stmt
= gimple_build_assign (lhs
, basis_name
);
1666 gimple_stmt_iterator gsi
= gsi_for_stmt (c
->cand_stmt
);
1667 gimple_set_location (copy_stmt
, gimple_location (c
->cand_stmt
));
1668 gsi_replace (&gsi
, copy_stmt
, false);
1669 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1670 stmt_to_print
= copy_stmt
;
1674 tree rhs1
= gimple_assign_rhs1 (c
->cand_stmt
);
1675 tree rhs2
= gimple_assign_rhs2 (c
->cand_stmt
);
1676 if (cand_code
!= NEGATE_EXPR
1677 && ((operand_equal_p (rhs1
, basis_name
, 0)
1678 && operand_equal_p (rhs2
, bump_tree
, 0))
1679 || (operand_equal_p (rhs1
, bump_tree
, 0)
1680 && operand_equal_p (rhs2
, basis_name
, 0))))
1682 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1684 fputs ("(duplicate, not actually replacing)", dump_file
);
1685 stmt_to_print
= c
->cand_stmt
;
1690 gimple_stmt_iterator gsi
= gsi_for_stmt (c
->cand_stmt
);
1691 gimple_assign_set_rhs_with_ops (&gsi
, code
, basis_name
, bump_tree
);
1692 update_stmt (gsi_stmt (gsi
));
1693 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1694 stmt_to_print
= gsi_stmt (gsi
);
1698 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1700 fputs ("With: ", dump_file
);
1701 print_gimple_stmt (dump_file
, stmt_to_print
, 0, 0);
1702 fputs ("\n", dump_file
);
1706 /* Replace candidate C, each sibling of candidate C, and each
1707 dependent of candidate C with an add or subtract. Note that we
1708 only operate on CAND_MULTs with known strides, so we will never
1709 generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is
1710 replaced by X = Y + ((i - i') * S), as described in the module
1711 commentary. The folded value ((i - i') * S) is referred to here
1715 replace_dependents (slsr_cand_t c
)
1717 enum tree_code cand_code
= gimple_assign_rhs_code (c
->cand_stmt
);
1719 /* It is not useful to replace casts, copies, or adds of an SSA name
1720 and a constant. Also skip candidates that have already been
1721 replaced under another interpretation. */
1722 if (cand_code
!= MODIFY_EXPR
1723 && cand_code
!= NOP_EXPR
1724 && c
->kind
== CAND_MULT
1725 && !cand_already_replaced (c
))
1726 replace_dependent (c
, cand_code
);
1729 replace_dependents (lookup_cand (c
->sibling
));
1732 replace_dependents (lookup_cand (c
->dependent
));
1735 /* Return the index in the increment vector of the given INCREMENT. */
1737 static inline unsigned
1738 incr_vec_index (double_int increment
)
1743 i
< incr_vec_len
&& !double_int_equal_p (increment
, incr_vec
[i
].incr
);
1747 gcc_assert (i
< incr_vec_len
);
1751 /* Count the number of candidates in the tree rooted at C that have
1752 not already been replaced under other interpretations. */
1755 count_candidates (slsr_cand_t c
)
1757 unsigned count
= cand_already_replaced (c
) ? 0 : 1;
1760 count
+= count_candidates (lookup_cand (c
->sibling
));
1763 count
+= count_candidates (lookup_cand (c
->dependent
));
1768 /* Increase the count of INCREMENT by one in the increment vector.
1769 INCREMENT is associated with candidate C. If an initializer
1770 T_0 = stride * I is provided by a candidate that dominates all
1771 candidates with the same increment, also record T_0 for subsequent use. */
1774 record_increment (slsr_cand_t c
, double_int increment
)
1779 /* Treat increments that differ only in sign as identical so as to
1780 share initializers, unless we are generating pointer arithmetic. */
1781 if (!address_arithmetic_p
&& double_int_negative_p (increment
))
1782 increment
= double_int_neg (increment
);
1784 for (i
= 0; i
< incr_vec_len
; i
++)
1786 if (double_int_equal_p (incr_vec
[i
].incr
, increment
))
1788 incr_vec
[i
].count
++;
1791 /* If we previously recorded an initializer that doesn't
1792 dominate this candidate, it's not going to be useful to
1794 if (incr_vec
[i
].initializer
1795 && !dominated_by_p (CDI_DOMINATORS
,
1796 gimple_bb (c
->cand_stmt
),
1797 incr_vec
[i
].init_bb
))
1799 incr_vec
[i
].initializer
= NULL_TREE
;
1800 incr_vec
[i
].init_bb
= NULL
;
1809 /* The first time we see an increment, create the entry for it.
1810 If this is the root candidate which doesn't have a basis, set
1811 the count to zero. We're only processing it so it can possibly
1812 provide an initializer for other candidates. */
1813 incr_vec
[incr_vec_len
].incr
= increment
;
1814 incr_vec
[incr_vec_len
].count
= c
->basis
? 1 : 0;
1815 incr_vec
[incr_vec_len
].cost
= COST_INFINITE
;
1817 /* Optimistically record the first occurrence of this increment
1818 as providing an initializer (if it does); we will revise this
1819 opinion later if it doesn't dominate all other occurrences.
1820 Exception: increments of -1, 0, 1 never need initializers. */
1821 if (c
->kind
== CAND_ADD
1822 && double_int_equal_p (c
->index
, increment
)
1823 && (double_int_scmp (increment
, double_int_one
) > 0
1824 || double_int_scmp (increment
, double_int_minus_one
) < 0))
1827 tree rhs1
= gimple_assign_rhs1 (c
->cand_stmt
);
1828 tree rhs2
= gimple_assign_rhs2 (c
->cand_stmt
);
1829 if (operand_equal_p (rhs1
, c
->base_expr
, 0))
1833 if (SSA_NAME_DEF_STMT (t0
) && gimple_bb (SSA_NAME_DEF_STMT (t0
)))
1835 incr_vec
[incr_vec_len
].initializer
= t0
;
1836 incr_vec
[incr_vec_len
++].init_bb
1837 = gimple_bb (SSA_NAME_DEF_STMT (t0
));
1841 incr_vec
[incr_vec_len
].initializer
= NULL_TREE
;
1842 incr_vec
[incr_vec_len
++].init_bb
= NULL
;
1847 incr_vec
[incr_vec_len
].initializer
= NULL_TREE
;
1848 incr_vec
[incr_vec_len
++].init_bb
= NULL
;
1853 /* Determine how many times each unique increment occurs in the set
1854 of candidates rooted at C's parent, recording the data in the
1855 increment vector. For each unique increment I, if an initializer
1856 T_0 = stride * I is provided by a candidate that dominates all
1857 candidates with the same increment, also record T_0 for subsequent
1861 record_increments (slsr_cand_t c
)
1863 if (!cand_already_replaced (c
))
1864 record_increment (c
, cand_increment (c
));
1867 record_increments (lookup_cand (c
->sibling
));
1870 record_increments (lookup_cand (c
->dependent
));
1873 /* Return the first candidate in the tree rooted at C that has not
1874 already been replaced, favoring siblings over dependents. */
1877 unreplaced_cand_in_tree (slsr_cand_t c
)
1879 if (!cand_already_replaced (c
))
1884 slsr_cand_t sib
= unreplaced_cand_in_tree (lookup_cand (c
->sibling
));
1891 slsr_cand_t dep
= unreplaced_cand_in_tree (lookup_cand (c
->dependent
));
1899 /* Return TRUE if the candidates in the tree rooted at C should be
1900 optimized for speed, else FALSE. We estimate this based on the block
1901 containing the most dominant candidate in the tree that has not yet
1905 optimize_cands_for_speed_p (slsr_cand_t c
)
1907 slsr_cand_t c2
= unreplaced_cand_in_tree (c
);
1909 return optimize_bb_for_speed_p (gimple_bb (c2
->cand_stmt
));
1912 /* Add COST_IN to the lowest cost of any dependent path starting at
1913 candidate C or any of its siblings, counting only candidates along
1914 such paths with increment INCR. Assume that replacing a candidate
1915 reduces cost by REPL_SAVINGS. Also account for savings from any
1916 statements that would go dead. */
1919 lowest_cost_path (int cost_in
, int repl_savings
, slsr_cand_t c
, double_int incr
)
1921 int local_cost
, sib_cost
;
1922 double_int cand_incr
= cand_abs_increment (c
);
1924 if (cand_already_replaced (c
))
1925 local_cost
= cost_in
;
1926 else if (double_int_equal_p (incr
, cand_incr
))
1927 local_cost
= cost_in
- repl_savings
- c
->dead_savings
;
1929 local_cost
= cost_in
- c
->dead_savings
;
1932 local_cost
= lowest_cost_path (local_cost
, repl_savings
,
1933 lookup_cand (c
->dependent
), incr
);
1937 sib_cost
= lowest_cost_path (cost_in
, repl_savings
,
1938 lookup_cand (c
->sibling
), incr
);
1939 local_cost
= MIN (local_cost
, sib_cost
);
1945 /* Compute the total savings that would accrue from all replacements
1946 in the candidate tree rooted at C, counting only candidates with
1947 increment INCR. Assume that replacing a candidate reduces cost
1948 by REPL_SAVINGS. Also account for savings from statements that
1952 total_savings (int repl_savings
, slsr_cand_t c
, double_int incr
)
1955 double_int cand_incr
= cand_abs_increment (c
);
1957 if (double_int_equal_p (incr
, cand_incr
)
1958 && !cand_already_replaced (c
))
1959 savings
+= repl_savings
+ c
->dead_savings
;
1962 savings
+= total_savings (repl_savings
, lookup_cand (c
->dependent
), incr
);
1965 savings
+= total_savings (repl_savings
, lookup_cand (c
->sibling
), incr
);
1970 /* Use target-specific costs to determine and record which increments
1971 in the current candidate tree are profitable to replace, assuming
1972 MODE and SPEED. FIRST_DEP is the first dependent of the root of
1975 One slight limitation here is that we don't account for the possible
1976 introduction of casts in some cases. See replace_one_candidate for
1977 the cases where these are introduced. This should probably be cleaned
1981 analyze_increments (slsr_cand_t first_dep
, enum machine_mode mode
, bool speed
)
1985 for (i
= 0; i
< incr_vec_len
; i
++)
1987 HOST_WIDE_INT incr
= double_int_to_shwi (incr_vec
[i
].incr
);
1989 /* If somehow this increment is bigger than a HWI, we won't
1990 be optimizing candidates that use it. And if the increment
1991 has a count of zero, nothing will be done with it. */
1992 if (!double_int_fits_in_shwi_p (incr_vec
[i
].incr
)
1993 || !incr_vec
[i
].count
)
1994 incr_vec
[i
].cost
= COST_INFINITE
;
1996 /* Increments of 0, 1, and -1 are always profitable to replace,
1997 because they always replace a multiply or add with an add or
1998 copy, and may cause one or more existing instructions to go
1999 dead. Exception: -1 can't be assumed to be profitable for
2000 pointer addition. */
2004 && (gimple_assign_rhs_code (first_dep
->cand_stmt
)
2005 != POINTER_PLUS_EXPR
)))
2006 incr_vec
[i
].cost
= COST_NEUTRAL
;
2008 /* FORNOW: If we need to add an initializer, give up if a cast from
2009 the candidate's type to its stride's type can lose precision.
2010 This could eventually be handled better by expressly retaining the
2011 result of a cast to a wider type in the stride. Example:
2016 _4 = x + _3; ADD: x + (10 * _1) : int
2018 _6 = x + _3; ADD: x + (15 * _1) : int
2020 Right now replacing _6 would cause insertion of an initializer
2021 of the form "short int T = _1 * 5;" followed by a cast to
2022 int, which could overflow incorrectly. Had we recorded _2 or
2023 (int)_1 as the stride, this wouldn't happen. However, doing
2024 this breaks other opportunities, so this will require some
2026 else if (!incr_vec
[i
].initializer
2027 && TREE_CODE (first_dep
->stride
) != INTEGER_CST
2028 && !legal_cast_p_1 (first_dep
->stride
,
2029 gimple_assign_lhs (first_dep
->cand_stmt
)))
2031 incr_vec
[i
].cost
= COST_INFINITE
;
2033 /* For any other increment, if this is a multiply candidate, we
2034 must introduce a temporary T and initialize it with
2035 T_0 = stride * increment. When optimizing for speed, walk the
2036 candidate tree to calculate the best cost reduction along any
2037 path; if it offsets the fixed cost of inserting the initializer,
2038 replacing the increment is profitable. When optimizing for
2039 size, instead calculate the total cost reduction from replacing
2040 all candidates with this increment. */
2041 else if (first_dep
->kind
== CAND_MULT
)
2043 int cost
= mult_by_coeff_cost (incr
, mode
, speed
);
2044 int repl_savings
= mul_cost (speed
, mode
) - add_cost (speed
, mode
);
2046 cost
= lowest_cost_path (cost
, repl_savings
, first_dep
,
2049 cost
-= total_savings (repl_savings
, first_dep
, incr_vec
[i
].incr
);
2051 incr_vec
[i
].cost
= cost
;
2054 /* If this is an add candidate, the initializer may already
2055 exist, so only calculate the cost of the initializer if it
2056 doesn't. We are replacing one add with another here, so the
2057 known replacement savings is zero. We will account for removal
2058 of dead instructions in lowest_cost_path or total_savings. */
2062 if (!incr_vec
[i
].initializer
)
2063 cost
= mult_by_coeff_cost (incr
, mode
, speed
);
2066 cost
= lowest_cost_path (cost
, 0, first_dep
, incr_vec
[i
].incr
);
2068 cost
-= total_savings (0, first_dep
, incr_vec
[i
].incr
);
2070 incr_vec
[i
].cost
= cost
;
/* Return the nearest common dominator of BB1 and BB2.  If the blocks
   are identical, return the earlier of C1 and C2 in *WHERE.  Otherwise,
   if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
   return C2 in *WHERE; and if the NCD matches neither, return NULL in
   *WHERE.  Note: It is possible for one of C1 and C2 to be NULL.  */

static basic_block
ncd_for_two_cands (basic_block bb1, basic_block bb2,
                   slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
{
  basic_block ncd;

  if (!bb1)
    {
      *where = c2;
      return bb2;
    }

  if (!bb2)
    {
      *where = c1;
      return bb1;
    }

  ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);

  /* If both candidates are in the same block, the earlier
     candidate wins.  */
  if (bb1 == ncd && bb2 == ncd)
    {
      if (!c1 || (c2 && c2->cand_num < c1->cand_num))
        *where = c2;
      else
        *where = c1;
    }

  /* Otherwise, if one of them produced a candidate in the
     dominator, that one wins.  */
  else if (bb1 == ncd)
    *where = c1;

  else if (bb2 == ncd)
    *where = c2;

  /* If neither matches the dominator, neither wins.  */
  else
    *where = NULL;

  return ncd;
}
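
/* Illustrative behavior of the routine above (example only).  When
   BB1 dominates BB2, the nearest common dominator is BB1 itself, so
   C1 is returned in *WHERE.  When BB1 and BB2 are the two arms of a
   diamond, the NCD is the branch block at the top of the diamond; it
   contains neither candidate, so *WHERE is set to NULL and the caller
   must insert at the end of that block rather than before a
   particular candidate.  */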
/* Consider all candidates in the tree rooted at C for which INCR
   represents the required increment of C relative to its basis.
   Find and return the basic block that most nearly dominates all
   such candidates.  If the returned block contains one or more of
   the candidates, return the earliest candidate in the block in
   *WHERE.  */

static basic_block
nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
                                    slsr_cand_t *where)
{
  basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
  slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
  double_int cand_incr;

  /* First find the NCD of all siblings and dependents.  */
  if (c->sibling)
    sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
                                                  incr, &sib_where);
  if (c->dependent)
    dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
                                                  incr, &dep_where);
  if (!sib_ncd && !dep_ncd)
    {
      new_where = NULL;
      ncd = NULL;
    }
  else if (sib_ncd && !dep_ncd)
    {
      new_where = sib_where;
      ncd = sib_ncd;
    }
  else if (dep_ncd && !sib_ncd)
    {
      new_where = dep_where;
      ncd = dep_ncd;
    }
  else
    ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
                             dep_where, &new_where);

  /* If the candidate's increment doesn't match the one we're interested
     in, then the result depends only on siblings and dependents.  */
  cand_incr = cand_abs_increment (c);

  if (!double_int_equal_p (cand_incr, incr) || cand_already_replaced (c))
    {
      *where = new_where;
      return ncd;
    }

  /* Otherwise, compare this candidate with the result from all siblings
     and dependents.  */
  this_where = c;
  this_ncd = gimple_bb (c->cand_stmt);
  ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);

  return ncd;
}
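
/* Illustrative use (example only).  For a chain whose candidates with
   the requested increment reside in blocks B3 and B5, where block B2
   dominates both, the recursion returns B2; *WHERE is non-NULL only
   if one of those candidates actually resides in B2, in which case it
   names the earliest such candidate there.  */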
/* Return TRUE if the increment indexed by INDEX is profitable to replace.  */

static inline bool
profitable_increment_p (unsigned index)
{
  return (incr_vec[index].cost <= COST_NEUTRAL);
}
/* For each profitable increment in the increment vector not equal to
   0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
   dominator of all statements in the candidate chain rooted at C
   that require that increment, and insert an initializer
   T_0 = stride * increment at that location.  Record T_0 with the
   increment record.  */

static void
insert_initializers (slsr_cand_t c)
{
  unsigned i;
  tree new_var = NULL_TREE;

  for (i = 0; i < incr_vec_len; i++)
    {
      basic_block bb;
      slsr_cand_t where = NULL;
      gimple init_stmt;
      tree stride_type, new_name, incr_tree;
      double_int incr = incr_vec[i].incr;

      if (!profitable_increment_p (i)
          || double_int_one_p (incr)
          || (double_int_minus_one_p (incr)
              && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
          || double_int_zero_p (incr))
        continue;

      /* We may have already identified an existing initializer that
         will suffice.  */
      if (incr_vec[i].initializer)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fputs ("Using existing initializer: ", dump_file);
              print_gimple_stmt (dump_file,
                                 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
                                 0, 0);
            }
          continue;
        }

      /* Find the block that most closely dominates all candidates
         with this increment.  If there is at least one candidate in
         that block, the earliest one will be returned in WHERE.  */
      bb = nearest_common_dominator_for_cands (c, incr, &where);

      /* Create a new SSA name to hold the initializer's value.  */
      stride_type = TREE_TYPE (c->stride);
      lazy_create_slsr_reg (&new_var, stride_type);
      new_name = make_ssa_name (new_var, NULL);
      incr_vec[i].initializer = new_name;

      /* Create the initializer and insert it in the latest possible
         dominating position.  */
      incr_tree = double_int_to_tree (stride_type, incr);
      init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
                                                c->stride, incr_tree);
      if (where)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
          gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
          gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
        }
      else
        {
          gimple_stmt_iterator gsi = gsi_last_bb (bb);
          gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;

          if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
            gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
          else
            gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);

          gimple_set_location (init_stmt, gimple_location (basis_stmt));
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fputs ("Inserting initializer: ", dump_file);
          print_gimple_stmt (dump_file, init_stmt, 0, 0);
        }
    }
}
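
/* Illustrative placement (example only; names invented).  For a
   profitable increment of 5 over an SSA stride _s, the statement

       T_0 = _s * 5;

   is materialized at the nearest common dominator of all candidates
   requiring that increment: immediately before the earliest such
   candidate when one resides in the dominator block, otherwise at the
   end of the dominator block (before its control statement, if any).
   Each later replacement then uses T_0 as its addend.  */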
/* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
   type TO_TYPE, and insert it in front of the statement represented
   by candidate C.  Use *NEW_VAR to create the new SSA name.  Return
   the new SSA name.  */

static tree
introduce_cast_before_cand (slsr_cand_t c, tree to_type,
                            tree from_expr, tree *new_var)
{
  tree cast_lhs;
  gimple cast_stmt;
  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);

  lazy_create_slsr_reg (new_var, to_type);
  cast_lhs = make_ssa_name (*new_var, NULL);
  cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
                                            from_expr, NULL_TREE);
  gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
  gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("  Inserting: ", dump_file);
      print_gimple_stmt (dump_file, cast_stmt, 0, 0);
    }

  return cast_lhs;
}
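
/* Illustrative insertion (example only; names invented).  If the
   candidate expects an int operand but the value to reuse, say T_0,
   is a short int, a statement of the form

       _7 = (int) T_0;

   is inserted immediately before the candidate, and _7 is returned
   for use as the candidate's new rhs2.  */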
/* Replace the RHS of the statement represented by candidate C with
   NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
   leave C unchanged or just interchange its operands.  The original
   operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
   If the replacement was made and we are doing a details dump,
   return the revised statement, else NULL.  */

static gimple
replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
                        enum tree_code old_code, tree old_rhs1, tree old_rhs2,
                        slsr_cand_t c)
{
  if (new_code != old_code
      || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
           || !operand_equal_p (new_rhs2, old_rhs2, 0))
          && (!operand_equal_p (new_rhs1, old_rhs2, 0)
              || !operand_equal_p (new_rhs2, old_rhs1, 0))))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
      gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
      update_stmt (gsi_stmt (gsi));

      if (dump_file && (dump_flags & TDF_DETAILS))
        return gsi_stmt (gsi);
    }

  else if (dump_file && (dump_flags & TDF_DETAILS))
    fputs ("  (duplicate, not actually replacing)\n", dump_file);

  return NULL;
}
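
/* Illustrative duplicate case (example only; names invented).  If the
   candidate statement is already "x_1 = T_0 + basis_2" and the
   requested replacement is PLUS_EXPR with operands basis_2 and T_0,
   the operands merely appear interchanged, so the statement is left
   untouched and reported as a duplicate in the details dump.  */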
/* Strength-reduce the statement represented by candidate C by replacing
   it with an equivalent addition or subtraction.  I is the index into
   the increment vector identifying C's increment.  NEW_VAR is used to
   create a new SSA name if a cast needs to be introduced.  BASIS_NAME
   is the rhs1 to use in creating the add/subtract.  */

static void
replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var,
                       tree basis_name)
{
  gimple stmt_to_print = NULL;
  tree orig_rhs1, orig_rhs2;
  tree rhs2;
  enum tree_code orig_code, repl_code;
  double_int cand_incr;

  orig_code = gimple_assign_rhs_code (c->cand_stmt);
  orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
  orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
  cand_incr = cand_increment (c);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Replacing: ", dump_file);
      print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
      stmt_to_print = c->cand_stmt;
    }

  if (address_arithmetic_p)
    repl_code = POINTER_PLUS_EXPR;
  else
    repl_code = PLUS_EXPR;

  /* If the increment has an initializer T_0, replace the candidate
     statement with an add of the basis name and the initializer.  */
  if (incr_vec[i].initializer)
    {
      tree init_type = TREE_TYPE (incr_vec[i].initializer);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, init_type))
        rhs2 = incr_vec[i].initializer;
      else
        rhs2 = introduce_cast_before_cand (c, orig_type,
                                           incr_vec[i].initializer,
                                           new_var);

      if (!double_int_equal_p (incr_vec[i].incr, cand_incr))
        {
          gcc_assert (repl_code == PLUS_EXPR);
          repl_code = MINUS_EXPR;
        }

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
                                              orig_code, orig_rhs1, orig_rhs2,
                                              c);
    }
  /* Otherwise, the increment is one of -1, 0, and 1.  Replace
     with a subtract of the stride from the basis name, a copy
     from the basis name, or an add of the stride to the basis
     name, respectively.  It may be necessary to introduce a
     cast (or reuse an existing cast).  */
  else if (double_int_one_p (cand_incr))
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, stride_type))
        rhs2 = c->stride;
      else
        rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
                                              orig_code, orig_rhs1, orig_rhs2,
                                              c);
    }

  else if (double_int_minus_one_p (cand_incr))
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);
      gcc_assert (repl_code != POINTER_PLUS_EXPR);

      if (types_compatible_p (orig_type, stride_type))
        rhs2 = c->stride;
      else
        rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);

      if (orig_code != MINUS_EXPR
          || !operand_equal_p (basis_name, orig_rhs1, 0)
          || !operand_equal_p (rhs2, orig_rhs2, 0))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
          gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
          update_stmt (gsi_stmt (gsi));

          if (dump_file && (dump_flags & TDF_DETAILS))
            stmt_to_print = gsi_stmt (gsi);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fputs ("  (duplicate, not actually replacing)\n", dump_file);
    }
  else if (double_int_zero_p (cand_incr))
    {
      tree lhs = gimple_assign_lhs (c->cand_stmt);
      tree lhs_type = TREE_TYPE (lhs);
      tree basis_type = TREE_TYPE (basis_name);

      if (types_compatible_p (lhs_type, basis_type))
        {
          gimple copy_stmt = gimple_build_assign (lhs, basis_name);
          gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
          gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
          gsi_replace (&gsi, copy_stmt, false);

          if (dump_file && (dump_flags & TDF_DETAILS))
            stmt_to_print = copy_stmt;
        }
      else
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
          gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
                                                           basis_name,
                                                           NULL_TREE);
          gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
          gsi_replace (&gsi, cast_stmt, false);

          if (dump_file && (dump_flags & TDF_DETAILS))
            stmt_to_print = cast_stmt;
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
    {
      fputs ("With: ", dump_file);
      print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
      fputs ("\n", dump_file);
    }
}
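
/* Illustrative replacements produced by the routine above (example
   only; names invented).  With basis name y_1 and stride _s, a
   candidate x_2 is rewritten as:

       increment  1:    x_2 = y_1 + _s;
       increment -1:    x_2 = y_1 - _s;
       increment  0:    x_2 = y_1;          (or a cast of y_1)
       other increment: x_2 = y_1 + T_0;    using the initializer T_0
                                            inserted earlier, or
                                            y_1 - T_0 when the
                                            candidate's increment is
                                            the negation of the one
                                            recorded with T_0.  */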
/* For each candidate in the tree rooted at C, replace it with
   an increment if such has been shown to be profitable.  */

static void
replace_profitable_candidates (slsr_cand_t c)
{
  if (!cand_already_replaced (c))
    {
      double_int increment = cand_abs_increment (c);
      tree new_var = NULL;
      enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
      unsigned i;

      i = incr_vec_index (increment);

      /* Only process profitable increments.  Nothing useful can be done
         to a cast or copy.  */
      if (profitable_increment_p (i)
          && orig_code != MODIFY_EXPR
          && orig_code != NOP_EXPR)
        {
          slsr_cand_t basis = lookup_cand (c->basis);
          tree basis_name = gimple_assign_lhs (basis->cand_stmt);
          replace_one_candidate (c, i, &new_var, basis_name);
        }
    }

  if (c->sibling)
    replace_profitable_candidates (lookup_cand (c->sibling));

  if (c->dependent)
    replace_profitable_candidates (lookup_cand (c->dependent));
}
/* Analyze costs of related candidates in the candidate vector,
   and make beneficial replacements.  */

static void
analyze_candidates_and_replace (void)
{
  unsigned i;
  slsr_cand_t c;

  /* Each candidate that has a null basis and a non-null
     dependent is the root of a tree of related statements.
     Analyze each tree to determine a subset of those
     statements that can be replaced with maximum benefit.  */
  FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
    {
      slsr_cand_t first_dep;

      if (c->basis != 0 || c->dependent == 0)
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
                 c->cand_num);

      first_dep = lookup_cand (c->dependent);

      /* If this is a chain of CAND_REFs, unconditionally replace
         each of them with a strength-reduced data reference.  */
      if (c->kind == CAND_REF)
        replace_refs (c);

      /* If the common stride of all related candidates is a
         known constant, and none of these has a phi-dependence,
         then all replacements are considered profitable.
         Each replaces a multiply by a single add, with the
         possibility that a feeding add also goes dead as a
         result.  */
      else if (unconditional_cands_with_known_stride_p (c))
        replace_dependents (first_dep);

      /* When the stride is an SSA name, it may still be profitable
         to replace some or all of the dependent candidates, depending
         on whether the introduced increments can be reused, or are
         less expensive to calculate than the replaced statements.  */
      else
        {
          unsigned length;
          enum machine_mode mode;
          bool speed;

          /* Determine whether we'll be generating pointer arithmetic
             when replacing candidates.  */
          address_arithmetic_p = (c->kind == CAND_ADD
                                  && POINTER_TYPE_P (c->cand_type));

          /* If all candidates have already been replaced under other
             interpretations, nothing remains to be done.  */
          length = count_candidates (c);
          if (!length)
            continue;

          /* Construct an array of increments for this candidate chain.  */
          incr_vec = XNEWVEC (incr_info, length);
          incr_vec_len = 0;
          record_increments (c);

          /* Determine which increments are profitable to replace.  */
          mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
          speed = optimize_cands_for_speed_p (c);
          analyze_increments (first_dep, mode, speed);

          /* Insert initializers of the form T_0 = stride * increment
             for use in profitable replacements.  */
          insert_initializers (first_dep);

          /* Perform the replacements.  */
          replace_profitable_candidates (first_dep);
          free (incr_vec);
        }

      /* TODO:  When conditional increments occur so that a
         candidate is dependent upon a phi-basis, the cost of
         introducing a temporary must be accounted for.  */
    }
}
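
/* Illustrative end-to-end case for the unknown-stride path above
   (example only; names invented).  Given

       x_3 = (i_1 + 2) * _s;
       y_4 = (i_1 + 7) * _s;

   the second statement has the first as its basis with increment 5.
   Because the stride _s is an SSA name, the pass records increment 5,
   checks that inserting T_0 = _s * 5 pays for itself against the
   multiply it removes, and if so rewrites the dependent statement as
   y_4 = x_3 + T_0.  */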
static unsigned
execute_strength_reduction (void)
{
  struct dom_walk_data walk_data;

  /* Create the obstack where candidates will reside.  */
  gcc_obstack_init (&cand_obstack);

  /* Allocate the candidate vector.  */
  cand_vec = VEC_alloc (slsr_cand_t, heap, 128);

  /* Allocate the mapping from statements to candidate indices.  */
  stmt_cand_map = pointer_map_create ();

  /* Create the obstack where candidate chains will reside.  */
  gcc_obstack_init (&chain_obstack);

  /* Allocate the mapping from base expressions to candidate chains.  */
  base_cand_map = htab_create (500, base_cand_hash,
                               base_cand_eq, base_cand_free);

  /* Initialize the loop optimizer.  We need to detect flow across
     back edges, and this gives us dominator information as well.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Set up callbacks for the generic dominator tree walker.  */
  walk_data.dom_direction = CDI_DOMINATORS;
  walk_data.initialize_block_local_data = NULL;
  walk_data.before_dom_children = find_candidates_in_block;
  walk_data.after_dom_children = NULL;
  walk_data.global_data = NULL;
  walk_data.block_local_data_size = 0;
  init_walk_dominator_tree (&walk_data);

  /* Walk the CFG in predominator order looking for strength reduction
     candidates.  */
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_cand_vec ();
      dump_cand_chains ();
    }

  /* Analyze costs and make appropriate replacements.  */
  analyze_candidates_and_replace ();

  /* Free resources.  */
  fini_walk_dominator_tree (&walk_data);
  loop_optimizer_finalize ();
  htab_delete (base_cand_map);
  obstack_free (&chain_obstack, NULL);
  pointer_map_destroy (stmt_cand_map);
  VEC_free (slsr_cand_t, heap, cand_vec);
  obstack_free (&cand_obstack, NULL);

  return 0;
}
static bool
gate_strength_reduction (void)
{
  return flag_tree_slsr;
}
struct gimple_opt_pass pass_strength_reduction =
{
 {
  GIMPLE_PASS,
  "slsr",                               /* name */
  gate_strength_reduction,              /* gate */
  execute_strength_reduction,           /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_GIMPLE_SLSR,                       /* tv_id */
  PROP_cfg | PROP_ssa,                  /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_verify_ssa                       /* todo_flags_finish */
 }
};