* ChangeLog: Fix whitespace.
[official-gcc.git] / gcc / gimple-ssa-strength-reduction.c
blob8806f48a5a0439baeafd8dfe5df1067c7f52e347
1 /* Straight-line strength reduction.
2 Copyright (C) 2012 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction will be implemented in four stages, gradually
28 adding more complex candidates:
30 1) Explicit multiplies, known constant multipliers, no
31 conditional increments. (complete)
32 2) Explicit multiplies, unknown constant multipliers,
33 no conditional increments. (complete)
34 3) Implicit multiplies in addressing expressions. (complete)
35 4) Explicit multiplies, conditional increments. (pending)
37 It would also be possible to apply strength reduction to divisions
38 and modulos, but such opportunities are relatively uncommon.
40 Strength reduction is also currently restricted to integer operations.
41 If desired, it could be extended to floating-point operations under
42 control of something like -funsafe-math-optimizations. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "basic-block.h"
50 #include "tree-pass.h"
51 #include "cfgloop.h"
52 #include "gimple-pretty-print.h"
53 #include "tree-flow.h"
54 #include "domwalk.h"
55 #include "pointer-set.h"
56 #include "expmed.h"
58 /* Information about a strength reduction candidate. Each statement
59 in the candidate table represents an expression of one of the
60 following forms (the special case of CAND_REF will be described
61 later):
63 (CAND_MULT) S1: X = (B + i) * S
64 (CAND_ADD) S1: X = B + (i * S)
66 Here X and B are SSA names, i is an integer constant, and S is
67 either an SSA name or a constant. We call B the "base," i the
68 "index", and S the "stride."
70 Any statement S0 that dominates S1 and is of the form:
72 (CAND_MULT) S0: Y = (B + i') * S
73 (CAND_ADD) S0: Y = B + (i' * S)
75 is called a "basis" for S1. In both cases, S1 may be replaced by
77 S1': X = Y + (i - i') * S,
79 where (i - i') * S is folded to the extent possible.
81 All gimple statements are visited in dominator order, and each
82 statement that may contribute to one of the forms of S1 above is
83 given at least one entry in the candidate table. Such statements
84 include addition, pointer addition, subtraction, multiplication,
85 negation, copies, and nontrivial type casts. If a statement may
86 represent more than one expression of the forms of S1 above,
87 multiple "interpretations" are stored in the table and chained
88 together. Examples:
90 * An add of two SSA names may treat either operand as the base.
91 * A multiply of two SSA names, likewise.
92 * A copy or cast may be thought of as either a CAND_MULT with
93 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
95 Candidate records are allocated from an obstack. They are addressed
96 both from a hash table keyed on S1, and from a vector of candidate
97 pointers arranged in predominator order.
99 Opportunity note
100 ----------------
101 Currently we don't recognize:
103 S0: Y = (S * i') - B
104 S1: X = (S * i) - B
106 as a strength reduction opportunity, even though this S1 would
107 also be replaceable by the S1' above. This can be added if it
108 comes up in practice.
110 Strength reduction in addressing
111 --------------------------------
112 There is another kind of candidate known as CAND_REF. A CAND_REF
113 describes a statement containing a memory reference having
114 complex addressing that might benefit from strength reduction.
115 Specifically, we are interested in references for which
116 get_inner_reference returns a base address, offset, and bitpos as
117 follows:
119 base: MEM_REF (T1, C1)
120 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
121 bitpos: C4 * BITS_PER_UNIT
123 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
124 arbitrary integer constants. Note that C2 may be zero, in which
125 case the offset will be MULT_EXPR (T2, C3).
127 When this pattern is recognized, the original memory reference
128 can be replaced with:
130 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
131 C1 + (C2 * C3) + C4)
133 which distributes the multiply to allow constant folding. When
134 two or more addressing expressions can be represented by MEM_REFs
135 of this form, differing only in the constants C1, C2, and C4,
136 making this substitution produces more efficient addressing during
137 the RTL phases. When there are not at least two expressions with
138 the same values of T1, T2, and C3, there is nothing to be gained
139 by the replacement.
141 Strength reduction of CAND_REFs uses the same infrastructure as
142 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
143 field, MULT_EXPR (T2, C3) in the stride (S) field, and
144 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
145 is thus another CAND_REF with the same B and S values. When at
146 least two CAND_REFs are chained together using the basis relation,
147 each of them is replaced as above, resulting in improved code
148 generation for addressing. */
151 /* Index into the candidate vector, offset by 1. VECs are zero-based,
152 while cand_idx's are one-based, with zero indicating null. */
153 typedef unsigned cand_idx;
155 /* The kind of candidate. */
156 enum cand_kind
158 CAND_MULT,
159 CAND_ADD,
160 CAND_REF
163 struct slsr_cand_d
165 /* The candidate statement S1. */
166 gimple cand_stmt;
168 /* The base expression B: often an SSA name, but not always. */
169 tree base_expr;
171 /* The stride S. */
172 tree stride;
174 /* The index constant i. */
175 double_int index;
177 /* The type of the candidate. This is normally the type of base_expr,
178 but casts may have occurred when combining feeding instructions.
179 A candidate can only be a basis for candidates of the same final type.
180 (For CAND_REFs, this is the type to be used for operand 1 of the
181 replacement MEM_REF.) */
182 tree cand_type;
184 /* The kind of candidate (CAND_MULT, etc.). */
185 enum cand_kind kind;
187 /* Index of this candidate in the candidate vector. */
188 cand_idx cand_num;
190 /* Index of the next candidate record for the same statement.
191 A statement may be useful in more than one way (e.g., due to
192 commutativity). So we can have multiple "interpretations"
193 of a statement. */
194 cand_idx next_interp;
196 /* Index of the basis statement S0, if any, in the candidate vector. */
197 cand_idx basis;
199 /* First candidate for which this candidate is a basis, if one exists. */
200 cand_idx dependent;
202 /* Next candidate having the same basis as this one. */
203 cand_idx sibling;
205 /* If this is a conditional candidate, the defining PHI statement
206 for the base SSA name B. For future use; always NULL for now. */
207 gimple def_phi;
209 /* Savings that can be expected from eliminating dead code if this
210 candidate is replaced. */
211 int dead_savings;
214 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
215 typedef const struct slsr_cand_d *const_slsr_cand_t;
217 /* Pointers to candidates are chained together as part of a mapping
218 from base expressions to the candidates that use them. */
220 struct cand_chain_d
222 /* Base expression for the chain of candidates: often, but not
223 always, an SSA name. */
224 tree base_expr;
226 /* Pointer to a candidate. */
227 slsr_cand_t cand;
229 /* Chain pointer. */
230 struct cand_chain_d *next;
234 typedef struct cand_chain_d cand_chain, *cand_chain_t;
235 typedef const struct cand_chain_d *const_cand_chain_t;
237 /* Information about a unique "increment" associated with candidates
238 having an SSA name for a stride. An increment is the difference
239 between the index of the candidate and the index of its basis,
240 i.e., (i - i') as discussed in the module commentary.
242 When we are not going to generate address arithmetic we treat
243 increments that differ only in sign as the same, allowing sharing
244 of the cost of initializers. The absolute value of the increment
245 is stored in the incr_info. */
247 struct incr_info_d
249 /* The increment that relates a candidate to its basis. */
250 double_int incr;
252 /* How many times the increment occurs in the candidate tree. */
253 unsigned count;
255 /* Cost of replacing candidates using this increment. Negative and
256 zero costs indicate replacement should be performed. */
257 int cost;
259 /* If this increment is profitable but is not -1, 0, or 1, it requires
260 an initializer T_0 = stride * incr to be found or introduced in the
261 nearest common dominator of all candidates. This field holds T_0
262 for subsequent use. */
263 tree initializer;
265 /* If the initializer was found to already exist, this is the block
266 where it was found. */
267 basic_block init_bb;
270 typedef struct incr_info_d incr_info, *incr_info_t;
272 /* Candidates are maintained in a vector. If candidate X dominates
273 candidate Y, then X appears before Y in the vector; but the
274 converse does not necessarily hold. */
275 DEF_VEC_P (slsr_cand_t);
276 DEF_VEC_ALLOC_P (slsr_cand_t, heap);
277 static VEC (slsr_cand_t, heap) *cand_vec;
279 enum cost_consts
281 COST_NEUTRAL = 0,
282 COST_INFINITE = 1000
285 /* Pointer map embodying a mapping from statements to candidates. */
286 static struct pointer_map_t *stmt_cand_map;
288 /* Obstack for candidates. */
289 static struct obstack cand_obstack;
291 /* Hash table embodying a mapping from base exprs to chains of candidates. */
292 static htab_t base_cand_map;
294 /* Obstack for candidate chains. */
295 static struct obstack chain_obstack;
297 /* An array INCR_VEC of incr_infos is used during analysis of related
298 candidates having an SSA name for a stride. INCR_VEC_LEN describes
299 its current length. */
300 static incr_info_t incr_vec;
301 static unsigned incr_vec_len;
303 /* For a chain of candidates with unknown stride, indicates whether or not
304 we must generate pointer arithmetic when replacing statements. */
305 static bool address_arithmetic_p;
307 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
309 static slsr_cand_t
310 lookup_cand (cand_idx idx)
312 return VEC_index (slsr_cand_t, cand_vec, idx - 1);
315 /* Callback to produce a hash value for a candidate chain header. */
317 static hashval_t
318 base_cand_hash (const void *p)
320 tree base_expr = ((const_cand_chain_t) p)->base_expr;
321 return iterative_hash_expr (base_expr, 0);
324 /* Callback when an element is removed from the hash table.
325 We never remove entries until the entire table is released. */
327 static void
328 base_cand_free (void *p ATTRIBUTE_UNUSED)
332 /* Callback to return true if two candidate chain headers are equal. */
334 static int
335 base_cand_eq (const void *p1, const void *p2)
337 const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
338 const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
339 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
342 /* Use the base expr from candidate C to look for possible candidates
343 that can serve as a basis for C. Each potential basis must also
344 appear in a block that dominates the candidate statement and have
345 the same stride and type. If more than one possible basis exists,
346 the one with highest index in the vector is chosen; this will be
347 the most immediately dominating basis. */
349 static int
350 find_basis_for_candidate (slsr_cand_t c)
352 cand_chain mapping_key;
353 cand_chain_t chain;
354 slsr_cand_t basis = NULL;
356 mapping_key.base_expr = c->base_expr;
357 chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
359 for (; chain; chain = chain->next)
361 slsr_cand_t one_basis = chain->cand;
363 if (one_basis->kind != c->kind
364 || !operand_equal_p (one_basis->stride, c->stride, 0)
365 || !types_compatible_p (one_basis->cand_type, c->cand_type)
366 || !dominated_by_p (CDI_DOMINATORS,
367 gimple_bb (c->cand_stmt),
368 gimple_bb (one_basis->cand_stmt)))
369 continue;
371 if (!basis || basis->cand_num < one_basis->cand_num)
372 basis = one_basis;
375 if (basis)
377 c->sibling = basis->dependent;
378 basis->dependent = c->cand_num;
379 return basis->cand_num;
382 return 0;
385 /* Record a mapping from the base expression of C to C itself, indicating that
386 C may potentially serve as a basis using that base expression. */
388 static void
389 record_potential_basis (slsr_cand_t c)
391 cand_chain_t node;
392 void **slot;
394 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
395 node->base_expr = c->base_expr;
396 node->cand = c;
397 node->next = NULL;
398 slot = htab_find_slot (base_cand_map, node, INSERT);
400 if (*slot)
402 cand_chain_t head = (cand_chain_t) (*slot);
403 node->next = head->next;
404 head->next = node;
406 else
407 *slot = node;
410 /* Allocate storage for a new candidate and initialize its fields.
411 Attempt to find a basis for the candidate. */
413 static slsr_cand_t
414 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
415 double_int index, tree stride, tree ctype,
416 unsigned savings)
418 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
419 sizeof (slsr_cand));
420 c->cand_stmt = gs;
421 c->base_expr = base;
422 c->stride = stride;
423 c->index = index;
424 c->cand_type = ctype;
425 c->kind = kind;
426 c->cand_num = VEC_length (slsr_cand_t, cand_vec) + 1;
427 c->next_interp = 0;
428 c->dependent = 0;
429 c->sibling = 0;
430 c->def_phi = NULL;
431 c->dead_savings = savings;
433 VEC_safe_push (slsr_cand_t, heap, cand_vec, c);
434 c->basis = find_basis_for_candidate (c);
435 record_potential_basis (c);
437 return c;
440 /* Determine the target cost of statement GS when compiling according
441 to SPEED. */
443 static int
444 stmt_cost (gimple gs, bool speed)
446 tree lhs, rhs1, rhs2;
447 enum machine_mode lhs_mode;
449 gcc_assert (is_gimple_assign (gs));
450 lhs = gimple_assign_lhs (gs);
451 rhs1 = gimple_assign_rhs1 (gs);
452 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
454 switch (gimple_assign_rhs_code (gs))
456 case MULT_EXPR:
457 rhs2 = gimple_assign_rhs2 (gs);
459 if (host_integerp (rhs2, 0))
460 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
462 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
463 return mul_cost (speed, lhs_mode);
465 case PLUS_EXPR:
466 case POINTER_PLUS_EXPR:
467 case MINUS_EXPR:
468 return add_cost (speed, lhs_mode);
470 case NEGATE_EXPR:
471 return neg_cost (speed, lhs_mode);
473 case NOP_EXPR:
474 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
476 /* Note that we don't assign costs to copies that in most cases
477 will go away. */
478 default:
482 gcc_unreachable ();
483 return 0;
486 /* Look up the defining statement for BASE_IN and return a pointer
487 to its candidate in the candidate table, if any; otherwise NULL.
488 Only CAND_ADD and CAND_MULT candidates are returned. */
490 static slsr_cand_t
491 base_cand_from_table (tree base_in)
493 slsr_cand_t *result;
495 gimple def = SSA_NAME_DEF_STMT (base_in);
496 if (!def)
497 return (slsr_cand_t) NULL;
499 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
501 if (result && (*result)->kind != CAND_REF)
502 return *result;
504 return (slsr_cand_t) NULL;
507 /* Add an entry to the statement-to-candidate mapping. */
509 static void
510 add_cand_for_stmt (gimple gs, slsr_cand_t c)
512 void **slot = pointer_map_insert (stmt_cand_map, gs);
513 gcc_assert (!*slot);
514 *slot = c;
517 /* Look for the following pattern:
519 *PBASE: MEM_REF (T1, C1)
521 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
523 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
525 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
527 *PINDEX: C4 * BITS_PER_UNIT
529 If not present, leave the input values unchanged and return FALSE.
530 Otherwise, modify the input values as follows and return TRUE:
532 *PBASE: T1
533 *POFFSET: MULT_EXPR (T2, C3)
534 *PINDEX: C1 + (C2 * C3) + C4 */
536 static bool
537 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
538 tree *ptype)
540 tree base = *pbase, offset = *poffset;
541 double_int index = *pindex;
542 double_int bpu = uhwi_to_double_int (BITS_PER_UNIT);
543 tree mult_op0, mult_op1, t1, t2, type;
544 double_int c1, c2, c3, c4;
546 if (!base
547 || !offset
548 || TREE_CODE (base) != MEM_REF
549 || TREE_CODE (offset) != MULT_EXPR
550 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
551 || !double_int_zero_p (double_int_umod (index, bpu, FLOOR_MOD_EXPR)))
552 return false;
554 t1 = TREE_OPERAND (base, 0);
555 c1 = mem_ref_offset (base);
556 type = TREE_TYPE (TREE_OPERAND (base, 1));
558 mult_op0 = TREE_OPERAND (offset, 0);
559 mult_op1 = TREE_OPERAND (offset, 1);
561 c3 = tree_to_double_int (mult_op1);
563 if (TREE_CODE (mult_op0) == PLUS_EXPR)
565 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
567 t2 = TREE_OPERAND (mult_op0, 0);
568 c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
570 else
571 return false;
573 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
575 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
577 t2 = TREE_OPERAND (mult_op0, 0);
578 c2 = double_int_neg (tree_to_double_int (TREE_OPERAND (mult_op0, 1)));
580 else
581 return false;
583 else
585 t2 = mult_op0;
586 c2 = double_int_zero;
589 c4 = double_int_udiv (index, bpu, FLOOR_DIV_EXPR);
591 *pbase = t1;
592 *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
593 double_int_to_tree (sizetype, c3));
594 *pindex = double_int_add (double_int_add (c1, double_int_mul (c2, c3)), c4);
595 *ptype = type;
597 return true;
600 /* Given GS which contains a data reference, create a CAND_REF entry in
601 the candidate table and attempt to find a basis. */
603 static void
604 slsr_process_ref (gimple gs)
606 tree ref_expr, base, offset, type;
607 HOST_WIDE_INT bitsize, bitpos;
608 enum machine_mode mode;
609 int unsignedp, volatilep;
610 double_int index;
611 slsr_cand_t c;
613 if (gimple_vdef (gs))
614 ref_expr = gimple_assign_lhs (gs);
615 else
616 ref_expr = gimple_assign_rhs1 (gs);
618 if (!handled_component_p (ref_expr)
619 || TREE_CODE (ref_expr) == BIT_FIELD_REF
620 || (TREE_CODE (ref_expr) == COMPONENT_REF
621 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
622 return;
624 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
625 &unsignedp, &volatilep, false);
626 index = uhwi_to_double_int (bitpos);
628 if (!restructure_reference (&base, &offset, &index, &type))
629 return;
631 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
632 type, 0);
634 /* Add the candidate to the statement-candidate mapping. */
635 add_cand_for_stmt (gs, c);
638 /* Create a candidate entry for a statement GS, where GS multiplies
639 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
640 about the two SSA names into the new candidate. Return the new
641 candidate. */
643 static slsr_cand_t
644 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
646 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
647 double_int index;
648 unsigned savings = 0;
649 slsr_cand_t c;
650 slsr_cand_t base_cand = base_cand_from_table (base_in);
652 /* Look at all interpretations of the base candidate, if necessary,
653 to find information to propagate into this candidate. */
654 while (base_cand && !base)
657 if (base_cand->kind == CAND_MULT
658 && operand_equal_p (base_cand->stride, integer_one_node, 0))
660 /* Y = (B + i') * 1
661 X = Y * Z
662 ================
663 X = (B + i') * Z */
664 base = base_cand->base_expr;
665 index = base_cand->index;
666 stride = stride_in;
667 ctype = base_cand->cand_type;
668 if (has_single_use (base_in))
669 savings = (base_cand->dead_savings
670 + stmt_cost (base_cand->cand_stmt, speed));
672 else if (base_cand->kind == CAND_ADD
673 && TREE_CODE (base_cand->stride) == INTEGER_CST)
675 /* Y = B + (i' * S), S constant
676 X = Y * Z
677 ============================
678 X = B + ((i' * S) * Z) */
679 base = base_cand->base_expr;
680 index = double_int_mul (base_cand->index,
681 tree_to_double_int (base_cand->stride));
682 stride = stride_in;
683 ctype = base_cand->cand_type;
684 if (has_single_use (base_in))
685 savings = (base_cand->dead_savings
686 + stmt_cost (base_cand->cand_stmt, speed));
689 if (base_cand->next_interp)
690 base_cand = lookup_cand (base_cand->next_interp);
691 else
692 base_cand = NULL;
695 if (!base)
697 /* No interpretations had anything useful to propagate, so
698 produce X = (Y + 0) * Z. */
699 base = base_in;
700 index = double_int_zero;
701 stride = stride_in;
702 ctype = TREE_TYPE (base_in);
705 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
706 ctype, savings);
707 return c;
710 /* Create a candidate entry for a statement GS, where GS multiplies
711 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
712 information about BASE_IN into the new candidate. Return the new
713 candidate. */
715 static slsr_cand_t
716 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
718 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
719 double_int index, temp;
720 unsigned savings = 0;
721 slsr_cand_t c;
722 slsr_cand_t base_cand = base_cand_from_table (base_in);
724 /* Look at all interpretations of the base candidate, if necessary,
725 to find information to propagate into this candidate. */
726 while (base_cand && !base)
728 if (base_cand->kind == CAND_MULT
729 && TREE_CODE (base_cand->stride) == INTEGER_CST)
731 /* Y = (B + i') * S, S constant
732 X = Y * c
733 ============================
734 X = (B + i') * (S * c) */
735 base = base_cand->base_expr;
736 index = base_cand->index;
737 temp = double_int_mul (tree_to_double_int (base_cand->stride),
738 tree_to_double_int (stride_in));
739 stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
740 ctype = base_cand->cand_type;
741 if (has_single_use (base_in))
742 savings = (base_cand->dead_savings
743 + stmt_cost (base_cand->cand_stmt, speed));
745 else if (base_cand->kind == CAND_ADD
746 && operand_equal_p (base_cand->stride, integer_one_node, 0))
748 /* Y = B + (i' * 1)
749 X = Y * c
750 ===========================
751 X = (B + i') * c */
752 base = base_cand->base_expr;
753 index = base_cand->index;
754 stride = stride_in;
755 ctype = base_cand->cand_type;
756 if (has_single_use (base_in))
757 savings = (base_cand->dead_savings
758 + stmt_cost (base_cand->cand_stmt, speed));
760 else if (base_cand->kind == CAND_ADD
761 && double_int_one_p (base_cand->index)
762 && TREE_CODE (base_cand->stride) == INTEGER_CST)
764 /* Y = B + (1 * S), S constant
765 X = Y * c
766 ===========================
767 X = (B + S) * c */
768 base = base_cand->base_expr;
769 index = tree_to_double_int (base_cand->stride);
770 stride = stride_in;
771 ctype = base_cand->cand_type;
772 if (has_single_use (base_in))
773 savings = (base_cand->dead_savings
774 + stmt_cost (base_cand->cand_stmt, speed));
777 if (base_cand->next_interp)
778 base_cand = lookup_cand (base_cand->next_interp);
779 else
780 base_cand = NULL;
783 if (!base)
785 /* No interpretations had anything useful to propagate, so
786 produce X = (Y + 0) * c. */
787 base = base_in;
788 index = double_int_zero;
789 stride = stride_in;
790 ctype = TREE_TYPE (base_in);
793 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
794 ctype, savings);
795 return c;
798 /* Given GS which is a multiply of scalar integers, make an appropriate
799 entry in the candidate table. If this is a multiply of two SSA names,
800 create two CAND_MULT interpretations and attempt to find a basis for
801 each of them. Otherwise, create a single CAND_MULT and attempt to
802 find a basis. */
804 static void
805 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
807 slsr_cand_t c, c2;
809 /* If this is a multiply of an SSA name with itself, it is highly
810 unlikely that we will get a strength reduction opportunity, so
811 don't record it as a candidate. This simplifies the logic for
812 finding a basis, so if this is removed that must be considered. */
813 if (rhs1 == rhs2)
814 return;
816 if (TREE_CODE (rhs2) == SSA_NAME)
818 /* Record an interpretation of this statement in the candidate table
819 assuming RHS1 is the base expression and RHS2 is the stride. */
820 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
822 /* Add the first interpretation to the statement-candidate mapping. */
823 add_cand_for_stmt (gs, c);
825 /* Record another interpretation of this statement assuming RHS1
826 is the stride and RHS2 is the base expression. */
827 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
828 c->next_interp = c2->cand_num;
830 else
832 /* Record an interpretation for the multiply-immediate. */
833 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
835 /* Add the interpretation to the statement-candidate mapping. */
836 add_cand_for_stmt (gs, c);
840 /* Create a candidate entry for a statement GS, where GS adds two
841 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
842 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
843 information about the two SSA names into the new candidate.
844 Return the new candidate. */
846 static slsr_cand_t
847 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
848 bool subtract_p, bool speed)
850 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
851 double_int index;
852 unsigned savings = 0;
853 slsr_cand_t c;
854 slsr_cand_t base_cand = base_cand_from_table (base_in);
855 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
857 /* The most useful transformation is a multiply-immediate feeding
858 an add or subtract. Look for that first. */
859 while (addend_cand && !base)
861 if (addend_cand->kind == CAND_MULT
862 && double_int_zero_p (addend_cand->index)
863 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
865 /* Z = (B + 0) * S, S constant
866 X = Y +/- Z
867 ===========================
868 X = Y + ((+/-1 * S) * B) */
869 base = base_in;
870 index = tree_to_double_int (addend_cand->stride);
871 if (subtract_p)
872 index = double_int_neg (index);
873 stride = addend_cand->base_expr;
874 ctype = TREE_TYPE (base_in);
875 if (has_single_use (addend_in))
876 savings = (addend_cand->dead_savings
877 + stmt_cost (addend_cand->cand_stmt, speed));
880 if (addend_cand->next_interp)
881 addend_cand = lookup_cand (addend_cand->next_interp);
882 else
883 addend_cand = NULL;
886 while (base_cand && !base)
888 if (base_cand->kind == CAND_ADD
889 && (double_int_zero_p (base_cand->index)
890 || operand_equal_p (base_cand->stride,
891 integer_zero_node, 0)))
893 /* Y = B + (i' * S), i' * S = 0
894 X = Y +/- Z
895 ============================
896 X = B + (+/-1 * Z) */
897 base = base_cand->base_expr;
898 index = subtract_p ? double_int_minus_one : double_int_one;
899 stride = addend_in;
900 ctype = base_cand->cand_type;
901 if (has_single_use (base_in))
902 savings = (base_cand->dead_savings
903 + stmt_cost (base_cand->cand_stmt, speed));
905 else if (subtract_p)
907 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
909 while (subtrahend_cand && !base)
911 if (subtrahend_cand->kind == CAND_MULT
912 && double_int_zero_p (subtrahend_cand->index)
913 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
915 /* Z = (B + 0) * S, S constant
916 X = Y - Z
917 ===========================
918 Value: X = Y + ((-1 * S) * B) */
919 base = base_in;
920 index = tree_to_double_int (subtrahend_cand->stride);
921 index = double_int_neg (index);
922 stride = subtrahend_cand->base_expr;
923 ctype = TREE_TYPE (base_in);
924 if (has_single_use (addend_in))
925 savings = (subtrahend_cand->dead_savings
926 + stmt_cost (subtrahend_cand->cand_stmt, speed));
929 if (subtrahend_cand->next_interp)
930 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
931 else
932 subtrahend_cand = NULL;
936 if (base_cand->next_interp)
937 base_cand = lookup_cand (base_cand->next_interp);
938 else
939 base_cand = NULL;
942 if (!base)
944 /* No interpretations had anything useful to propagate, so
945 produce X = Y + (1 * Z). */
946 base = base_in;
947 index = subtract_p ? double_int_minus_one : double_int_one;
948 stride = addend_in;
949 ctype = TREE_TYPE (base_in);
952 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
953 ctype, savings);
954 return c;
957 /* Create a candidate entry for a statement GS, where GS adds SSA
958 name BASE_IN to constant INDEX_IN. Propagate any known information
959 about BASE_IN into the new candidate. Return the new candidate. */
961 static slsr_cand_t
962 create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
964 enum cand_kind kind = CAND_ADD;
965 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
966 double_int index, multiple;
967 unsigned savings = 0;
968 slsr_cand_t c;
969 slsr_cand_t base_cand = base_cand_from_table (base_in);
971 while (base_cand && !base)
973 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
975 if (TREE_CODE (base_cand->stride) == INTEGER_CST
976 && double_int_multiple_of (index_in,
977 tree_to_double_int (base_cand->stride),
978 unsigned_p,
979 &multiple))
981 /* Y = (B + i') * S, S constant, c = kS for some integer k
982 X = Y + c
983 ============================
984 X = (B + (i'+ k)) * S
986 Y = B + (i' * S), S constant, c = kS for some integer k
987 X = Y + c
988 ============================
989 X = (B + (i'+ k)) * S */
990 kind = base_cand->kind;
991 base = base_cand->base_expr;
992 index = double_int_add (base_cand->index, multiple);
993 stride = base_cand->stride;
994 ctype = base_cand->cand_type;
995 if (has_single_use (base_in))
996 savings = (base_cand->dead_savings
997 + stmt_cost (base_cand->cand_stmt, speed));
1000 if (base_cand->next_interp)
1001 base_cand = lookup_cand (base_cand->next_interp);
1002 else
1003 base_cand = NULL;
1006 if (!base)
1008 /* No interpretations had anything useful to propagate, so
1009 produce X = Y + (c * 1). */
1010 kind = CAND_ADD;
1011 base = base_in;
1012 index = index_in;
1013 stride = integer_one_node;
1014 ctype = TREE_TYPE (base_in);
1017 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1018 ctype, savings);
1019 return c;
1022 /* Given GS which is an add or subtract of scalar integers or pointers,
1023 make at least one appropriate entry in the candidate table. */
1025 static void
1026 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1028 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1029 slsr_cand_t c = NULL, c2;
1031 if (TREE_CODE (rhs2) == SSA_NAME)
1033 /* First record an interpretation assuming RHS1 is the base expression
1034 and RHS2 is the stride. But it doesn't make sense for the
1035 stride to be a pointer, so don't record a candidate in that case. */
1036 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1038 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1040 /* Add the first interpretation to the statement-candidate
1041 mapping. */
1042 add_cand_for_stmt (gs, c);
1045 /* If the two RHS operands are identical, or this is a subtract,
1046 we're done. */
1047 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1048 return;
1050 /* Otherwise, record another interpretation assuming RHS2 is the
1051 base expression and RHS1 is the stride, again provided that the
1052 stride is not a pointer. */
1053 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1055 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1056 if (c)
1057 c->next_interp = c2->cand_num;
1058 else
1059 add_cand_for_stmt (gs, c2);
1062 else
1064 double_int index;
1066 /* Record an interpretation for the add-immediate. */
1067 index = tree_to_double_int (rhs2);
1068 if (subtract_p)
1069 index = double_int_neg (index);
1071 c = create_add_imm_cand (gs, rhs1, index, speed);
1073 /* Add the interpretation to the statement-candidate mapping. */
1074 add_cand_for_stmt (gs, c);
1078 /* Given GS which is a negate of a scalar integer, make an appropriate
1079 entry in the candidate table. A negate is equivalent to a multiply
1080 by -1. */
1082 static void
1083 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1085 /* Record a CAND_MULT interpretation for the multiply by -1. */
1086 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1088 /* Add the interpretation to the statement-candidate mapping. */
1089 add_cand_for_stmt (gs, c);
1092 /* Help function for legal_cast_p, operating on two trees. Checks
1093 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1094 for more details. */
1096 static bool
1097 legal_cast_p_1 (tree lhs, tree rhs)
1099 tree lhs_type, rhs_type;
1100 unsigned lhs_size, rhs_size;
1101 bool lhs_wraps, rhs_wraps;
1103 lhs_type = TREE_TYPE (lhs);
1104 rhs_type = TREE_TYPE (rhs);
1105 lhs_size = TYPE_PRECISION (lhs_type);
1106 rhs_size = TYPE_PRECISION (rhs_type);
1107 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1108 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1110 if (lhs_size < rhs_size
1111 || (rhs_wraps && !lhs_wraps)
1112 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1113 return false;
1115 return true;
1118 /* Return TRUE if GS is a statement that defines an SSA name from
1119 a conversion and is legal for us to combine with an add and multiply
1120 in the candidate table. For example, suppose we have:
1122 A = B + i;
1123 C = (type) A;
1124 D = C * S;
1126 Without the type-cast, we would create a CAND_MULT for D with base B,
1127 index i, and stride S. We want to record this candidate only if it
1128 is equivalent to apply the type cast following the multiply:
1130 A = B + i;
1131 E = A * S;
1132 D = (type) E;
1134 We will record the type with the candidate for D. This allows us
1135 to use a similar previous candidate as a basis. If we have earlier seen
1137 A' = B + i';
1138 C' = (type) A';
1139 D' = C' * S;
1141 we can replace D with
1143 D = D' + (i - i') * S;
1145 But if moving the type-cast would change semantics, we mustn't do this.
1147 This is legitimate for casts from a non-wrapping integral type to
1148 any integral type of the same or larger size. It is not legitimate
1149 to convert a wrapping type to a non-wrapping type, or to a wrapping
1150 type of a different size. I.e., with a wrapping type, we must
1151 assume that the addition B + i could wrap, in which case performing
1152 the multiply before or after one of the "illegal" type casts will
1153 have different semantics. */
1155 static bool
1156 legal_cast_p (gimple gs, tree rhs)
1158 if (!is_gimple_assign (gs)
1159 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1160 return false;
1162 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1165 /* Given GS which is a cast to a scalar integer type, determine whether
1166 the cast is legal for strength reduction. If so, make at least one
1167 appropriate entry in the candidate table. */
1169 static void
1170 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1172 tree lhs, ctype;
1173 slsr_cand_t base_cand, c, c2;
1174 unsigned savings = 0;
1176 if (!legal_cast_p (gs, rhs1))
1177 return;
1179 lhs = gimple_assign_lhs (gs);
1180 base_cand = base_cand_from_table (rhs1);
1181 ctype = TREE_TYPE (lhs);
1183 if (base_cand)
1185 while (base_cand)
1187 /* Propagate all data from the base candidate except the type,
1188 which comes from the cast, and the base candidate's cast,
1189 which is no longer applicable. */
1190 if (has_single_use (rhs1))
1191 savings = (base_cand->dead_savings
1192 + stmt_cost (base_cand->cand_stmt, speed));
1194 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1195 base_cand->base_expr,
1196 base_cand->index, base_cand->stride,
1197 ctype, savings);
1198 if (base_cand->next_interp)
1199 base_cand = lookup_cand (base_cand->next_interp);
1200 else
1201 base_cand = NULL;
1204 else
1206 /* If nothing is known about the RHS, create fresh CAND_ADD and
1207 CAND_MULT interpretations:
1209 X = Y + (0 * 1)
1210 X = (Y + 0) * 1
1212 The first of these is somewhat arbitrary, but the choice of
1213 1 for the stride simplifies the logic for propagating casts
1214 into their uses. */
1215 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1216 integer_one_node, ctype, 0);
1217 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1218 integer_one_node, ctype, 0);
1219 c->next_interp = c2->cand_num;
1222 /* Add the first (or only) interpretation to the statement-candidate
1223 mapping. */
1224 add_cand_for_stmt (gs, c);
1227 /* Given GS which is a copy of a scalar integer type, make at least one
1228 appropriate entry in the candidate table.
1230 This interface is included for completeness, but is unnecessary
1231 if this pass immediately follows a pass that performs copy
1232 propagation, such as DOM. */
1234 static void
1235 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1237 slsr_cand_t base_cand, c, c2;
1238 unsigned savings = 0;
1240 base_cand = base_cand_from_table (rhs1);
1242 if (base_cand)
1244 while (base_cand)
1246 /* Propagate all data from the base candidate. */
1247 if (has_single_use (rhs1))
1248 savings = (base_cand->dead_savings
1249 + stmt_cost (base_cand->cand_stmt, speed));
1251 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1252 base_cand->base_expr,
1253 base_cand->index, base_cand->stride,
1254 base_cand->cand_type, savings);
1255 if (base_cand->next_interp)
1256 base_cand = lookup_cand (base_cand->next_interp);
1257 else
1258 base_cand = NULL;
1261 else
1263 /* If nothing is known about the RHS, create fresh CAND_ADD and
1264 CAND_MULT interpretations:
1266 X = Y + (0 * 1)
1267 X = (Y + 0) * 1
1269 The first of these is somewhat arbitrary, but the choice of
1270 1 for the stride simplifies the logic for propagating casts
1271 into their uses. */
1272 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1273 integer_one_node, TREE_TYPE (rhs1), 0);
1274 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1275 integer_one_node, TREE_TYPE (rhs1), 0);
1276 c->next_interp = c2->cand_num;
1279 /* Add the first (or only) interpretation to the statement-candidate
1280 mapping. */
1281 add_cand_for_stmt (gs, c);
1284 /* Find strength-reduction candidates in block BB. */
1286 static void
1287 find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
1288 basic_block bb)
1290 bool speed = optimize_bb_for_speed_p (bb);
1291 gimple_stmt_iterator gsi;
1293 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1295 gimple gs = gsi_stmt (gsi);
1297 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1298 slsr_process_ref (gs);
1300 else if (is_gimple_assign (gs)
1301 && SCALAR_INT_MODE_P
1302 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1304 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1306 switch (gimple_assign_rhs_code (gs))
1308 case MULT_EXPR:
1309 case PLUS_EXPR:
1310 rhs1 = gimple_assign_rhs1 (gs);
1311 rhs2 = gimple_assign_rhs2 (gs);
1312 /* Should never happen, but currently some buggy situations
1313 in earlier phases put constants in rhs1. */
1314 if (TREE_CODE (rhs1) != SSA_NAME)
1315 continue;
1316 break;
1318 /* Possible future opportunity: rhs1 of a ptr+ can be
1319 an ADDR_EXPR. */
1320 case POINTER_PLUS_EXPR:
1321 case MINUS_EXPR:
1322 rhs2 = gimple_assign_rhs2 (gs);
1323 /* Fall-through. */
1325 case NOP_EXPR:
1326 case MODIFY_EXPR:
1327 case NEGATE_EXPR:
1328 rhs1 = gimple_assign_rhs1 (gs);
1329 if (TREE_CODE (rhs1) != SSA_NAME)
1330 continue;
1331 break;
1333 default:
1337 switch (gimple_assign_rhs_code (gs))
1339 case MULT_EXPR:
1340 slsr_process_mul (gs, rhs1, rhs2, speed);
1341 break;
1343 case PLUS_EXPR:
1344 case POINTER_PLUS_EXPR:
1345 case MINUS_EXPR:
1346 slsr_process_add (gs, rhs1, rhs2, speed);
1347 break;
1349 case NEGATE_EXPR:
1350 slsr_process_neg (gs, rhs1, speed);
1351 break;
1353 case NOP_EXPR:
1354 slsr_process_cast (gs, rhs1, speed);
1355 break;
1357 case MODIFY_EXPR:
1358 slsr_process_copy (gs, rhs1, speed);
1359 break;
1361 default:
1368 /* Dump a candidate for debug. */
1370 static void
1371 dump_candidate (slsr_cand_t c)
1373 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1374 gimple_bb (c->cand_stmt)->index);
1375 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1376 switch (c->kind)
1378 case CAND_MULT:
1379 fputs (" MULT : (", dump_file);
1380 print_generic_expr (dump_file, c->base_expr, 0);
1381 fputs (" + ", dump_file);
1382 dump_double_int (dump_file, c->index, false);
1383 fputs (") * ", dump_file);
1384 print_generic_expr (dump_file, c->stride, 0);
1385 fputs (" : ", dump_file);
1386 break;
1387 case CAND_ADD:
1388 fputs (" ADD : ", dump_file);
1389 print_generic_expr (dump_file, c->base_expr, 0);
1390 fputs (" + (", dump_file);
1391 dump_double_int (dump_file, c->index, false);
1392 fputs (" * ", dump_file);
1393 print_generic_expr (dump_file, c->stride, 0);
1394 fputs (") : ", dump_file);
1395 break;
1396 case CAND_REF:
1397 fputs (" REF : ", dump_file);
1398 print_generic_expr (dump_file, c->base_expr, 0);
1399 fputs (" + (", dump_file);
1400 print_generic_expr (dump_file, c->stride, 0);
1401 fputs (") + ", dump_file);
1402 dump_double_int (dump_file, c->index, false);
1403 fputs (" : ", dump_file);
1404 break;
1405 default:
1406 gcc_unreachable ();
1408 print_generic_expr (dump_file, c->cand_type, 0);
1409 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1410 c->basis, c->dependent, c->sibling);
1411 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1412 c->next_interp, c->dead_savings);
1413 if (c->def_phi)
1415 fputs (" phi: ", dump_file);
1416 print_gimple_stmt (dump_file, c->def_phi, 0, 0);
1418 fputs ("\n", dump_file);
1421 /* Dump the candidate vector for debug. */
1423 static void
1424 dump_cand_vec (void)
1426 unsigned i;
1427 slsr_cand_t c;
1429 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1431 FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
1432 dump_candidate (c);
1435 /* Callback used to dump the candidate chains hash table. */
1437 static int
1438 base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
1440 const_cand_chain_t chain = *((const_cand_chain_t *) slot);
1441 cand_chain_t p;
1443 print_generic_expr (dump_file, chain->base_expr, 0);
1444 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1446 for (p = chain->next; p; p = p->next)
1447 fprintf (dump_file, " -> %d", p->cand->cand_num);
1449 fputs ("\n", dump_file);
1450 return 1;
1453 /* Dump the candidate chains. */
1455 static void
1456 dump_cand_chains (void)
1458 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1459 htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
1460 fputs ("\n", dump_file);
1463 /* Dump the increment vector for debug. */
1465 static void
1466 dump_incr_vec (void)
1468 if (dump_file && (dump_flags & TDF_DETAILS))
1470 unsigned i;
1472 fprintf (dump_file, "\nIncrement vector:\n\n");
1474 for (i = 0; i < incr_vec_len; i++)
1476 fprintf (dump_file, "%3d increment: ", i);
1477 dump_double_int (dump_file, incr_vec[i].incr, false);
1478 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1479 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1480 fputs ("\n initializer: ", dump_file);
1481 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1482 fputs ("\n\n", dump_file);
1487 /* Recursive helper for unconditional_cands_with_known_stride_p.
1488 Returns TRUE iff C, its siblings, and its dependents are all
1489 unconditional candidates. */
1491 static bool
1492 unconditional_cands (slsr_cand_t c)
1494 if (c->def_phi)
1495 return false;
1497 if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
1498 return false;
1500 if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
1501 return false;
1503 return true;
1506 /* Determine whether or not the tree of candidates rooted at
1507 ROOT consists entirely of unconditional increments with
1508 an INTEGER_CST stride. */
1510 static bool
1511 unconditional_cands_with_known_stride_p (slsr_cand_t root)
1513 /* The stride is identical for all related candidates, so
1514 check it once. */
1515 if (TREE_CODE (root->stride) != INTEGER_CST)
1516 return false;
1518 return unconditional_cands (lookup_cand (root->dependent));
1521 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1522 data reference. */
1524 static void
1525 replace_ref (tree *expr, slsr_cand_t c)
1527 tree add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1528 c->base_expr, c->stride);
1529 tree mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr,
1530 double_int_to_tree (c->cand_type, c->index));
1532 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1533 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1534 TREE_OPERAND (mem_ref, 0)
1535 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1536 /*simple_p=*/true, NULL,
1537 /*before=*/true, GSI_SAME_STMT);
1538 copy_ref_info (mem_ref, *expr);
1539 *expr = mem_ref;
1540 update_stmt (c->cand_stmt);
1543 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1544 dependent of candidate C with an equivalent strength-reduced data
1545 reference. */
1547 static void
1548 replace_refs (slsr_cand_t c)
1550 if (gimple_vdef (c->cand_stmt))
1552 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1553 replace_ref (lhs, c);
1555 else
1557 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1558 replace_ref (rhs, c);
1561 if (c->sibling)
1562 replace_refs (lookup_cand (c->sibling));
1564 if (c->dependent)
1565 replace_refs (lookup_cand (c->dependent));
1568 /* Calculate the increment required for candidate C relative to
1569 its basis. */
1571 static double_int
1572 cand_increment (slsr_cand_t c)
1574 slsr_cand_t basis;
1576 /* If the candidate doesn't have a basis, just return its own
1577 index. This is useful in record_increments to help us find
1578 an existing initializer. */
1579 if (!c->basis)
1580 return c->index;
1582 basis = lookup_cand (c->basis);
1583 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1584 return double_int_sub (c->index, basis->index);
1587 /* Calculate the increment required for candidate C relative to
1588 its basis. If we aren't going to generate pointer arithmetic
1589 for this candidate, return the absolute value of that increment
1590 instead. */
1592 static inline double_int
1593 cand_abs_increment (slsr_cand_t c)
1595 double_int increment = cand_increment (c);
1597 if (!address_arithmetic_p && double_int_negative_p (increment))
1598 increment = double_int_neg (increment);
1600 return increment;
1603 /* If *VAR is NULL or is not of a compatible type with TYPE, create a
1604 new temporary reg of type TYPE and store it in *VAR. */
1606 static inline void
1607 lazy_create_slsr_reg (tree *var, tree type)
1609 if (!*var || !types_compatible_p (TREE_TYPE (*var), type))
1610 *var = create_tmp_reg (type, "slsr");
1613 /* Return TRUE iff candidate C has already been replaced under
1614 another interpretation. */
1616 static inline bool
1617 cand_already_replaced (slsr_cand_t c)
1619 return (gimple_bb (c->cand_stmt) == 0);
1622 /* Helper routine for replace_dependents, doing the work for a
1623 single candidate C. */
1625 static void
1626 replace_dependent (slsr_cand_t c, enum tree_code cand_code)
1628 double_int stride = tree_to_double_int (c->stride);
1629 double_int bump = double_int_mul (cand_increment (c), stride);
1630 gimple stmt_to_print = NULL;
1631 slsr_cand_t basis;
1632 tree basis_name, incr_type, bump_tree;
1633 enum tree_code code;
1635 /* It is highly unlikely, but possible, that the resulting
1636 bump doesn't fit in a HWI. Abandon the replacement
1637 in this case. Restriction to signed HWI is conservative
1638 for unsigned types but allows for safe negation without
1639 twisted logic. */
1640 if (!double_int_fits_in_shwi_p (bump))
1641 return;
1643 basis = lookup_cand (c->basis);
1644 basis_name = gimple_assign_lhs (basis->cand_stmt);
1645 incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
1646 code = PLUS_EXPR;
1648 if (double_int_negative_p (bump))
1650 code = MINUS_EXPR;
1651 bump = double_int_neg (bump);
1654 bump_tree = double_int_to_tree (incr_type, bump);
1656 if (dump_file && (dump_flags & TDF_DETAILS))
1658 fputs ("Replacing: ", dump_file);
1659 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1662 if (double_int_zero_p (bump))
1664 tree lhs = gimple_assign_lhs (c->cand_stmt);
1665 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
1666 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1667 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
1668 gsi_replace (&gsi, copy_stmt, false);
1669 if (dump_file && (dump_flags & TDF_DETAILS))
1670 stmt_to_print = copy_stmt;
1672 else
1674 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1675 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1676 if (cand_code != NEGATE_EXPR
1677 && ((operand_equal_p (rhs1, basis_name, 0)
1678 && operand_equal_p (rhs2, bump_tree, 0))
1679 || (operand_equal_p (rhs1, bump_tree, 0)
1680 && operand_equal_p (rhs2, basis_name, 0))))
1682 if (dump_file && (dump_flags & TDF_DETAILS))
1684 fputs ("(duplicate, not actually replacing)", dump_file);
1685 stmt_to_print = c->cand_stmt;
1688 else
1690 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1691 gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
1692 update_stmt (gsi_stmt (gsi));
1693 if (dump_file && (dump_flags & TDF_DETAILS))
1694 stmt_to_print = gsi_stmt (gsi);
1698 if (dump_file && (dump_flags & TDF_DETAILS))
1700 fputs ("With: ", dump_file);
1701 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
1702 fputs ("\n", dump_file);
1706 /* Replace candidate C, each sibling of candidate C, and each
1707 dependent of candidate C with an add or subtract. Note that we
1708 only operate on CAND_MULTs with known strides, so we will never
1709 generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is
1710 replaced by X = Y + ((i - i') * S), as described in the module
1711 commentary. The folded value ((i - i') * S) is referred to here
1712 as the "bump." */
1714 static void
1715 replace_dependents (slsr_cand_t c)
1717 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1719 /* It is not useful to replace casts, copies, or adds of an SSA name
1720 and a constant. Also skip candidates that have already been
1721 replaced under another interpretation. */
1722 if (cand_code != MODIFY_EXPR
1723 && cand_code != NOP_EXPR
1724 && c->kind == CAND_MULT
1725 && !cand_already_replaced (c))
1726 replace_dependent (c, cand_code);
1728 if (c->sibling)
1729 replace_dependents (lookup_cand (c->sibling));
1731 if (c->dependent)
1732 replace_dependents (lookup_cand (c->dependent));
1735 /* Return the index in the increment vector of the given INCREMENT. */
1737 static inline unsigned
1738 incr_vec_index (double_int increment)
1740 unsigned i;
1742 for (i = 0;
1743 i < incr_vec_len && !double_int_equal_p (increment, incr_vec[i].incr);
1744 i++)
1747 gcc_assert (i < incr_vec_len);
1748 return i;
1751 /* Count the number of candidates in the tree rooted at C that have
1752 not already been replaced under other interpretations. */
1754 static unsigned
1755 count_candidates (slsr_cand_t c)
1757 unsigned count = cand_already_replaced (c) ? 0 : 1;
1759 if (c->sibling)
1760 count += count_candidates (lookup_cand (c->sibling));
1762 if (c->dependent)
1763 count += count_candidates (lookup_cand (c->dependent));
1765 return count;
1768 /* Increase the count of INCREMENT by one in the increment vector.
1769 INCREMENT is associated with candidate C. If an initializer
1770 T_0 = stride * I is provided by a candidate that dominates all
1771 candidates with the same increment, also record T_0 for subsequent use. */
1773 static void
1774 record_increment (slsr_cand_t c, double_int increment)
1776 bool found = false;
1777 unsigned i;
1779 /* Treat increments that differ only in sign as identical so as to
1780 share initializers, unless we are generating pointer arithmetic. */
1781 if (!address_arithmetic_p && double_int_negative_p (increment))
1782 increment = double_int_neg (increment);
1784 for (i = 0; i < incr_vec_len; i++)
1786 if (double_int_equal_p (incr_vec[i].incr, increment))
1788 incr_vec[i].count++;
1789 found = true;
1791 /* If we previously recorded an initializer that doesn't
1792 dominate this candidate, it's not going to be useful to
1793 us after all. */
1794 if (incr_vec[i].initializer
1795 && !dominated_by_p (CDI_DOMINATORS,
1796 gimple_bb (c->cand_stmt),
1797 incr_vec[i].init_bb))
1799 incr_vec[i].initializer = NULL_TREE;
1800 incr_vec[i].init_bb = NULL;
1803 break;
1807 if (!found)
1809 /* The first time we see an increment, create the entry for it.
1810 If this is the root candidate which doesn't have a basis, set
1811 the count to zero. We're only processing it so it can possibly
1812 provide an initializer for other candidates. */
1813 incr_vec[incr_vec_len].incr = increment;
1814 incr_vec[incr_vec_len].count = c->basis ? 1 : 0;
1815 incr_vec[incr_vec_len].cost = COST_INFINITE;
1817 /* Optimistically record the first occurrence of this increment
1818 as providing an initializer (if it does); we will revise this
1819 opinion later if it doesn't dominate all other occurrences.
1820 Exception: increments of -1, 0, 1 never need initializers. */
1821 if (c->kind == CAND_ADD
1822 && double_int_equal_p (c->index, increment)
1823 && (double_int_scmp (increment, double_int_one) > 0
1824 || double_int_scmp (increment, double_int_minus_one) < 0))
1826 tree t0;
1827 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1828 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1829 if (operand_equal_p (rhs1, c->base_expr, 0))
1830 t0 = rhs2;
1831 else
1832 t0 = rhs1;
1833 if (SSA_NAME_DEF_STMT (t0) && gimple_bb (SSA_NAME_DEF_STMT (t0)))
1835 incr_vec[incr_vec_len].initializer = t0;
1836 incr_vec[incr_vec_len++].init_bb
1837 = gimple_bb (SSA_NAME_DEF_STMT (t0));
1839 else
1841 incr_vec[incr_vec_len].initializer = NULL_TREE;
1842 incr_vec[incr_vec_len++].init_bb = NULL;
1845 else
1847 incr_vec[incr_vec_len].initializer = NULL_TREE;
1848 incr_vec[incr_vec_len++].init_bb = NULL;
1853 /* Determine how many times each unique increment occurs in the set
1854 of candidates rooted at C's parent, recording the data in the
1855 increment vector. For each unique increment I, if an initializer
1856 T_0 = stride * I is provided by a candidate that dominates all
1857 candidates with the same increment, also record T_0 for subsequent
1858 use. */
1860 static void
1861 record_increments (slsr_cand_t c)
1863 if (!cand_already_replaced (c))
1864 record_increment (c, cand_increment (c));
1866 if (c->sibling)
1867 record_increments (lookup_cand (c->sibling));
1869 if (c->dependent)
1870 record_increments (lookup_cand (c->dependent));
1873 /* Return the first candidate in the tree rooted at C that has not
1874 already been replaced, favoring siblings over dependents. */
1876 static slsr_cand_t
1877 unreplaced_cand_in_tree (slsr_cand_t c)
1879 if (!cand_already_replaced (c))
1880 return c;
1882 if (c->sibling)
1884 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
1885 if (sib)
1886 return sib;
1889 if (c->dependent)
1891 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
1892 if (dep)
1893 return dep;
1896 return NULL;
1899 /* Return TRUE if the candidates in the tree rooted at C should be
1900 optimized for speed, else FALSE. We estimate this based on the block
1901 containing the most dominant candidate in the tree that has not yet
1902 been replaced. */
1904 static bool
1905 optimize_cands_for_speed_p (slsr_cand_t c)
1907 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
1908 gcc_assert (c2);
1909 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
1912 /* Add COST_IN to the lowest cost of any dependent path starting at
1913 candidate C or any of its siblings, counting only candidates along
1914 such paths with increment INCR. Assume that replacing a candidate
1915 reduces cost by REPL_SAVINGS. Also account for savings from any
1916 statements that would go dead. */
1918 static int
1919 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, double_int incr)
1921 int local_cost, sib_cost;
1922 double_int cand_incr = cand_abs_increment (c);
1924 if (cand_already_replaced (c))
1925 local_cost = cost_in;
1926 else if (double_int_equal_p (incr, cand_incr))
1927 local_cost = cost_in - repl_savings - c->dead_savings;
1928 else
1929 local_cost = cost_in - c->dead_savings;
1931 if (c->dependent)
1932 local_cost = lowest_cost_path (local_cost, repl_savings,
1933 lookup_cand (c->dependent), incr);
1935 if (c->sibling)
1937 sib_cost = lowest_cost_path (cost_in, repl_savings,
1938 lookup_cand (c->sibling), incr);
1939 local_cost = MIN (local_cost, sib_cost);
1942 return local_cost;
1945 /* Compute the total savings that would accrue from all replacements
1946 in the candidate tree rooted at C, counting only candidates with
1947 increment INCR. Assume that replacing a candidate reduces cost
1948 by REPL_SAVINGS. Also account for savings from statements that
1949 would go dead. */
1951 static int
1952 total_savings (int repl_savings, slsr_cand_t c, double_int incr)
1954 int savings = 0;
1955 double_int cand_incr = cand_abs_increment (c);
1957 if (double_int_equal_p (incr, cand_incr)
1958 && !cand_already_replaced (c))
1959 savings += repl_savings + c->dead_savings;
1961 if (c->dependent)
1962 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr);
1964 if (c->sibling)
1965 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr);
1967 return savings;
1970 /* Use target-specific costs to determine and record which increments
1971 in the current candidate tree are profitable to replace, assuming
1972 MODE and SPEED. FIRST_DEP is the first dependent of the root of
1973 the candidate tree.
1975 One slight limitation here is that we don't account for the possible
1976 introduction of casts in some cases. See replace_one_candidate for
1977 the cases where these are introduced. This should probably be cleaned
1978 up sometime. */
1980 static void
1981 analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
1983 unsigned i;
1985 for (i = 0; i < incr_vec_len; i++)
1987 HOST_WIDE_INT incr = double_int_to_shwi (incr_vec[i].incr);
1989 /* If somehow this increment is bigger than a HWI, we won't
1990 be optimizing candidates that use it. And if the increment
1991 has a count of zero, nothing will be done with it. */
1992 if (!double_int_fits_in_shwi_p (incr_vec[i].incr)
1993 || !incr_vec[i].count)
1994 incr_vec[i].cost = COST_INFINITE;
1996 /* Increments of 0, 1, and -1 are always profitable to replace,
1997 because they always replace a multiply or add with an add or
1998 copy, and may cause one or more existing instructions to go
1999 dead. Exception: -1 can't be assumed to be profitable for
2000 pointer addition. */
2001 else if (incr == 0
2002 || incr == 1
2003 || (incr == -1
2004 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2005 != POINTER_PLUS_EXPR)))
2006 incr_vec[i].cost = COST_NEUTRAL;
2008 /* FORNOW: If we need to add an initializer, give up if a cast from
2009 the candidate's type to its stride's type can lose precision.
2010 This could eventually be handled better by expressly retaining the
2011 result of a cast to a wider type in the stride. Example:
2013 short int _1;
2014 _2 = (int) _1;
2015 _3 = _2 * 10;
2016 _4 = x + _3; ADD: x + (10 * _1) : int
2017 _5 = _2 * 15;
2018 _6 = x + _3; ADD: x + (15 * _1) : int
2020 Right now replacing _6 would cause insertion of an initializer
2021 of the form "short int T = _1 * 5;" followed by a cast to
2022 int, which could overflow incorrectly. Had we recorded _2 or
2023 (int)_1 as the stride, this wouldn't happen. However, doing
2024 this breaks other opportunities, so this will require some
2025 care. */
2026 else if (!incr_vec[i].initializer
2027 && TREE_CODE (first_dep->stride) != INTEGER_CST
2028 && !legal_cast_p_1 (first_dep->stride,
2029 gimple_assign_lhs (first_dep->cand_stmt)))
2031 incr_vec[i].cost = COST_INFINITE;
2033 /* For any other increment, if this is a multiply candidate, we
2034 must introduce a temporary T and initialize it with
2035 T_0 = stride * increment. When optimizing for speed, walk the
2036 candidate tree to calculate the best cost reduction along any
2037 path; if it offsets the fixed cost of inserting the initializer,
2038 replacing the increment is profitable. When optimizing for
2039 size, instead calculate the total cost reduction from replacing
2040 all candidates with this increment. */
2041 else if (first_dep->kind == CAND_MULT)
2043 int cost = mult_by_coeff_cost (incr, mode, speed);
2044 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2045 if (speed)
2046 cost = lowest_cost_path (cost, repl_savings, first_dep,
2047 incr_vec[i].incr);
2048 else
2049 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr);
2051 incr_vec[i].cost = cost;
2054 /* If this is an add candidate, the initializer may already
2055 exist, so only calculate the cost of the initializer if it
2056 doesn't. We are replacing one add with another here, so the
2057 known replacement savings is zero. We will account for removal
2058 of dead instructions in lowest_cost_path or total_savings. */
2059 else
2061 int cost = 0;
2062 if (!incr_vec[i].initializer)
2063 cost = mult_by_coeff_cost (incr, mode, speed);
2065 if (speed)
2066 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr);
2067 else
2068 cost -= total_savings (0, first_dep, incr_vec[i].incr);
2070 incr_vec[i].cost = cost;
2075 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2076 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2077 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2078 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2079 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2081 static basic_block
2082 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2083 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2085 basic_block ncd;
2087 if (!bb1)
2089 *where = c2;
2090 return bb2;
2093 if (!bb2)
2095 *where = c1;
2096 return bb1;
2099 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2101 /* If both candidates are in the same block, the earlier
2102 candidate wins. */
2103 if (bb1 == ncd && bb2 == ncd)
2105 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2106 *where = c2;
2107 else
2108 *where = c1;
2111 /* Otherwise, if one of them produced a candidate in the
2112 dominator, that one wins. */
2113 else if (bb1 == ncd)
2114 *where = c1;
2116 else if (bb2 == ncd)
2117 *where = c2;
2119 /* If neither matches the dominator, neither wins. */
2120 else
2121 *where = NULL;
2123 return ncd;
2126 /* Consider all candidates in the tree rooted at C for which INCR
2127 represents the required increment of C relative to its basis.
2128 Find and return the basic block that most nearly dominates all
2129 such candidates. If the returned block contains one or more of
2130 the candidates, return the earliest candidate in the block in
2131 *WHERE. */
2133 static basic_block
2134 nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
2135 slsr_cand_t *where)
2137 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
2138 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
2139 double_int cand_incr;
2141 /* First find the NCD of all siblings and dependents. */
2142 if (c->sibling)
2143 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
2144 incr, &sib_where);
2145 if (c->dependent)
2146 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
2147 incr, &dep_where);
2148 if (!sib_ncd && !dep_ncd)
2150 new_where = NULL;
2151 ncd = NULL;
2153 else if (sib_ncd && !dep_ncd)
2155 new_where = sib_where;
2156 ncd = sib_ncd;
2158 else if (dep_ncd && !sib_ncd)
2160 new_where = dep_where;
2161 ncd = dep_ncd;
2163 else
2164 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
2165 dep_where, &new_where);
2167 /* If the candidate's increment doesn't match the one we're interested
2168 in, then the result depends only on siblings and dependents. */
2169 cand_incr = cand_abs_increment (c);
2171 if (!double_int_equal_p (cand_incr, incr) || cand_already_replaced (c))
2173 *where = new_where;
2174 return ncd;
2177 /* Otherwise, compare this candidate with the result from all siblings
2178 and dependents. */
2179 this_where = c;
2180 this_ncd = gimple_bb (c->cand_stmt);
2181 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
2183 return ncd;
2186 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
2188 static inline bool
2189 profitable_increment_p (unsigned index)
2191 return (incr_vec[index].cost <= COST_NEUTRAL);
2194 /* For each profitable increment in the increment vector not equal to
2195 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
2196 dominator of all statements in the candidate chain rooted at C
2197 that require that increment, and insert an initializer
2198 T_0 = stride * increment at that location. Record T_0 with the
2199 increment record. */
2201 static void
2202 insert_initializers (slsr_cand_t c)
2204 unsigned i;
2205 tree new_var = NULL_TREE;
2207 for (i = 0; i < incr_vec_len; i++)
2209 basic_block bb;
2210 slsr_cand_t where = NULL;
2211 gimple init_stmt;
2212 tree stride_type, new_name, incr_tree;
2213 double_int incr = incr_vec[i].incr;
2215 if (!profitable_increment_p (i)
2216 || double_int_one_p (incr)
2217 || (double_int_minus_one_p (incr)
2218 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
2219 || double_int_zero_p (incr))
2220 continue;
2222 /* We may have already identified an existing initializer that
2223 will suffice. */
2224 if (incr_vec[i].initializer)
2226 if (dump_file && (dump_flags & TDF_DETAILS))
2228 fputs ("Using existing initializer: ", dump_file);
2229 print_gimple_stmt (dump_file,
2230 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
2231 0, 0);
2233 continue;
2236 /* Find the block that most closely dominates all candidates
2237 with this increment. If there is at least one candidate in
2238 that block, the earliest one will be returned in WHERE. */
2239 bb = nearest_common_dominator_for_cands (c, incr, &where);
2241 /* Create a new SSA name to hold the initializer's value. */
2242 stride_type = TREE_TYPE (c->stride);
2243 lazy_create_slsr_reg (&new_var, stride_type);
2244 new_name = make_ssa_name (new_var, NULL);
2245 incr_vec[i].initializer = new_name;
2247 /* Create the initializer and insert it in the latest possible
2248 dominating position. */
2249 incr_tree = double_int_to_tree (stride_type, incr);
2250 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
2251 c->stride, incr_tree);
2252 if (where)
2254 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
2255 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2256 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
2258 else
2260 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2261 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
2263 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2264 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2265 else
2266 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
2268 gimple_set_location (init_stmt, gimple_location (basis_stmt));
2271 if (dump_file && (dump_flags & TDF_DETAILS))
2273 fputs ("Inserting initializer: ", dump_file);
2274 print_gimple_stmt (dump_file, init_stmt, 0, 0);
2279 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
2280 type TO_TYPE, and insert it in front of the statement represented
2281 by candidate C. Use *NEW_VAR to create the new SSA name. Return
2282 the new SSA name. */
2284 static tree
2285 introduce_cast_before_cand (slsr_cand_t c, tree to_type,
2286 tree from_expr, tree *new_var)
2288 tree cast_lhs;
2289 gimple cast_stmt;
2290 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2292 lazy_create_slsr_reg (new_var, to_type);
2293 cast_lhs = make_ssa_name (*new_var, NULL);
2294 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
2295 from_expr, NULL_TREE);
2296 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2297 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
2299 if (dump_file && (dump_flags & TDF_DETAILS))
2301 fputs (" Inserting: ", dump_file);
2302 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
2305 return cast_lhs;
2308 /* Replace the RHS of the statement represented by candidate C with
2309 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
2310 leave C unchanged or just interchange its operands. The original
2311 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
2312 If the replacement was made and we are doing a details dump,
2313 return the revised statement, else NULL. */
2315 static gimple
2316 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
2317 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
2318 slsr_cand_t c)
2320 if (new_code != old_code
2321 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
2322 || !operand_equal_p (new_rhs2, old_rhs2, 0))
2323 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
2324 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
2326 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2327 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
2328 update_stmt (gsi_stmt (gsi));
2330 if (dump_file && (dump_flags & TDF_DETAILS))
2331 return gsi_stmt (gsi);
2334 else if (dump_file && (dump_flags & TDF_DETAILS))
2335 fputs (" (duplicate, not actually replacing)\n", dump_file);
2337 return NULL;
2340 /* Strength-reduce the statement represented by candidate C by replacing
2341 it with an equivalent addition or subtraction. I is the index into
2342 the increment vector identifying C's increment. NEW_VAR is used to
2343 create a new SSA name if a cast needs to be introduced. BASIS_NAME
2344 is the rhs1 to use in creating the add/subtract. */
2346 static void
2347 replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var,
2348 tree basis_name)
2350 gimple stmt_to_print = NULL;
2351 tree orig_rhs1, orig_rhs2;
2352 tree rhs2;
2353 enum tree_code orig_code, repl_code;
2354 double_int cand_incr;
2356 orig_code = gimple_assign_rhs_code (c->cand_stmt);
2357 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2358 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2359 cand_incr = cand_increment (c);
2361 if (dump_file && (dump_flags & TDF_DETAILS))
2363 fputs ("Replacing: ", dump_file);
2364 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
2365 stmt_to_print = c->cand_stmt;
2368 if (address_arithmetic_p)
2369 repl_code = POINTER_PLUS_EXPR;
2370 else
2371 repl_code = PLUS_EXPR;
2373 /* If the increment has an initializer T_0, replace the candidate
2374 statement with an add of the basis name and the initializer. */
2375 if (incr_vec[i].initializer)
2377 tree init_type = TREE_TYPE (incr_vec[i].initializer);
2378 tree orig_type = TREE_TYPE (orig_rhs2);
2380 if (types_compatible_p (orig_type, init_type))
2381 rhs2 = incr_vec[i].initializer;
2382 else
2383 rhs2 = introduce_cast_before_cand (c, orig_type,
2384 incr_vec[i].initializer,
2385 new_var);
2387 if (!double_int_equal_p (incr_vec[i].incr, cand_incr))
2389 gcc_assert (repl_code == PLUS_EXPR);
2390 repl_code = MINUS_EXPR;
2393 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
2394 orig_code, orig_rhs1, orig_rhs2,
2398 /* Otherwise, the increment is one of -1, 0, and 1. Replace
2399 with a subtract of the stride from the basis name, a copy
2400 from the basis name, or an add of the stride to the basis
2401 name, respectively. It may be necessary to introduce a
2402 cast (or reuse an existing cast). */
2403 else if (double_int_one_p (cand_incr))
2405 tree stride_type = TREE_TYPE (c->stride);
2406 tree orig_type = TREE_TYPE (orig_rhs2);
2408 if (types_compatible_p (orig_type, stride_type))
2409 rhs2 = c->stride;
2410 else
2411 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
2413 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
2414 orig_code, orig_rhs1, orig_rhs2,
2418 else if (double_int_minus_one_p (cand_incr))
2420 tree stride_type = TREE_TYPE (c->stride);
2421 tree orig_type = TREE_TYPE (orig_rhs2);
2422 gcc_assert (repl_code != POINTER_PLUS_EXPR);
2424 if (types_compatible_p (orig_type, stride_type))
2425 rhs2 = c->stride;
2426 else
2427 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
2429 if (orig_code != MINUS_EXPR
2430 || !operand_equal_p (basis_name, orig_rhs1, 0)
2431 || !operand_equal_p (rhs2, orig_rhs2, 0))
2433 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2434 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
2435 update_stmt (gsi_stmt (gsi));
2437 if (dump_file && (dump_flags & TDF_DETAILS))
2438 stmt_to_print = gsi_stmt (gsi);
2440 else if (dump_file && (dump_flags & TDF_DETAILS))
2441 fputs (" (duplicate, not actually replacing)\n", dump_file);
2444 else if (double_int_zero_p (cand_incr))
2446 tree lhs = gimple_assign_lhs (c->cand_stmt);
2447 tree lhs_type = TREE_TYPE (lhs);
2448 tree basis_type = TREE_TYPE (basis_name);
2450 if (types_compatible_p (lhs_type, basis_type))
2452 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
2453 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2454 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2455 gsi_replace (&gsi, copy_stmt, false);
2457 if (dump_file && (dump_flags & TDF_DETAILS))
2458 stmt_to_print = copy_stmt;
2460 else
2462 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2463 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
2464 basis_name,
2465 NULL_TREE);
2466 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2467 gsi_replace (&gsi, cast_stmt, false);
2469 if (dump_file && (dump_flags & TDF_DETAILS))
2470 stmt_to_print = cast_stmt;
2473 else
2474 gcc_unreachable ();
2476 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
2478 fputs ("With: ", dump_file);
2479 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2480 fputs ("\n", dump_file);
2484 /* For each candidate in the tree rooted at C, replace it with
2485 an increment if such has been shown to be profitable. */
2487 static void
2488 replace_profitable_candidates (slsr_cand_t c)
2490 if (!cand_already_replaced (c))
2492 double_int increment = cand_abs_increment (c);
2493 tree new_var = NULL;
2494 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
2495 unsigned i;
2497 i = incr_vec_index (increment);
2499 /* Only process profitable increments. Nothing useful can be done
2500 to a cast or copy. */
2501 if (profitable_increment_p (i)
2502 && orig_code != MODIFY_EXPR
2503 && orig_code != NOP_EXPR)
2505 slsr_cand_t basis = lookup_cand (c->basis);
2506 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
2507 replace_one_candidate (c, i, &new_var, basis_name);
2511 if (c->sibling)
2512 replace_profitable_candidates (lookup_cand (c->sibling));
2514 if (c->dependent)
2515 replace_profitable_candidates (lookup_cand (c->dependent));
2518 /* Analyze costs of related candidates in the candidate vector,
2519 and make beneficial replacements. */
2521 static void
2522 analyze_candidates_and_replace (void)
2524 unsigned i;
2525 slsr_cand_t c;
2527 /* Each candidate that has a null basis and a non-null
2528 dependent is the root of a tree of related statements.
2529 Analyze each tree to determine a subset of those
2530 statements that can be replaced with maximum benefit. */
2531 FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
2533 slsr_cand_t first_dep;
2535 if (c->basis != 0 || c->dependent == 0)
2536 continue;
2538 if (dump_file && (dump_flags & TDF_DETAILS))
2539 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
2540 c->cand_num);
2542 first_dep = lookup_cand (c->dependent);
2544 /* If this is a chain of CAND_REFs, unconditionally replace
2545 each of them with a strength-reduced data reference. */
2546 if (c->kind == CAND_REF)
2547 replace_refs (c);
2549 /* If the common stride of all related candidates is a
2550 known constant, and none of these has a phi-dependence,
2551 then all replacements are considered profitable.
2552 Each replaces a multiply by a single add, with the
2553 possibility that a feeding add also goes dead as a
2554 result. */
2555 else if (unconditional_cands_with_known_stride_p (c))
2556 replace_dependents (first_dep);
2558 /* When the stride is an SSA name, it may still be profitable
2559 to replace some or all of the dependent candidates, depending
2560 on whether the introduced increments can be reused, or are
2561 less expensive to calculate than the replaced statements. */
2562 else
2564 unsigned length;
2565 enum machine_mode mode;
2566 bool speed;
2568 /* Determine whether we'll be generating pointer arithmetic
2569 when replacing candidates. */
2570 address_arithmetic_p = (c->kind == CAND_ADD
2571 && POINTER_TYPE_P (c->cand_type));
2573 /* If all candidates have already been replaced under other
2574 interpretations, nothing remains to be done. */
2575 length = count_candidates (c);
2576 if (!length)
2577 continue;
2579 /* Construct an array of increments for this candidate chain. */
2580 incr_vec = XNEWVEC (incr_info, length);
2581 incr_vec_len = 0;
2582 record_increments (c);
2584 /* Determine which increments are profitable to replace. */
2585 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
2586 speed = optimize_cands_for_speed_p (c);
2587 analyze_increments (first_dep, mode, speed);
2589 /* Insert initializers of the form T_0 = stride * increment
2590 for use in profitable replacements. */
2591 insert_initializers (first_dep);
2592 dump_incr_vec ();
2594 /* Perform the replacements. */
2595 replace_profitable_candidates (first_dep);
2596 free (incr_vec);
2599 /* TODO: When conditional increments occur so that a
2600 candidate is dependent upon a phi-basis, the cost of
2601 introducing a temporary must be accounted for. */
2605 static unsigned
2606 execute_strength_reduction (void)
2608 struct dom_walk_data walk_data;
2610 /* Create the obstack where candidates will reside. */
2611 gcc_obstack_init (&cand_obstack);
2613 /* Allocate the candidate vector. */
2614 cand_vec = VEC_alloc (slsr_cand_t, heap, 128);
2616 /* Allocate the mapping from statements to candidate indices. */
2617 stmt_cand_map = pointer_map_create ();
2619 /* Create the obstack where candidate chains will reside. */
2620 gcc_obstack_init (&chain_obstack);
2622 /* Allocate the mapping from base expressions to candidate chains. */
2623 base_cand_map = htab_create (500, base_cand_hash,
2624 base_cand_eq, base_cand_free);
2626 /* Initialize the loop optimizer. We need to detect flow across
2627 back edges, and this gives us dominator information as well. */
2628 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2630 /* Set up callbacks for the generic dominator tree walker. */
2631 walk_data.dom_direction = CDI_DOMINATORS;
2632 walk_data.initialize_block_local_data = NULL;
2633 walk_data.before_dom_children = find_candidates_in_block;
2634 walk_data.after_dom_children = NULL;
2635 walk_data.global_data = NULL;
2636 walk_data.block_local_data_size = 0;
2637 init_walk_dominator_tree (&walk_data);
2639 /* Walk the CFG in predominator order looking for strength reduction
2640 candidates. */
2641 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
2643 if (dump_file && (dump_flags & TDF_DETAILS))
2645 dump_cand_vec ();
2646 dump_cand_chains ();
2649 /* Analyze costs and make appropriate replacements. */
2650 analyze_candidates_and_replace ();
2652 /* Free resources. */
2653 fini_walk_dominator_tree (&walk_data);
2654 loop_optimizer_finalize ();
2655 htab_delete (base_cand_map);
2656 obstack_free (&chain_obstack, NULL);
2657 pointer_map_destroy (stmt_cand_map);
2658 VEC_free (slsr_cand_t, heap, cand_vec);
2659 obstack_free (&cand_obstack, NULL);
2661 return 0;
2664 static bool
2665 gate_strength_reduction (void)
2667 return flag_tree_slsr;
2670 struct gimple_opt_pass pass_strength_reduction =
2673 GIMPLE_PASS,
2674 "slsr", /* name */
2675 gate_strength_reduction, /* gate */
2676 execute_strength_reduction, /* execute */
2677 NULL, /* sub */
2678 NULL, /* next */
2679 0, /* static_pass_number */
2680 TV_GIMPLE_SLSR, /* tv_id */
2681 PROP_cfg | PROP_ssa, /* properties_required */
2682 0, /* properties_provided */
2683 0, /* properties_destroyed */
2684 0, /* todo_flags_start */
2685 TODO_verify_ssa /* todo_flags_finish */