/* Source: gcc/gimple-ssa-strength-reduction.c (official-gcc.git).  */
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction will be implemented in four stages, gradually
28 adding more complex candidates:
30 1) Explicit multiplies, known constant multipliers, no
31 conditional increments. (complete)
32 2) Explicit multiplies, unknown constant multipliers,
33 no conditional increments. (complete)
34 3) Implicit multiplies in addressing expressions. (complete)
35 4) Explicit multiplies, conditional increments. (pending)
37 It would also be possible to apply strength reduction to divisions
38 and modulos, but such opportunities are relatively uncommon.
40 Strength reduction is also currently restricted to integer operations.
41 If desired, it could be extended to floating-point operations under
42 control of something like -funsafe-math-optimizations. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "basic-block.h"
50 #include "tree-pass.h"
51 #include "cfgloop.h"
52 #include "gimple-pretty-print.h"
53 #include "tree-flow.h"
54 #include "domwalk.h"
55 #include "pointer-set.h"
56 #include "expmed.h"
57 #include "params.h"
59 /* Information about a strength reduction candidate. Each statement
60 in the candidate table represents an expression of one of the
61 following forms (the special case of CAND_REF will be described
62 later):
64 (CAND_MULT) S1: X = (B + i) * S
65 (CAND_ADD) S1: X = B + (i * S)
67 Here X and B are SSA names, i is an integer constant, and S is
68 either an SSA name or a constant. We call B the "base," i the
69 "index", and S the "stride."
71 Any statement S0 that dominates S1 and is of the form:
73 (CAND_MULT) S0: Y = (B + i') * S
74 (CAND_ADD) S0: Y = B + (i' * S)
76 is called a "basis" for S1. In both cases, S1 may be replaced by
78 S1': X = Y + (i - i') * S,
80 where (i - i') * S is folded to the extent possible.
82 All gimple statements are visited in dominator order, and each
83 statement that may contribute to one of the forms of S1 above is
84 given at least one entry in the candidate table. Such statements
85 include addition, pointer addition, subtraction, multiplication,
86 negation, copies, and nontrivial type casts. If a statement may
87 represent more than one expression of the forms of S1 above,
88 multiple "interpretations" are stored in the table and chained
89 together. Examples:
91 * An add of two SSA names may treat either operand as the base.
92 * A multiply of two SSA names, likewise.
93 * A copy or cast may be thought of as either a CAND_MULT with
94 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
96 Candidate records are allocated from an obstack. They are addressed
97 both from a hash table keyed on S1, and from a vector of candidate
98 pointers arranged in predominator order.
100 Opportunity note
101 ----------------
102 Currently we don't recognize:
104 S0: Y = (S * i') - B
105 S1: X = (S * i) - B
107 as a strength reduction opportunity, even though this S1 would
108 also be replaceable by the S1' above. This can be added if it
109 comes up in practice.
111 Strength reduction in addressing
112 --------------------------------
113 There is another kind of candidate known as CAND_REF. A CAND_REF
114 describes a statement containing a memory reference having
115 complex addressing that might benefit from strength reduction.
116 Specifically, we are interested in references for which
117 get_inner_reference returns a base address, offset, and bitpos as
118 follows:
120 base: MEM_REF (T1, C1)
121 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
122 bitpos: C4 * BITS_PER_UNIT
124 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
125 arbitrary integer constants. Note that C2 may be zero, in which
126 case the offset will be MULT_EXPR (T2, C3).
128 When this pattern is recognized, the original memory reference
129 can be replaced with:
131 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
132 C1 + (C2 * C3) + C4)
134 which distributes the multiply to allow constant folding. When
135 two or more addressing expressions can be represented by MEM_REFs
136 of this form, differing only in the constants C1, C2, and C4,
137 making this substitution produces more efficient addressing during
138 the RTL phases. When there are not at least two expressions with
139 the same values of T1, T2, and C3, there is nothing to be gained
140 by the replacement.
142 Strength reduction of CAND_REFs uses the same infrastructure as
143 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
144 field, MULT_EXPR (T2, C3) in the stride (S) field, and
145 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
146 is thus another CAND_REF with the same B and S values. When at
147 least two CAND_REFs are chained together using the basis relation,
148 each of them is replaced as above, resulting in improved code
149 generation for addressing. */
/* Index into the candidate vector, offset by 1.  VECs are zero-based,
   while cand_idx's are one-based, with zero indicating null.  */
typedef unsigned cand_idx;
/* The kind of candidate.  */
enum cand_kind
{
  CAND_MULT,
  CAND_ADD,
  CAND_REF
};
164 struct slsr_cand_d
166 /* The candidate statement S1. */
167 gimple cand_stmt;
169 /* The base expression B: often an SSA name, but not always. */
170 tree base_expr;
172 /* The stride S. */
173 tree stride;
175 /* The index constant i. */
176 double_int index;
178 /* The type of the candidate. This is normally the type of base_expr,
179 but casts may have occurred when combining feeding instructions.
180 A candidate can only be a basis for candidates of the same final type.
181 (For CAND_REFs, this is the type to be used for operand 1 of the
182 replacement MEM_REF.) */
183 tree cand_type;
185 /* The kind of candidate (CAND_MULT, etc.). */
186 enum cand_kind kind;
188 /* Index of this candidate in the candidate vector. */
189 cand_idx cand_num;
191 /* Index of the next candidate record for the same statement.
192 A statement may be useful in more than one way (e.g., due to
193 commutativity). So we can have multiple "interpretations"
194 of a statement. */
195 cand_idx next_interp;
197 /* Index of the basis statement S0, if any, in the candidate vector. */
198 cand_idx basis;
200 /* First candidate for which this candidate is a basis, if one exists. */
201 cand_idx dependent;
203 /* Next candidate having the same basis as this one. */
204 cand_idx sibling;
206 /* If this is a conditional candidate, the defining PHI statement
207 for the base SSA name B. For future use; always NULL for now. */
208 gimple def_phi;
210 /* Savings that can be expected from eliminating dead code if this
211 candidate is replaced. */
212 int dead_savings;
215 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
216 typedef const struct slsr_cand_d *const_slsr_cand_t;
218 /* Pointers to candidates are chained together as part of a mapping
219 from base expressions to the candidates that use them. */
221 struct cand_chain_d
223 /* Base expression for the chain of candidates: often, but not
224 always, an SSA name. */
225 tree base_expr;
227 /* Pointer to a candidate. */
228 slsr_cand_t cand;
230 /* Chain pointer. */
231 struct cand_chain_d *next;
235 typedef struct cand_chain_d cand_chain, *cand_chain_t;
236 typedef const struct cand_chain_d *const_cand_chain_t;
238 /* Information about a unique "increment" associated with candidates
239 having an SSA name for a stride. An increment is the difference
240 between the index of the candidate and the index of its basis,
241 i.e., (i - i') as discussed in the module commentary.
243 When we are not going to generate address arithmetic we treat
244 increments that differ only in sign as the same, allowing sharing
245 of the cost of initializers. The absolute value of the increment
246 is stored in the incr_info. */
248 struct incr_info_d
250 /* The increment that relates a candidate to its basis. */
251 double_int incr;
253 /* How many times the increment occurs in the candidate tree. */
254 unsigned count;
256 /* Cost of replacing candidates using this increment. Negative and
257 zero costs indicate replacement should be performed. */
258 int cost;
260 /* If this increment is profitable but is not -1, 0, or 1, it requires
261 an initializer T_0 = stride * incr to be found or introduced in the
262 nearest common dominator of all candidates. This field holds T_0
263 for subsequent use. */
264 tree initializer;
266 /* If the initializer was found to already exist, this is the block
267 where it was found. */
268 basic_block init_bb;
271 typedef struct incr_info_d incr_info, *incr_info_t;
273 /* Candidates are maintained in a vector. If candidate X dominates
274 candidate Y, then X appears before Y in the vector; but the
275 converse does not necessarily hold. */
276 static vec<slsr_cand_t> cand_vec;
278 enum cost_consts
280 COST_NEUTRAL = 0,
281 COST_INFINITE = 1000
284 /* Pointer map embodying a mapping from statements to candidates. */
285 static struct pointer_map_t *stmt_cand_map;
287 /* Obstack for candidates. */
288 static struct obstack cand_obstack;
290 /* Hash table embodying a mapping from base exprs to chains of candidates. */
291 static htab_t base_cand_map;
293 /* Obstack for candidate chains. */
294 static struct obstack chain_obstack;
296 /* An array INCR_VEC of incr_infos is used during analysis of related
297 candidates having an SSA name for a stride. INCR_VEC_LEN describes
298 its current length. */
299 static incr_info_t incr_vec;
300 static unsigned incr_vec_len;
302 /* For a chain of candidates with unknown stride, indicates whether or not
303 we must generate pointer arithmetic when replacing statements. */
304 static bool address_arithmetic_p;
306 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
308 static slsr_cand_t
309 lookup_cand (cand_idx idx)
311 return cand_vec[idx - 1];
314 /* Callback to produce a hash value for a candidate chain header. */
316 static hashval_t
317 base_cand_hash (const void *p)
319 tree base_expr = ((const_cand_chain_t) p)->base_expr;
320 return iterative_hash_expr (base_expr, 0);
323 /* Callback when an element is removed from the hash table.
324 We never remove entries until the entire table is released. */
326 static void
327 base_cand_free (void *p ATTRIBUTE_UNUSED)
331 /* Callback to return true if two candidate chain headers are equal. */
333 static int
334 base_cand_eq (const void *p1, const void *p2)
336 const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
337 const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
338 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
341 /* Use the base expr from candidate C to look for possible candidates
342 that can serve as a basis for C. Each potential basis must also
343 appear in a block that dominates the candidate statement and have
344 the same stride and type. If more than one possible basis exists,
345 the one with highest index in the vector is chosen; this will be
346 the most immediately dominating basis. */
348 static int
349 find_basis_for_candidate (slsr_cand_t c)
351 cand_chain mapping_key;
352 cand_chain_t chain;
353 slsr_cand_t basis = NULL;
355 // Limit potential of N^2 behavior for long candidate chains.
356 int iters = 0;
357 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
359 mapping_key.base_expr = c->base_expr;
360 chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
362 for (; chain && iters < max_iters; chain = chain->next, ++iters)
364 slsr_cand_t one_basis = chain->cand;
366 if (one_basis->kind != c->kind
367 || one_basis->cand_stmt == c->cand_stmt
368 || !operand_equal_p (one_basis->stride, c->stride, 0)
369 || !types_compatible_p (one_basis->cand_type, c->cand_type)
370 || !dominated_by_p (CDI_DOMINATORS,
371 gimple_bb (c->cand_stmt),
372 gimple_bb (one_basis->cand_stmt)))
373 continue;
375 if (!basis || basis->cand_num < one_basis->cand_num)
376 basis = one_basis;
379 if (basis)
381 c->sibling = basis->dependent;
382 basis->dependent = c->cand_num;
383 return basis->cand_num;
386 return 0;
389 /* Record a mapping from the base expression of C to C itself, indicating that
390 C may potentially serve as a basis using that base expression. */
392 static void
393 record_potential_basis (slsr_cand_t c)
395 cand_chain_t node;
396 void **slot;
398 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
399 node->base_expr = c->base_expr;
400 node->cand = c;
401 node->next = NULL;
402 slot = htab_find_slot (base_cand_map, node, INSERT);
404 if (*slot)
406 cand_chain_t head = (cand_chain_t) (*slot);
407 node->next = head->next;
408 head->next = node;
410 else
411 *slot = node;
414 /* Allocate storage for a new candidate and initialize its fields.
415 Attempt to find a basis for the candidate. */
417 static slsr_cand_t
418 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
419 double_int index, tree stride, tree ctype,
420 unsigned savings)
422 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
423 sizeof (slsr_cand));
424 c->cand_stmt = gs;
425 c->base_expr = base;
426 c->stride = stride;
427 c->index = index;
428 c->cand_type = ctype;
429 c->kind = kind;
430 c->cand_num = cand_vec.length () + 1;
431 c->next_interp = 0;
432 c->dependent = 0;
433 c->sibling = 0;
434 c->def_phi = NULL;
435 c->dead_savings = savings;
437 cand_vec.safe_push (c);
438 c->basis = find_basis_for_candidate (c);
439 record_potential_basis (c);
441 return c;
444 /* Determine the target cost of statement GS when compiling according
445 to SPEED. */
447 static int
448 stmt_cost (gimple gs, bool speed)
450 tree lhs, rhs1, rhs2;
451 enum machine_mode lhs_mode;
453 gcc_assert (is_gimple_assign (gs));
454 lhs = gimple_assign_lhs (gs);
455 rhs1 = gimple_assign_rhs1 (gs);
456 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
458 switch (gimple_assign_rhs_code (gs))
460 case MULT_EXPR:
461 rhs2 = gimple_assign_rhs2 (gs);
463 if (host_integerp (rhs2, 0))
464 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
466 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
467 return mul_cost (speed, lhs_mode);
469 case PLUS_EXPR:
470 case POINTER_PLUS_EXPR:
471 case MINUS_EXPR:
472 return add_cost (speed, lhs_mode);
474 case NEGATE_EXPR:
475 return neg_cost (speed, lhs_mode);
477 case NOP_EXPR:
478 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
480 /* Note that we don't assign costs to copies that in most cases
481 will go away. */
482 default:
486 gcc_unreachable ();
487 return 0;
490 /* Look up the defining statement for BASE_IN and return a pointer
491 to its candidate in the candidate table, if any; otherwise NULL.
492 Only CAND_ADD and CAND_MULT candidates are returned. */
494 static slsr_cand_t
495 base_cand_from_table (tree base_in)
497 slsr_cand_t *result;
499 gimple def = SSA_NAME_DEF_STMT (base_in);
500 if (!def)
501 return (slsr_cand_t) NULL;
503 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
505 if (result && (*result)->kind != CAND_REF)
506 return *result;
508 return (slsr_cand_t) NULL;
511 /* Add an entry to the statement-to-candidate mapping. */
513 static void
514 add_cand_for_stmt (gimple gs, slsr_cand_t c)
516 void **slot = pointer_map_insert (stmt_cand_map, gs);
517 gcc_assert (!*slot);
518 *slot = c;
521 /* Look for the following pattern:
523 *PBASE: MEM_REF (T1, C1)
525 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
527 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
529 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
531 *PINDEX: C4 * BITS_PER_UNIT
533 If not present, leave the input values unchanged and return FALSE.
534 Otherwise, modify the input values as follows and return TRUE:
536 *PBASE: T1
537 *POFFSET: MULT_EXPR (T2, C3)
538 *PINDEX: C1 + (C2 * C3) + C4 */
540 static bool
541 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
542 tree *ptype)
544 tree base = *pbase, offset = *poffset;
545 double_int index = *pindex;
546 double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
547 tree mult_op0, mult_op1, t1, t2, type;
548 double_int c1, c2, c3, c4;
550 if (!base
551 || !offset
552 || TREE_CODE (base) != MEM_REF
553 || TREE_CODE (offset) != MULT_EXPR
554 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
555 || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
556 return false;
558 t1 = TREE_OPERAND (base, 0);
559 c1 = mem_ref_offset (base);
560 type = TREE_TYPE (TREE_OPERAND (base, 1));
562 mult_op0 = TREE_OPERAND (offset, 0);
563 mult_op1 = TREE_OPERAND (offset, 1);
565 c3 = tree_to_double_int (mult_op1);
567 if (TREE_CODE (mult_op0) == PLUS_EXPR)
569 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
571 t2 = TREE_OPERAND (mult_op0, 0);
572 c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
574 else
575 return false;
577 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
579 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
581 t2 = TREE_OPERAND (mult_op0, 0);
582 c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
584 else
585 return false;
587 else
589 t2 = mult_op0;
590 c2 = double_int_zero;
593 c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
595 *pbase = t1;
596 *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
597 double_int_to_tree (sizetype, c3));
598 *pindex = c1 + c2 * c3 + c4;
599 *ptype = type;
601 return true;
604 /* Given GS which contains a data reference, create a CAND_REF entry in
605 the candidate table and attempt to find a basis. */
607 static void
608 slsr_process_ref (gimple gs)
610 tree ref_expr, base, offset, type;
611 HOST_WIDE_INT bitsize, bitpos;
612 enum machine_mode mode;
613 int unsignedp, volatilep;
614 double_int index;
615 slsr_cand_t c;
617 if (gimple_vdef (gs))
618 ref_expr = gimple_assign_lhs (gs);
619 else
620 ref_expr = gimple_assign_rhs1 (gs);
622 if (!handled_component_p (ref_expr)
623 || TREE_CODE (ref_expr) == BIT_FIELD_REF
624 || (TREE_CODE (ref_expr) == COMPONENT_REF
625 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
626 return;
628 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
629 &unsignedp, &volatilep, false);
630 index = double_int::from_uhwi (bitpos);
632 if (!restructure_reference (&base, &offset, &index, &type))
633 return;
635 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
636 type, 0);
638 /* Add the candidate to the statement-candidate mapping. */
639 add_cand_for_stmt (gs, c);
642 /* Create a candidate entry for a statement GS, where GS multiplies
643 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
644 about the two SSA names into the new candidate. Return the new
645 candidate. */
647 static slsr_cand_t
648 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
650 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
651 double_int index;
652 unsigned savings = 0;
653 slsr_cand_t c;
654 slsr_cand_t base_cand = base_cand_from_table (base_in);
656 /* Look at all interpretations of the base candidate, if necessary,
657 to find information to propagate into this candidate. */
658 while (base_cand && !base)
661 if (base_cand->kind == CAND_MULT
662 && operand_equal_p (base_cand->stride, integer_one_node, 0))
664 /* Y = (B + i') * 1
665 X = Y * Z
666 ================
667 X = (B + i') * Z */
668 base = base_cand->base_expr;
669 index = base_cand->index;
670 stride = stride_in;
671 ctype = base_cand->cand_type;
672 if (has_single_use (base_in))
673 savings = (base_cand->dead_savings
674 + stmt_cost (base_cand->cand_stmt, speed));
676 else if (base_cand->kind == CAND_ADD
677 && TREE_CODE (base_cand->stride) == INTEGER_CST)
679 /* Y = B + (i' * S), S constant
680 X = Y * Z
681 ============================
682 X = B + ((i' * S) * Z) */
683 base = base_cand->base_expr;
684 index = base_cand->index * tree_to_double_int (base_cand->stride);
685 stride = stride_in;
686 ctype = base_cand->cand_type;
687 if (has_single_use (base_in))
688 savings = (base_cand->dead_savings
689 + stmt_cost (base_cand->cand_stmt, speed));
692 if (base_cand->next_interp)
693 base_cand = lookup_cand (base_cand->next_interp);
694 else
695 base_cand = NULL;
698 if (!base)
700 /* No interpretations had anything useful to propagate, so
701 produce X = (Y + 0) * Z. */
702 base = base_in;
703 index = double_int_zero;
704 stride = stride_in;
705 ctype = TREE_TYPE (base_in);
708 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
709 ctype, savings);
710 return c;
713 /* Create a candidate entry for a statement GS, where GS multiplies
714 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
715 information about BASE_IN into the new candidate. Return the new
716 candidate. */
718 static slsr_cand_t
719 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
721 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
722 double_int index, temp;
723 unsigned savings = 0;
724 slsr_cand_t c;
725 slsr_cand_t base_cand = base_cand_from_table (base_in);
727 /* Look at all interpretations of the base candidate, if necessary,
728 to find information to propagate into this candidate. */
729 while (base_cand && !base)
731 if (base_cand->kind == CAND_MULT
732 && TREE_CODE (base_cand->stride) == INTEGER_CST)
734 /* Y = (B + i') * S, S constant
735 X = Y * c
736 ============================
737 X = (B + i') * (S * c) */
738 temp = tree_to_double_int (base_cand->stride)
739 * tree_to_double_int (stride_in);
740 if (double_int_fits_to_tree_p (TREE_TYPE (stride_in), temp))
742 base = base_cand->base_expr;
743 index = base_cand->index;
744 stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
745 ctype = base_cand->cand_type;
746 if (has_single_use (base_in))
747 savings = (base_cand->dead_savings
748 + stmt_cost (base_cand->cand_stmt, speed));
751 else if (base_cand->kind == CAND_ADD
752 && operand_equal_p (base_cand->stride, integer_one_node, 0))
754 /* Y = B + (i' * 1)
755 X = Y * c
756 ===========================
757 X = (B + i') * c */
758 base = base_cand->base_expr;
759 index = base_cand->index;
760 stride = stride_in;
761 ctype = base_cand->cand_type;
762 if (has_single_use (base_in))
763 savings = (base_cand->dead_savings
764 + stmt_cost (base_cand->cand_stmt, speed));
766 else if (base_cand->kind == CAND_ADD
767 && base_cand->index.is_one ()
768 && TREE_CODE (base_cand->stride) == INTEGER_CST)
770 /* Y = B + (1 * S), S constant
771 X = Y * c
772 ===========================
773 X = (B + S) * c */
774 base = base_cand->base_expr;
775 index = tree_to_double_int (base_cand->stride);
776 stride = stride_in;
777 ctype = base_cand->cand_type;
778 if (has_single_use (base_in))
779 savings = (base_cand->dead_savings
780 + stmt_cost (base_cand->cand_stmt, speed));
783 if (base_cand->next_interp)
784 base_cand = lookup_cand (base_cand->next_interp);
785 else
786 base_cand = NULL;
789 if (!base)
791 /* No interpretations had anything useful to propagate, so
792 produce X = (Y + 0) * c. */
793 base = base_in;
794 index = double_int_zero;
795 stride = stride_in;
796 ctype = TREE_TYPE (base_in);
799 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
800 ctype, savings);
801 return c;
804 /* Given GS which is a multiply of scalar integers, make an appropriate
805 entry in the candidate table. If this is a multiply of two SSA names,
806 create two CAND_MULT interpretations and attempt to find a basis for
807 each of them. Otherwise, create a single CAND_MULT and attempt to
808 find a basis. */
810 static void
811 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
813 slsr_cand_t c, c2;
815 /* If this is a multiply of an SSA name with itself, it is highly
816 unlikely that we will get a strength reduction opportunity, so
817 don't record it as a candidate. This simplifies the logic for
818 finding a basis, so if this is removed that must be considered. */
819 if (rhs1 == rhs2)
820 return;
822 if (TREE_CODE (rhs2) == SSA_NAME)
824 /* Record an interpretation of this statement in the candidate table
825 assuming RHS1 is the base expression and RHS2 is the stride. */
826 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
828 /* Add the first interpretation to the statement-candidate mapping. */
829 add_cand_for_stmt (gs, c);
831 /* Record another interpretation of this statement assuming RHS1
832 is the stride and RHS2 is the base expression. */
833 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
834 c->next_interp = c2->cand_num;
836 else
838 /* Record an interpretation for the multiply-immediate. */
839 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
841 /* Add the interpretation to the statement-candidate mapping. */
842 add_cand_for_stmt (gs, c);
846 /* Create a candidate entry for a statement GS, where GS adds two
847 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
848 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
849 information about the two SSA names into the new candidate.
850 Return the new candidate. */
852 static slsr_cand_t
853 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
854 bool subtract_p, bool speed)
856 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
857 double_int index;
858 unsigned savings = 0;
859 slsr_cand_t c;
860 slsr_cand_t base_cand = base_cand_from_table (base_in);
861 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
863 /* The most useful transformation is a multiply-immediate feeding
864 an add or subtract. Look for that first. */
865 while (addend_cand && !base)
867 if (addend_cand->kind == CAND_MULT
868 && addend_cand->index.is_zero ()
869 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
871 /* Z = (B + 0) * S, S constant
872 X = Y +/- Z
873 ===========================
874 X = Y + ((+/-1 * S) * B) */
875 base = base_in;
876 index = tree_to_double_int (addend_cand->stride);
877 if (subtract_p)
878 index = -index;
879 stride = addend_cand->base_expr;
880 ctype = TREE_TYPE (base_in);
881 if (has_single_use (addend_in))
882 savings = (addend_cand->dead_savings
883 + stmt_cost (addend_cand->cand_stmt, speed));
886 if (addend_cand->next_interp)
887 addend_cand = lookup_cand (addend_cand->next_interp);
888 else
889 addend_cand = NULL;
892 while (base_cand && !base)
894 if (base_cand->kind == CAND_ADD
895 && (base_cand->index.is_zero ()
896 || operand_equal_p (base_cand->stride,
897 integer_zero_node, 0)))
899 /* Y = B + (i' * S), i' * S = 0
900 X = Y +/- Z
901 ============================
902 X = B + (+/-1 * Z) */
903 base = base_cand->base_expr;
904 index = subtract_p ? double_int_minus_one : double_int_one;
905 stride = addend_in;
906 ctype = base_cand->cand_type;
907 if (has_single_use (base_in))
908 savings = (base_cand->dead_savings
909 + stmt_cost (base_cand->cand_stmt, speed));
911 else if (subtract_p)
913 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
915 while (subtrahend_cand && !base)
917 if (subtrahend_cand->kind == CAND_MULT
918 && subtrahend_cand->index.is_zero ()
919 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
921 /* Z = (B + 0) * S, S constant
922 X = Y - Z
923 ===========================
924 Value: X = Y + ((-1 * S) * B) */
925 base = base_in;
926 index = tree_to_double_int (subtrahend_cand->stride);
927 index = -index;
928 stride = subtrahend_cand->base_expr;
929 ctype = TREE_TYPE (base_in);
930 if (has_single_use (addend_in))
931 savings = (subtrahend_cand->dead_savings
932 + stmt_cost (subtrahend_cand->cand_stmt, speed));
935 if (subtrahend_cand->next_interp)
936 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
937 else
938 subtrahend_cand = NULL;
942 if (base_cand->next_interp)
943 base_cand = lookup_cand (base_cand->next_interp);
944 else
945 base_cand = NULL;
948 if (!base)
950 /* No interpretations had anything useful to propagate, so
951 produce X = Y + (1 * Z). */
952 base = base_in;
953 index = subtract_p ? double_int_minus_one : double_int_one;
954 stride = addend_in;
955 ctype = TREE_TYPE (base_in);
958 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
959 ctype, savings);
960 return c;
963 /* Create a candidate entry for a statement GS, where GS adds SSA
964 name BASE_IN to constant INDEX_IN. Propagate any known information
965 about BASE_IN into the new candidate. Return the new candidate. */
967 static slsr_cand_t
968 create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
970 enum cand_kind kind = CAND_ADD;
971 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
972 double_int index, multiple;
973 unsigned savings = 0;
974 slsr_cand_t c;
975 slsr_cand_t base_cand = base_cand_from_table (base_in);
977 while (base_cand && !base)
979 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
981 if (TREE_CODE (base_cand->stride) == INTEGER_CST
982 && index_in.multiple_of (tree_to_double_int (base_cand->stride),
983 unsigned_p, &multiple))
985 /* Y = (B + i') * S, S constant, c = kS for some integer k
986 X = Y + c
987 ============================
988 X = (B + (i'+ k)) * S
990 Y = B + (i' * S), S constant, c = kS for some integer k
991 X = Y + c
992 ============================
993 X = (B + (i'+ k)) * S */
994 kind = base_cand->kind;
995 base = base_cand->base_expr;
996 index = base_cand->index + multiple;
997 stride = base_cand->stride;
998 ctype = base_cand->cand_type;
999 if (has_single_use (base_in))
1000 savings = (base_cand->dead_savings
1001 + stmt_cost (base_cand->cand_stmt, speed));
1004 if (base_cand->next_interp)
1005 base_cand = lookup_cand (base_cand->next_interp);
1006 else
1007 base_cand = NULL;
1010 if (!base)
1012 /* No interpretations had anything useful to propagate, so
1013 produce X = Y + (c * 1). */
1014 kind = CAND_ADD;
1015 base = base_in;
1016 index = index_in;
1017 stride = integer_one_node;
1018 ctype = TREE_TYPE (base_in);
1021 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1022 ctype, savings);
1023 return c;
/* Given GS which is an add or subtract of scalar integers or pointers,
   make at least one appropriate entry in the candidate table.  RHS1 and
   RHS2 are the two rhs operands of GS; SPEED indicates whether the
   enclosing block is optimized for speed.  RHS1 is known to be an SSA
   name (the caller filters out constant rhs1).  */

static void
slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
{
  bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
  slsr_cand_t c = NULL, c2;

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      /* First record an interpretation assuming RHS1 is the base expression
	 and RHS2 is the stride.  But it doesn't make sense for the
	 stride to be a pointer, so don't record a candidate in that case.  */
      if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);

	  /* Add the first interpretation to the statement-candidate
	     mapping.  */
	  add_cand_for_stmt (gs, c);
	}

      /* If the two RHS operands are identical, or this is a subtract,
	 we're done; a second interpretation with the roles swapped
	 would either be redundant or wrong for a subtract.  */
      if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
	return;

      /* Otherwise, record another interpretation assuming RHS2 is the
	 base expression and RHS1 is the stride, again provided that the
	 stride is not a pointer.  Chain it onto the first interpretation
	 if one was recorded; otherwise it becomes the statement's only
	 mapping.  */
      if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
	{
	  c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
	  if (c)
	    c->next_interp = c2->cand_num;
	  else
	    add_cand_for_stmt (gs, c2);
	}
    }
  else
    {
      double_int index;

      /* RHS2 is a constant:  record an interpretation for the
	 add-immediate, negating the index for a subtract.  */
      index = tree_to_double_int (rhs2);
      if (subtract_p)
	index = -index;

      c = create_add_imm_cand (gs, rhs1, index, speed);

      /* Add the interpretation to the statement-candidate mapping.  */
      add_cand_for_stmt (gs, c);
    }
}
1082 /* Given GS which is a negate of a scalar integer, make an appropriate
1083 entry in the candidate table. A negate is equivalent to a multiply
1084 by -1. */
1086 static void
1087 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1089 /* Record a CAND_MULT interpretation for the multiply by -1. */
1090 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1092 /* Add the interpretation to the statement-candidate mapping. */
1093 add_cand_for_stmt (gs, c);
1096 /* Help function for legal_cast_p, operating on two trees. Checks
1097 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1098 for more details. */
1100 static bool
1101 legal_cast_p_1 (tree lhs, tree rhs)
1103 tree lhs_type, rhs_type;
1104 unsigned lhs_size, rhs_size;
1105 bool lhs_wraps, rhs_wraps;
1107 lhs_type = TREE_TYPE (lhs);
1108 rhs_type = TREE_TYPE (rhs);
1109 lhs_size = TYPE_PRECISION (lhs_type);
1110 rhs_size = TYPE_PRECISION (rhs_type);
1111 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1112 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1114 if (lhs_size < rhs_size
1115 || (rhs_wraps && !lhs_wraps)
1116 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1117 return false;
1119 return true;
1122 /* Return TRUE if GS is a statement that defines an SSA name from
1123 a conversion and is legal for us to combine with an add and multiply
1124 in the candidate table. For example, suppose we have:
1126 A = B + i;
1127 C = (type) A;
1128 D = C * S;
1130 Without the type-cast, we would create a CAND_MULT for D with base B,
1131 index i, and stride S. We want to record this candidate only if it
1132 is equivalent to apply the type cast following the multiply:
1134 A = B + i;
1135 E = A * S;
1136 D = (type) E;
1138 We will record the type with the candidate for D. This allows us
1139 to use a similar previous candidate as a basis. If we have earlier seen
1141 A' = B + i';
1142 C' = (type) A';
1143 D' = C' * S;
1145 we can replace D with
1147 D = D' + (i - i') * S;
1149 But if moving the type-cast would change semantics, we mustn't do this.
1151 This is legitimate for casts from a non-wrapping integral type to
1152 any integral type of the same or larger size. It is not legitimate
1153 to convert a wrapping type to a non-wrapping type, or to a wrapping
1154 type of a different size. I.e., with a wrapping type, we must
1155 assume that the addition B + i could wrap, in which case performing
1156 the multiply before or after one of the "illegal" type casts will
1157 have different semantics. */
1159 static bool
1160 legal_cast_p (gimple gs, tree rhs)
1162 if (!is_gimple_assign (gs)
1163 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1164 return false;
1166 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
/* Given GS which is a cast to a scalar integer type, determine whether
   the cast is legal for strength reduction.  If so, make at least one
   appropriate entry in the candidate table.  RHS1 is the operand being
   cast; SPEED selects speed- versus size-based costing.  */

static void
slsr_process_cast (gimple gs, tree rhs1, bool speed)
{
  tree lhs, ctype;
  slsr_cand_t base_cand, c, c2;
  unsigned savings = 0;

  if (!legal_cast_p (gs, rhs1))
    return;

  lhs = gimple_assign_lhs (gs);
  base_cand = base_cand_from_table (rhs1);
  /* The recorded candidate type comes from the cast's result.  */
  ctype = TREE_TYPE (lhs);

  if (base_cand)
    {
      /* Create one new candidate per interpretation of RHS1.  */
      while (base_cand)
	{
	  /* Propagate all data from the base candidate except the type,
	     which comes from the cast, and the base candidate's cast,
	     which is no longer applicable.  */
	  if (has_single_use (rhs1))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));

	  c = alloc_cand_and_find_basis (base_cand->kind, gs,
					 base_cand->base_expr,
					 base_cand->index, base_cand->stride,
					 ctype, savings);

	  /* NOTE(review): the candidates created in this loop are not
	     chained together via next_interp, and only the last one is
	     recorded in the statement-candidate mapping below -- confirm
	     this is intended.  */
	  if (base_cand->next_interp)
	    base_cand = lookup_cand (base_cand->next_interp);
	  else
	    base_cand = NULL;
	}
    }
  else
    {
      /* If nothing is known about the RHS, create fresh CAND_ADD and
	 CAND_MULT interpretations:

	 X = Y + (0 * 1)
	 X = (Y + 0) * 1

	 The first of these is somewhat arbitrary, but the choice of
	 1 for the stride simplifies the logic for propagating casts
	 into their uses.  */
      c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
				     integer_one_node, ctype, 0);
      c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
				      integer_one_node, ctype, 0);
      c->next_interp = c2->cand_num;
    }

  /* Add the first (or only) interpretation to the statement-candidate
     mapping.  */
  add_cand_for_stmt (gs, c);
}
/* Given GS which is a copy of a scalar integer type, make at least one
   appropriate entry in the candidate table.  RHS1 is the copied SSA
   name; SPEED selects speed- versus size-based costing.

   This interface is included for completeness, but is unnecessary
   if this pass immediately follows a pass that performs copy
   propagation, such as DOM.  */

static void
slsr_process_copy (gimple gs, tree rhs1, bool speed)
{
  slsr_cand_t base_cand, c, c2;
  unsigned savings = 0;

  base_cand = base_cand_from_table (rhs1);

  if (base_cand)
    {
      /* Create one new candidate per interpretation of RHS1.  */
      while (base_cand)
	{
	  /* Propagate all data from the base candidate.  */
	  if (has_single_use (rhs1))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));

	  c = alloc_cand_and_find_basis (base_cand->kind, gs,
					 base_cand->base_expr,
					 base_cand->index, base_cand->stride,
					 base_cand->cand_type, savings);

	  /* NOTE(review): as in slsr_process_cast, only the candidate
	     created on the final iteration is recorded in the mapping
	     below, and the new candidates are not chained -- confirm
	     this is intended.  */
	  if (base_cand->next_interp)
	    base_cand = lookup_cand (base_cand->next_interp);
	  else
	    base_cand = NULL;
	}
    }
  else
    {
      /* If nothing is known about the RHS, create fresh CAND_ADD and
	 CAND_MULT interpretations:

	 X = Y + (0 * 1)
	 X = (Y + 0) * 1

	 The first of these is somewhat arbitrary, but the choice of
	 1 for the stride simplifies the logic for propagating casts
	 into their uses.  */
      c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
				     integer_one_node, TREE_TYPE (rhs1), 0);
      c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
				      integer_one_node, TREE_TYPE (rhs1), 0);
      c->next_interp = c2->cand_num;
    }

  /* Add the first (or only) interpretation to the statement-candidate
     mapping.  */
  add_cand_for_stmt (gs, c);
}
/* Find strength-reduction candidates in block BB.  Dominator-walk
   callback (WALK_DATA is unused), so blocks are visited in dominator
   order and candidates may refer to bases from dominating blocks.  */

static void
find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
			  basic_block bb)
{
  bool speed = optimize_bb_for_speed_p (bb);
  gimple_stmt_iterator gsi;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple gs = gsi_stmt (gsi);

      /* Memory references (loads/stores) are handled separately.  */
      if (gimple_vuse (gs) && gimple_assign_single_p (gs))
	slsr_process_ref (gs);

      /* Otherwise, only scalar-integer-mode assignments are of
	 interest.  */
      else if (is_gimple_assign (gs)
	       && SCALAR_INT_MODE_P
		    (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
	{
	  tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;

	  /* First switch:  extract the operands needed by each rhs
	     code, skipping the statement entirely when rhs1 is not an
	     SSA name.  */
	  switch (gimple_assign_rhs_code (gs))
	    {
	    case MULT_EXPR:
	    case PLUS_EXPR:
	      rhs1 = gimple_assign_rhs1 (gs);
	      rhs2 = gimple_assign_rhs2 (gs);
	      /* Should never happen, but currently some buggy situations
		 in earlier phases put constants in rhs1.  */
	      if (TREE_CODE (rhs1) != SSA_NAME)
		continue;
	      break;

	    /* Possible future opportunity: rhs1 of a ptr+ can be
	       an ADDR_EXPR.  */
	    case POINTER_PLUS_EXPR:
	    case MINUS_EXPR:
	      rhs2 = gimple_assign_rhs2 (gs);
	      /* Fall-through.  */

	    case NOP_EXPR:
	    case MODIFY_EXPR:
	    case NEGATE_EXPR:
	      rhs1 = gimple_assign_rhs1 (gs);
	      if (TREE_CODE (rhs1) != SSA_NAME)
		continue;
	      break;

	    default:
	      ;
	    }

	  /* Second switch:  dispatch to the handler for each
	     recognized rhs code; unrecognized codes fall through and
	     record no candidate.  */
	  switch (gimple_assign_rhs_code (gs))
	    {
	    case MULT_EXPR:
	      slsr_process_mul (gs, rhs1, rhs2, speed);
	      break;

	    case PLUS_EXPR:
	    case POINTER_PLUS_EXPR:
	    case MINUS_EXPR:
	      slsr_process_add (gs, rhs1, rhs2, speed);
	      break;

	    case NEGATE_EXPR:
	      slsr_process_neg (gs, rhs1, speed);
	      break;

	    case NOP_EXPR:
	      slsr_process_cast (gs, rhs1, speed);
	      break;

	    case MODIFY_EXPR:
	      slsr_process_copy (gs, rhs1, speed);
	      break;

	    default:
	      ;
	    }
	}
    }
}
/* Dump candidate C for debug:  the candidate number, its block index
   and statement, then a kind-specific rendering of the
   (base, index, stride) triple, the candidate type, and the links to
   related candidates.  */

static void
dump_candidate (slsr_cand_t c)
{
  fprintf (dump_file, "%3d [%d] ", c->cand_num,
	   gimple_bb (c->cand_stmt)->index);
  print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
  switch (c->kind)
    {
    case CAND_MULT:
      /* Rendered as (B + i) * S.  */
      fputs ("     MULT : (", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + ", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (") * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (" : ", dump_file);
      break;
    case CAND_ADD:
      /* Rendered as B + (i * S).  */
      fputs ("     ADD  : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (" * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") : ", dump_file);
      break;
    case CAND_REF:
      /* Rendered as B + (S) + i.  */
      fputs ("     REF  : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") + ", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (" : ", dump_file);
      break;
    default:
      gcc_unreachable ();
    }
  print_generic_expr (dump_file, c->cand_type, 0);
  fprintf (dump_file, "\n     basis: %d  dependent: %d  sibling: %d\n",
	   c->basis, c->dependent, c->sibling);
  fprintf (dump_file, "     next-interp: %d  dead-savings: %d\n",
	   c->next_interp, c->dead_savings);
  if (c->def_phi)
    {
      fputs ("     phi:  ", dump_file);
      print_gimple_stmt (dump_file, c->def_phi, 0, 0);
    }
  fputs ("\n", dump_file);
}
1425 /* Dump the candidate vector for debug. */
1427 static void
1428 dump_cand_vec (void)
1430 unsigned i;
1431 slsr_cand_t c;
1433 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1435 FOR_EACH_VEC_ELT (cand_vec, i, c)
1436 dump_candidate (c);
1439 /* Callback used to dump the candidate chains hash table. */
1441 static int
1442 base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
1444 const_cand_chain_t chain = *((const_cand_chain_t *) slot);
1445 cand_chain_t p;
1447 print_generic_expr (dump_file, chain->base_expr, 0);
1448 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1450 for (p = chain->next; p; p = p->next)
1451 fprintf (dump_file, " -> %d", p->cand->cand_num);
1453 fputs ("\n", dump_file);
1454 return 1;
1457 /* Dump the candidate chains. */
1459 static void
1460 dump_cand_chains (void)
1462 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1463 htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
1464 fputs ("\n", dump_file);
/* Dump the increment vector for debug.  Emits nothing unless detailed
   dumping (TDF_DETAILS) was requested.  */

static void
dump_incr_vec (void)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "\nIncrement vector:\n\n");

      for (i = 0; i < incr_vec_len; i++)
	{
	  fprintf (dump_file, "%3d  increment:   ", i);
	  dump_double_int (dump_file, incr_vec[i].incr, false);
	  fprintf (dump_file, "\n     count:       %d", incr_vec[i].count);
	  fprintf (dump_file, "\n     cost:        %d", incr_vec[i].cost);
	  fputs ("\n     initializer: ", dump_file);
	  print_generic_expr (dump_file, incr_vec[i].initializer, 0);
	  fputs ("\n\n", dump_file);
	}
    }
}
1491 /* Recursive helper for unconditional_cands_with_known_stride_p.
1492 Returns TRUE iff C, its siblings, and its dependents are all
1493 unconditional candidates. */
1495 static bool
1496 unconditional_cands (slsr_cand_t c)
1498 if (c->def_phi)
1499 return false;
1501 if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
1502 return false;
1504 if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
1505 return false;
1507 return true;
1510 /* Determine whether or not the tree of candidates rooted at
1511 ROOT consists entirely of unconditional increments with
1512 an INTEGER_CST stride. */
1514 static bool
1515 unconditional_cands_with_known_stride_p (slsr_cand_t root)
1517 /* The stride is identical for all related candidates, so
1518 check it once. */
1519 if (TREE_CODE (root->stride) != INTEGER_CST)
1520 return false;
1522 return unconditional_cands (lookup_cand (root->dependent));
/* Replace *EXPR in candidate C with an equivalent strength-reduced
   data reference:  a MEM_REF of the form *(base + stride) + index.  */

static void
replace_ref (tree *expr, slsr_cand_t c)
{
  tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
  unsigned HOST_WIDE_INT misalign;
  unsigned align;

  /* Ensure the memory reference carries the minimum alignment
     requirement for the data type.  See PR58041.  */
  get_object_alignment_1 (*expr, &align, &misalign);
  if (misalign != 0)
    /* Reduce the alignment to the largest power of two dividing the
       misalignment.  */
    align = (misalign & -misalign);
  if (align < TYPE_ALIGN (acc_type))
    acc_type = build_aligned_type (acc_type, align);

  add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
			  c->base_expr, c->stride);
  mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
			 double_int_to_tree (c->cand_type, c->index));

  /* Gimplify the base addressing expression for the new MEM_REF tree.  */
  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
  TREE_OPERAND (mem_ref, 0)
    = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
				/*simple_p=*/true, NULL,
				/*before=*/true, GSI_SAME_STMT);
  copy_ref_info (mem_ref, *expr);
  *expr = mem_ref;
  update_stmt (c->cand_stmt);
}
1559 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1560 dependent of candidate C with an equivalent strength-reduced data
1561 reference. */
1563 static void
1564 replace_refs (slsr_cand_t c)
1566 if (gimple_vdef (c->cand_stmt))
1568 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1569 replace_ref (lhs, c);
1571 else
1573 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1574 replace_ref (rhs, c);
1577 if (c->sibling)
1578 replace_refs (lookup_cand (c->sibling));
1580 if (c->dependent)
1581 replace_refs (lookup_cand (c->dependent));
1584 /* Calculate the increment required for candidate C relative to
1585 its basis. */
1587 static double_int
1588 cand_increment (slsr_cand_t c)
1590 slsr_cand_t basis;
1592 /* If the candidate doesn't have a basis, just return its own
1593 index. This is useful in record_increments to help us find
1594 an existing initializer. */
1595 if (!c->basis)
1596 return c->index;
1598 basis = lookup_cand (c->basis);
1599 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1600 return c->index - basis->index;
1603 /* Calculate the increment required for candidate C relative to
1604 its basis. If we aren't going to generate pointer arithmetic
1605 for this candidate, return the absolute value of that increment
1606 instead. */
1608 static inline double_int
1609 cand_abs_increment (slsr_cand_t c)
1611 double_int increment = cand_increment (c);
1613 if (!address_arithmetic_p && increment.is_negative ())
1614 increment = -increment;
1616 return increment;
1619 /* If *VAR is NULL or is not of a compatible type with TYPE, create a
1620 new temporary reg of type TYPE and store it in *VAR. */
1622 static inline void
1623 lazy_create_slsr_reg (tree *var, tree type)
1625 if (!*var || !types_compatible_p (TREE_TYPE (*var), type))
1626 *var = create_tmp_reg (type, "slsr");
1629 /* Return TRUE iff candidate C has already been replaced under
1630 another interpretation. */
1632 static inline bool
1633 cand_already_replaced (slsr_cand_t c)
1635 return (gimple_bb (c->cand_stmt) == 0);
/* Helper routine for replace_dependents, doing the work for a
   single candidate C.  CAND_CODE is the rhs code of C's statement
   (the caller has already excluded casts and copies).  The statement
   is rewritten in place as X = basis +/- bump, where
   bump = (i - i') * S; see the commentary before replace_dependents.  */

static void
replace_dependent (slsr_cand_t c, enum tree_code cand_code)
{
  double_int stride = tree_to_double_int (c->stride);
  double_int bump = cand_increment (c) * stride;
  gimple stmt_to_print = NULL;
  slsr_cand_t basis;
  tree basis_name, incr_type, bump_tree;
  enum tree_code code;

  /* It is highly unlikely, but possible, that the resulting
     bump doesn't fit in a HWI.  Abandon the replacement
     in this case.  Restriction to signed HWI is conservative
     for unsigned types but allows for safe negation without
     twisted logic.  */
  if (!bump.fits_shwi ())
    return;

  basis = lookup_cand (c->basis);
  basis_name = gimple_assign_lhs (basis->cand_stmt);

  /* Pointer adds keep their sizetype offset and POINTER_PLUS_EXPR
     code; everything else becomes a PLUS_EXPR in the type of the
     statement's first operand.  */
  if (cand_code == POINTER_PLUS_EXPR)
    {
      incr_type = sizetype;
      code = cand_code;
    }
  else
    {
      incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
      code = PLUS_EXPR;
    }

  /* For a negative bump in the non-pointer case, subtract the
     absolute value rather than adding a negative constant.  */
  if (bump.is_negative ()
      && cand_code != POINTER_PLUS_EXPR)
    {
      code = MINUS_EXPR;
      bump = -bump;
    }

  bump_tree = double_int_to_tree (incr_type, bump);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Replacing: ", dump_file);
      print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
    }

  if (bump.is_zero ())
    {
      /* A zero bump means C computes the same value as its basis, so
	 the statement degenerates to a copy from the basis.  */
      tree lhs = gimple_assign_lhs (c->cand_stmt);
      gimple copy_stmt = gimple_build_assign (lhs, basis_name);
      gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
      gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
      gsi_replace (&gsi, copy_stmt, false);
      if (dump_file && (dump_flags & TDF_DETAILS))
	stmt_to_print = copy_stmt;
    }
  else
    {
      tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
      tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);

      /* If the statement already has the desired operands, leave it
	 alone.  A NEGATE_EXPR statement has no second operand, so the
	 duplicate test does not apply.  */
      if (cand_code != NEGATE_EXPR
	  && ((operand_equal_p (rhs1, basis_name, 0)
	       && operand_equal_p (rhs2, bump_tree, 0))
	      || (operand_equal_p (rhs1, bump_tree, 0)
		  && operand_equal_p (rhs2, basis_name, 0))))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fputs ("(duplicate, not actually replacing)", dump_file);
	      stmt_to_print = c->cand_stmt;
	    }
	}
      else
	{
	  /* Rewrite the rhs in place as basis +/- bump.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
	  update_stmt (gsi_stmt (gsi));
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = gsi_stmt (gsi);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("With: ", dump_file);
      print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
      fputs ("\n", dump_file);
    }
}
1731 /* Replace candidate C, each sibling of candidate C, and each
1732 dependent of candidate C with an add or subtract. Note that we
1733 only operate on CAND_MULTs with known strides, so we will never
1734 generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is
1735 replaced by X = Y + ((i - i') * S), as described in the module
1736 commentary. The folded value ((i - i') * S) is referred to here
1737 as the "bump." */
1739 static void
1740 replace_dependents (slsr_cand_t c)
1742 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1744 /* It is not useful to replace casts, copies, or adds of an SSA name
1745 and a constant. Also skip candidates that have already been
1746 replaced under another interpretation. */
1747 if (cand_code != MODIFY_EXPR
1748 && cand_code != NOP_EXPR
1749 && c->kind == CAND_MULT
1750 && !cand_already_replaced (c))
1751 replace_dependent (c, cand_code);
1753 if (c->sibling)
1754 replace_dependents (lookup_cand (c->sibling));
1756 if (c->dependent)
1757 replace_dependents (lookup_cand (c->dependent));
1760 /* Return the index in the increment vector of the given INCREMENT. */
1762 static inline unsigned
1763 incr_vec_index (double_int increment)
1765 unsigned i;
1767 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
1770 gcc_assert (i < incr_vec_len);
1771 return i;
1774 /* Count the number of candidates in the tree rooted at C that have
1775 not already been replaced under other interpretations. */
1777 static unsigned
1778 count_candidates (slsr_cand_t c)
1780 unsigned count = cand_already_replaced (c) ? 0 : 1;
1782 if (c->sibling)
1783 count += count_candidates (lookup_cand (c->sibling));
1785 if (c->dependent)
1786 count += count_candidates (lookup_cand (c->dependent));
1788 return count;
/* Increase the count of INCREMENT by one in the increment vector.
   INCREMENT is associated with candidate C.  If an initializer
   T_0 = stride * I is provided by a candidate that dominates all
   candidates with the same increment, also record T_0 for subsequent
   use.  */

static void
record_increment (slsr_cand_t c, double_int increment)
{
  bool found = false;
  unsigned i;

  /* Treat increments that differ only in sign as identical so as to
     share initializers, unless we are generating pointer arithmetic.  */
  if (!address_arithmetic_p && increment.is_negative ())
    increment = -increment;

  for (i = 0; i < incr_vec_len; i++)
    {
      if (incr_vec[i].incr == increment)
	{
	  incr_vec[i].count++;
	  found = true;

	  /* If we previously recorded an initializer that doesn't
	     dominate this candidate, it's not going to be useful to
	     us after all.  */
	  if (incr_vec[i].initializer
	      && !dominated_by_p (CDI_DOMINATORS,
				  gimple_bb (c->cand_stmt),
				  incr_vec[i].init_bb))
	    {
	      incr_vec[i].initializer = NULL_TREE;
	      incr_vec[i].init_bb = NULL;
	    }

	  break;
	}
    }

  if (!found)
    {
      /* The first time we see an increment, create the entry for it.
	 If this is the root candidate which doesn't have a basis, set
	 the count to zero.  We're only processing it so it can possibly
	 provide an initializer for other candidates.  */
      incr_vec[incr_vec_len].incr = increment;
      incr_vec[incr_vec_len].count = c->basis ? 1 : 0;
      incr_vec[incr_vec_len].cost = COST_INFINITE;

      /* Optimistically record the first occurrence of this increment
	 as providing an initializer (if it does); we will revise this
	 opinion later if it doesn't dominate all other occurrences.
	 Exception:  increments of -1, 0, 1 never need initializers.  */
      if (c->kind == CAND_ADD
	  && c->index == increment
	  && (increment.sgt (double_int_one)
	      || increment.slt (double_int_minus_one))
	  && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
	      || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
	{
	  /* Whichever rhs operand is not the base expression is the
	     potential initializer T_0.  */
	  tree t0 = NULL_TREE;
	  tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
	  tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
	  if (operand_equal_p (rhs1, c->base_expr, 0))
	    t0 = rhs2;
	  else if (operand_equal_p (rhs2, c->base_expr, 0))
	    t0 = rhs1;
	  /* Only a T_0 defined by a statement in a real block can
	     serve; note incr_vec_len advances on every path below.  */
	  if (t0
	      && SSA_NAME_DEF_STMT (t0)
	      && gimple_bb (SSA_NAME_DEF_STMT (t0)))
	    {
	      incr_vec[incr_vec_len].initializer = t0;
	      incr_vec[incr_vec_len++].init_bb
		= gimple_bb (SSA_NAME_DEF_STMT (t0));
	    }
	  else
	    {
	      incr_vec[incr_vec_len].initializer = NULL_TREE;
	      incr_vec[incr_vec_len++].init_bb = NULL;
	    }
	}
      else
	{
	  incr_vec[incr_vec_len].initializer = NULL_TREE;
	  incr_vec[incr_vec_len++].init_bb = NULL;
	}
    }
}
/* Determine how many times each unique increment occurs in the set
   of candidates rooted at C's parent, recording the data in the
   increment vector.  For each unique increment I, if an initializer
   T_0 = stride * I is provided by a candidate that dominates all
   candidates with the same increment, also record T_0 for subsequent
   use.  Candidates are visited pre-order (self, then siblings, then
   dependents), which record_increment relies on for its dominance
   bookkeeping.  */

static void
record_increments (slsr_cand_t c)
{
  if (!cand_already_replaced (c))
    record_increment (c, cand_increment (c));

  if (c->sibling)
    record_increments (lookup_cand (c->sibling));

  if (c->dependent)
    record_increments (lookup_cand (c->dependent));
}
1900 /* Return the first candidate in the tree rooted at C that has not
1901 already been replaced, favoring siblings over dependents. */
1903 static slsr_cand_t
1904 unreplaced_cand_in_tree (slsr_cand_t c)
1906 if (!cand_already_replaced (c))
1907 return c;
1909 if (c->sibling)
1911 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
1912 if (sib)
1913 return sib;
1916 if (c->dependent)
1918 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
1919 if (dep)
1920 return dep;
1923 return NULL;
1926 /* Return TRUE if the candidates in the tree rooted at C should be
1927 optimized for speed, else FALSE. We estimate this based on the block
1928 containing the most dominant candidate in the tree that has not yet
1929 been replaced. */
1931 static bool
1932 optimize_cands_for_speed_p (slsr_cand_t c)
1934 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
1935 gcc_assert (c2);
1936 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
1939 /* Add COST_IN to the lowest cost of any dependent path starting at
1940 candidate C or any of its siblings, counting only candidates along
1941 such paths with increment INCR. Assume that replacing a candidate
1942 reduces cost by REPL_SAVINGS. Also account for savings from any
1943 statements that would go dead. */
1945 static int
1946 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, double_int incr)
1948 int local_cost, sib_cost;
1949 double_int cand_incr = cand_abs_increment (c);
1951 if (cand_already_replaced (c))
1952 local_cost = cost_in;
1953 else if (incr == cand_incr)
1954 local_cost = cost_in - repl_savings - c->dead_savings;
1955 else
1956 local_cost = cost_in - c->dead_savings;
1958 if (c->dependent)
1959 local_cost = lowest_cost_path (local_cost, repl_savings,
1960 lookup_cand (c->dependent), incr);
1962 if (c->sibling)
1964 sib_cost = lowest_cost_path (cost_in, repl_savings,
1965 lookup_cand (c->sibling), incr);
1966 local_cost = MIN (local_cost, sib_cost);
1969 return local_cost;
1972 /* Compute the total savings that would accrue from all replacements
1973 in the candidate tree rooted at C, counting only candidates with
1974 increment INCR. Assume that replacing a candidate reduces cost
1975 by REPL_SAVINGS. Also account for savings from statements that
1976 would go dead. */
1978 static int
1979 total_savings (int repl_savings, slsr_cand_t c, double_int incr)
1981 int savings = 0;
1982 double_int cand_incr = cand_abs_increment (c);
1984 if (incr == cand_incr && !cand_already_replaced (c))
1985 savings += repl_savings + c->dead_savings;
1987 if (c->dependent)
1988 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr);
1990 if (c->sibling)
1991 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr);
1993 return savings;
/* Use target-specific costs to determine and record which increments
   in the current candidate tree are profitable to replace, assuming
   MODE and SPEED.  FIRST_DEP is the first dependent of the root of
   the candidate tree.

   One slight limitation here is that we don't account for the possible
   introduction of casts in some cases.  See replace_one_candidate for
   the cases where these are introduced.  This should probably be cleaned
   up sometime.  */

static void
analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
{
  unsigned i;

  for (i = 0; i < incr_vec_len; i++)
    {
      /* Only meaningful when fits_shwi() holds, which the first test
	 below guarantees before INCR is consulted.  */
      HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();

      /* If somehow this increment is bigger than a HWI, we won't
	 be optimizing candidates that use it.  And if the increment
	 has a count of zero, nothing will be done with it.  */
      if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
	incr_vec[i].cost = COST_INFINITE;

      /* Increments of 0, 1, and -1 are always profitable to replace,
	 because they always replace a multiply or add with an add or
	 copy, and may cause one or more existing instructions to go
	 dead.  Exception:  -1 can't be assumed to be profitable for
	 pointer addition.  */
      else if (incr == 0
	       || incr == 1
	       || (incr == -1
		   && (gimple_assign_rhs_code (first_dep->cand_stmt)
		       != POINTER_PLUS_EXPR)))
	incr_vec[i].cost = COST_NEUTRAL;

      /* FORNOW: If we need to add an initializer, give up if a cast from
	 the candidate's type to its stride's type can lose precision.
	 This could eventually be handled better by expressly retaining the
	 result of a cast to a wider type in the stride.  Example:

           short int _1;
	   _2 = (int) _1;
	   _3 = _2 * 10;
	   _4 = x + _3;    ADD: x + (10 * _1) : int
	   _5 = _2 * 15;
	   _6 = x + _3;    ADD: x + (15 * _1) : int

         Right now replacing _6 would cause insertion of an initializer
	 of the form "short int T = _1 * 5;" followed by a cast to
	 int, which could overflow incorrectly.  Had we recorded _2 or
	 (int)_1 as the stride, this wouldn't happen.  However, doing
         this breaks other opportunities, so this will require some
	 care.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && !legal_cast_p_1 (first_dep->stride,
				   gimple_assign_lhs (first_dep->cand_stmt)))

	incr_vec[i].cost = COST_INFINITE;

      /* If we need to add an initializer, make sure we don't introduce
	 a multiply by a pointer type, which can happen in certain cast
	 scenarios.  FIXME: When cleaning up these cast issues, we can
         afford to introduce the multiply provided we cast out to an
         unsigned int of appropriate size.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))

	incr_vec[i].cost = COST_INFINITE;

      /* For any other increment, if this is a multiply candidate, we
	 must introduce a temporary T and initialize it with
	 T_0 = stride * increment.  When optimizing for speed, walk the
	 candidate tree to calculate the best cost reduction along any
	 path; if it offsets the fixed cost of inserting the initializer,
	 replacing the increment is profitable.  When optimizing for
         size, instead calculate the total cost reduction from replacing
	 all candidates with this increment.  */
      else if (first_dep->kind == CAND_MULT)
	{
	  int cost = mult_by_coeff_cost (incr, mode, speed);
	  int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
	  if (speed)
	    cost = lowest_cost_path (cost, repl_savings, first_dep,
				     incr_vec[i].incr);
	  else
	    cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr);

	  incr_vec[i].cost = cost;
	}

      /* If this is an add candidate, the initializer may already
	 exist, so only calculate the cost of the initializer if it
	 doesn't.  We are replacing one add with another here, so the
	 known replacement savings is zero.  We will account for removal
	 of dead instructions in lowest_cost_path or total_savings.  */
      else
	{
	  int cost = 0;
	  if (!incr_vec[i].initializer)
	    cost = mult_by_coeff_cost (incr, mode, speed);

	  if (speed)
	    cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr);
	  else
	    cost -= total_savings (0, first_dep, incr_vec[i].incr);

	  incr_vec[i].cost = cost;
	}
    }
}
/* Return the nearest common dominator of BB1 and BB2.  If the blocks
   are identical, return the earlier of C1 and C2 in *WHERE.  Otherwise,
   if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
   return C2 in *WHERE; and if the NCD matches neither, return NULL in
   *WHERE.  Note:  It is possible for one of C1 and C2 to be NULL.  */

static basic_block
ncd_for_two_cands (basic_block bb1, basic_block bb2,
		   slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
{
  basic_block ncd;

  /* A missing block means the other argument wins by default.  */
  if (!bb1)
    {
      *where = c2;
      return bb2;
    }

  if (!bb2)
    {
      *where = c1;
      return bb1;
    }

  ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);

  /* If both candidates are in the same block, the earlier
     candidate wins.  (Lower cand_num is taken as earlier --
     presumably allocation order follows statement order; confirm
     against alloc_cand_and_find_basis.)  */
  if (bb1 == ncd && bb2 == ncd)
    {
      if (!c1 || (c2 && c2->cand_num < c1->cand_num))
	*where = c2;
      else
	*where = c1;
    }

  /* Otherwise, if one of them produced a candidate in the
     dominator, that one wins.  */
  else if (bb1 == ncd)
    *where = c1;

  else if (bb2 == ncd)
    *where = c2;

  /* If neither matches the dominator, neither wins.  */
  else
    *where = NULL;

  return ncd;
}
2162 /* Consider all candidates in the tree rooted at C for which INCR
2163 represents the required increment of C relative to its basis.
2164 Find and return the basic block that most nearly dominates all
2165 such candidates. If the returned block contains one or more of
2166 the candidates, return the earliest candidate in the block in
2167 *WHERE. */
2169 static basic_block
2170 nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
2171 slsr_cand_t *where)
2173 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
2174 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
2175 double_int cand_incr;
2177 /* First find the NCD of all siblings and dependents. */
2178 if (c->sibling)
2179 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
2180 incr, &sib_where);
2181 if (c->dependent)
2182 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
2183 incr, &dep_where);
2184 if (!sib_ncd && !dep_ncd)
2186 new_where = NULL;
2187 ncd = NULL;
2189 else if (sib_ncd && !dep_ncd)
2191 new_where = sib_where;
2192 ncd = sib_ncd;
2194 else if (dep_ncd && !sib_ncd)
2196 new_where = dep_where;
2197 ncd = dep_ncd;
2199 else
2200 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
2201 dep_where, &new_where);
2203 /* If the candidate's increment doesn't match the one we're interested
2204 in, then the result depends only on siblings and dependents. */
2205 cand_incr = cand_abs_increment (c);
2207 if (cand_incr != incr || cand_already_replaced (c))
2209 *where = new_where;
2210 return ncd;
2213 /* Otherwise, compare this candidate with the result from all siblings
2214 and dependents. */
2215 this_where = c;
2216 this_ncd = gimple_bb (c->cand_stmt);
2217 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
2219 return ncd;
2222 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
2224 static inline bool
2225 profitable_increment_p (unsigned index)
2227 return (incr_vec[index].cost <= COST_NEUTRAL);
2230 /* For each profitable increment in the increment vector not equal to
2231 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
2232 dominator of all statements in the candidate chain rooted at C
2233 that require that increment, and insert an initializer
2234 T_0 = stride * increment at that location. Record T_0 with the
2235 increment record. */
2237 static void
2238 insert_initializers (slsr_cand_t c)
2240 unsigned i;
2241 tree new_var = NULL_TREE;
2243 for (i = 0; i < incr_vec_len; i++)
2245 basic_block bb;
2246 slsr_cand_t where = NULL;
2247 gimple init_stmt;
2248 tree stride_type, new_name, incr_tree;
2249 double_int incr = incr_vec[i].incr;
2251 if (!profitable_increment_p (i)
2252 || incr.is_one ()
2253 || (incr.is_minus_one ()
2254 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
2255 || incr.is_zero ())
2256 continue;
2258 /* We may have already identified an existing initializer that
2259 will suffice. */
2260 if (incr_vec[i].initializer)
2262 if (dump_file && (dump_flags & TDF_DETAILS))
2264 fputs ("Using existing initializer: ", dump_file);
2265 print_gimple_stmt (dump_file,
2266 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
2267 0, 0);
2269 continue;
2272 /* Find the block that most closely dominates all candidates
2273 with this increment. If there is at least one candidate in
2274 that block, the earliest one will be returned in WHERE. */
2275 bb = nearest_common_dominator_for_cands (c, incr, &where);
2277 /* Create a new SSA name to hold the initializer's value. */
2278 stride_type = TREE_TYPE (c->stride);
2279 lazy_create_slsr_reg (&new_var, stride_type);
2280 new_name = make_ssa_name (new_var, NULL);
2281 incr_vec[i].initializer = new_name;
2283 /* Create the initializer and insert it in the latest possible
2284 dominating position. */
2285 incr_tree = double_int_to_tree (stride_type, incr);
2286 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
2287 c->stride, incr_tree);
2288 if (where)
2290 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
2291 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2292 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
2294 else
2296 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2297 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
2299 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2300 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2301 else
2302 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
2304 gimple_set_location (init_stmt, gimple_location (basis_stmt));
2307 if (dump_file && (dump_flags & TDF_DETAILS))
2309 fputs ("Inserting initializer: ", dump_file);
2310 print_gimple_stmt (dump_file, init_stmt, 0, 0);
2315 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
2316 type TO_TYPE, and insert it in front of the statement represented
2317 by candidate C. Use *NEW_VAR to create the new SSA name. Return
2318 the new SSA name. */
2320 static tree
2321 introduce_cast_before_cand (slsr_cand_t c, tree to_type,
2322 tree from_expr, tree *new_var)
2324 tree cast_lhs;
2325 gimple cast_stmt;
2326 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2328 lazy_create_slsr_reg (new_var, to_type);
2329 cast_lhs = make_ssa_name (*new_var, NULL);
2330 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
2331 from_expr, NULL_TREE);
2332 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2333 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
2335 if (dump_file && (dump_flags & TDF_DETAILS))
2337 fputs (" Inserting: ", dump_file);
2338 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
2341 return cast_lhs;
2344 /* Replace the RHS of the statement represented by candidate C with
2345 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
2346 leave C unchanged or just interchange its operands. The original
2347 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
2348 If the replacement was made and we are doing a details dump,
2349 return the revised statement, else NULL. */
2351 static gimple
2352 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
2353 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
2354 slsr_cand_t c)
2356 if (new_code != old_code
2357 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
2358 || !operand_equal_p (new_rhs2, old_rhs2, 0))
2359 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
2360 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
2362 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2363 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
2364 update_stmt (gsi_stmt (gsi));
2366 if (dump_file && (dump_flags & TDF_DETAILS))
2367 return gsi_stmt (gsi);
2370 else if (dump_file && (dump_flags & TDF_DETAILS))
2371 fputs (" (duplicate, not actually replacing)\n", dump_file);
2373 return NULL;
2376 /* Strength-reduce the statement represented by candidate C by replacing
2377 it with an equivalent addition or subtraction. I is the index into
2378 the increment vector identifying C's increment. NEW_VAR is used to
2379 create a new SSA name if a cast needs to be introduced. BASIS_NAME
2380 is the rhs1 to use in creating the add/subtract. */
2382 static void
2383 replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var,
2384 tree basis_name)
2386 gimple stmt_to_print = NULL;
2387 tree orig_rhs1, orig_rhs2;
2388 tree rhs2;
2389 enum tree_code orig_code, repl_code;
2390 double_int cand_incr;
2392 orig_code = gimple_assign_rhs_code (c->cand_stmt);
2393 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2394 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2395 cand_incr = cand_increment (c);
2397 if (dump_file && (dump_flags & TDF_DETAILS))
2399 fputs ("Replacing: ", dump_file);
2400 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
2401 stmt_to_print = c->cand_stmt;
2404 if (address_arithmetic_p)
2405 repl_code = POINTER_PLUS_EXPR;
2406 else
2407 repl_code = PLUS_EXPR;
2409 /* If the increment has an initializer T_0, replace the candidate
2410 statement with an add of the basis name and the initializer. */
2411 if (incr_vec[i].initializer)
2413 tree init_type = TREE_TYPE (incr_vec[i].initializer);
2414 tree orig_type = TREE_TYPE (orig_rhs2);
2416 if (types_compatible_p (orig_type, init_type))
2417 rhs2 = incr_vec[i].initializer;
2418 else
2419 rhs2 = introduce_cast_before_cand (c, orig_type,
2420 incr_vec[i].initializer,
2421 new_var);
2423 if (incr_vec[i].incr != cand_incr)
2425 gcc_assert (repl_code == PLUS_EXPR);
2426 repl_code = MINUS_EXPR;
2429 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
2430 orig_code, orig_rhs1, orig_rhs2,
2434 /* Otherwise, the increment is one of -1, 0, and 1. Replace
2435 with a subtract of the stride from the basis name, a copy
2436 from the basis name, or an add of the stride to the basis
2437 name, respectively. It may be necessary to introduce a
2438 cast (or reuse an existing cast). */
2439 else if (cand_incr.is_one ())
2441 tree stride_type = TREE_TYPE (c->stride);
2442 tree orig_type = TREE_TYPE (orig_rhs2);
2444 if (types_compatible_p (orig_type, stride_type))
2445 rhs2 = c->stride;
2446 else
2447 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
2449 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
2450 orig_code, orig_rhs1, orig_rhs2,
2454 else if (cand_incr.is_minus_one ())
2456 tree stride_type = TREE_TYPE (c->stride);
2457 tree orig_type = TREE_TYPE (orig_rhs2);
2458 gcc_assert (repl_code != POINTER_PLUS_EXPR);
2460 if (types_compatible_p (orig_type, stride_type))
2461 rhs2 = c->stride;
2462 else
2463 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
2465 if (orig_code != MINUS_EXPR
2466 || !operand_equal_p (basis_name, orig_rhs1, 0)
2467 || !operand_equal_p (rhs2, orig_rhs2, 0))
2469 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2470 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
2471 update_stmt (gsi_stmt (gsi));
2473 if (dump_file && (dump_flags & TDF_DETAILS))
2474 stmt_to_print = gsi_stmt (gsi);
2476 else if (dump_file && (dump_flags & TDF_DETAILS))
2477 fputs (" (duplicate, not actually replacing)\n", dump_file);
2480 else if (cand_incr.is_zero ())
2482 tree lhs = gimple_assign_lhs (c->cand_stmt);
2483 tree lhs_type = TREE_TYPE (lhs);
2484 tree basis_type = TREE_TYPE (basis_name);
2486 if (types_compatible_p (lhs_type, basis_type))
2488 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
2489 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2490 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2491 gsi_replace (&gsi, copy_stmt, false);
2493 if (dump_file && (dump_flags & TDF_DETAILS))
2494 stmt_to_print = copy_stmt;
2496 else
2498 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2499 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
2500 basis_name,
2501 NULL_TREE);
2502 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2503 gsi_replace (&gsi, cast_stmt, false);
2505 if (dump_file && (dump_flags & TDF_DETAILS))
2506 stmt_to_print = cast_stmt;
2509 else
2510 gcc_unreachable ();
2512 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
2514 fputs ("With: ", dump_file);
2515 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2516 fputs ("\n", dump_file);
2520 /* For each candidate in the tree rooted at C, replace it with
2521 an increment if such has been shown to be profitable. */
2523 static void
2524 replace_profitable_candidates (slsr_cand_t c)
2526 if (!cand_already_replaced (c))
2528 double_int increment = cand_abs_increment (c);
2529 tree new_var = NULL;
2530 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
2531 unsigned i;
2533 i = incr_vec_index (increment);
2535 /* Only process profitable increments. Nothing useful can be done
2536 to a cast or copy. */
2537 if (profitable_increment_p (i)
2538 && orig_code != MODIFY_EXPR
2539 && orig_code != NOP_EXPR)
2541 slsr_cand_t basis = lookup_cand (c->basis);
2542 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
2543 replace_one_candidate (c, i, &new_var, basis_name);
2547 if (c->sibling)
2548 replace_profitable_candidates (lookup_cand (c->sibling));
2550 if (c->dependent)
2551 replace_profitable_candidates (lookup_cand (c->dependent));
2554 /* Analyze costs of related candidates in the candidate vector,
2555 and make beneficial replacements. */
2557 static void
2558 analyze_candidates_and_replace (void)
2560 unsigned i;
2561 slsr_cand_t c;
2563 /* Each candidate that has a null basis and a non-null
2564 dependent is the root of a tree of related statements.
2565 Analyze each tree to determine a subset of those
2566 statements that can be replaced with maximum benefit. */
2567 FOR_EACH_VEC_ELT (cand_vec, i, c)
2569 slsr_cand_t first_dep;
2571 if (c->basis != 0 || c->dependent == 0)
2572 continue;
2574 if (dump_file && (dump_flags & TDF_DETAILS))
2575 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
2576 c->cand_num);
2578 first_dep = lookup_cand (c->dependent);
2580 /* If this is a chain of CAND_REFs, unconditionally replace
2581 each of them with a strength-reduced data reference. */
2582 if (c->kind == CAND_REF)
2583 replace_refs (c);
2585 /* If the common stride of all related candidates is a
2586 known constant, and none of these has a phi-dependence,
2587 then all replacements are considered profitable.
2588 Each replaces a multiply by a single add, with the
2589 possibility that a feeding add also goes dead as a
2590 result. */
2591 else if (unconditional_cands_with_known_stride_p (c))
2592 replace_dependents (first_dep);
2594 /* When the stride is an SSA name, it may still be profitable
2595 to replace some or all of the dependent candidates, depending
2596 on whether the introduced increments can be reused, or are
2597 less expensive to calculate than the replaced statements. */
2598 else
2600 unsigned length;
2601 enum machine_mode mode;
2602 bool speed;
2604 /* Determine whether we'll be generating pointer arithmetic
2605 when replacing candidates. */
2606 address_arithmetic_p = (c->kind == CAND_ADD
2607 && POINTER_TYPE_P (c->cand_type));
2609 /* If all candidates have already been replaced under other
2610 interpretations, nothing remains to be done. */
2611 length = count_candidates (c);
2612 if (!length)
2613 continue;
2615 /* Construct an array of increments for this candidate chain. */
2616 incr_vec = XNEWVEC (incr_info, length);
2617 incr_vec_len = 0;
2618 record_increments (c);
2620 /* Determine which increments are profitable to replace. */
2621 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
2622 speed = optimize_cands_for_speed_p (c);
2623 analyze_increments (first_dep, mode, speed);
2625 /* Insert initializers of the form T_0 = stride * increment
2626 for use in profitable replacements. */
2627 insert_initializers (first_dep);
2628 dump_incr_vec ();
2630 /* Perform the replacements. */
2631 replace_profitable_candidates (first_dep);
2632 free (incr_vec);
2635 /* TODO: When conditional increments occur so that a
2636 candidate is dependent upon a phi-basis, the cost of
2637 introducing a temporary must be accounted for. */
2641 static unsigned
2642 execute_strength_reduction (void)
2644 struct dom_walk_data walk_data;
2646 /* Create the obstack where candidates will reside. */
2647 gcc_obstack_init (&cand_obstack);
2649 /* Allocate the candidate vector. */
2650 cand_vec.create (128);
2652 /* Allocate the mapping from statements to candidate indices. */
2653 stmt_cand_map = pointer_map_create ();
2655 /* Create the obstack where candidate chains will reside. */
2656 gcc_obstack_init (&chain_obstack);
2658 /* Allocate the mapping from base expressions to candidate chains. */
2659 base_cand_map = htab_create (500, base_cand_hash,
2660 base_cand_eq, base_cand_free);
2662 /* Initialize the loop optimizer. We need to detect flow across
2663 back edges, and this gives us dominator information as well. */
2664 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2666 /* Set up callbacks for the generic dominator tree walker. */
2667 walk_data.dom_direction = CDI_DOMINATORS;
2668 walk_data.initialize_block_local_data = NULL;
2669 walk_data.before_dom_children = find_candidates_in_block;
2670 walk_data.after_dom_children = NULL;
2671 walk_data.global_data = NULL;
2672 walk_data.block_local_data_size = 0;
2673 init_walk_dominator_tree (&walk_data);
2675 /* Walk the CFG in predominator order looking for strength reduction
2676 candidates. */
2677 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
2679 if (dump_file && (dump_flags & TDF_DETAILS))
2681 dump_cand_vec ();
2682 dump_cand_chains ();
2685 /* Analyze costs and make appropriate replacements. */
2686 analyze_candidates_and_replace ();
2688 /* Free resources. */
2689 fini_walk_dominator_tree (&walk_data);
2690 loop_optimizer_finalize ();
2691 htab_delete (base_cand_map);
2692 obstack_free (&chain_obstack, NULL);
2693 pointer_map_destroy (stmt_cand_map);
2694 cand_vec.release ();
2695 obstack_free (&cand_obstack, NULL);
2697 return 0;
2700 static bool
2701 gate_strength_reduction (void)
2703 return flag_tree_slsr;
2706 struct gimple_opt_pass pass_strength_reduction =
2709 GIMPLE_PASS,
2710 "slsr", /* name */
2711 OPTGROUP_NONE, /* optinfo_flags */
2712 gate_strength_reduction, /* gate */
2713 execute_strength_reduction, /* execute */
2714 NULL, /* sub */
2715 NULL, /* next */
2716 0, /* static_pass_number */
2717 TV_GIMPLE_SLSR, /* tv_id */
2718 PROP_cfg | PROP_ssa, /* properties_required */
2719 0, /* properties_provided */
2720 0, /* properties_destroyed */
2721 0, /* todo_flags_start */
2722 TODO_verify_ssa /* todo_flags_finish */