2013-11-19 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / gimple-ssa-strength-reduction.c
blob5cda3873eb33529f20a02d3016aa6e424ad2cd72
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction will be implemented in four stages, gradually
28 adding more complex candidates:
30 1) Explicit multiplies, known constant multipliers, no
31 conditional increments. (complete)
32 2) Explicit multiplies, unknown constant multipliers,
33 no conditional increments. (complete)
34 3) Implicit multiplies in addressing expressions. (complete)
35 4) Explicit multiplies, conditional increments. (pending)
37 It would also be possible to apply strength reduction to divisions
38 and modulos, but such opportunities are relatively uncommon.
40 Strength reduction is also currently restricted to integer operations.
41 If desired, it could be extended to floating-point operations under
42 control of something like -funsafe-math-optimizations. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "basic-block.h"
50 #include "tree-pass.h"
51 #include "cfgloop.h"
52 #include "gimple-pretty-print.h"
53 #include "tree-flow.h"
54 #include "domwalk.h"
55 #include "pointer-set.h"
56 #include "expmed.h"
57 #include "params.h"
59 /* Information about a strength reduction candidate. Each statement
60 in the candidate table represents an expression of one of the
61 following forms (the special case of CAND_REF will be described
62 later):
64 (CAND_MULT) S1: X = (B + i) * S
65 (CAND_ADD) S1: X = B + (i * S)
67 Here X and B are SSA names, i is an integer constant, and S is
68 either an SSA name or a constant. We call B the "base," i the
69 "index", and S the "stride."
71 Any statement S0 that dominates S1 and is of the form:
73 (CAND_MULT) S0: Y = (B + i') * S
74 (CAND_ADD) S0: Y = B + (i' * S)
76 is called a "basis" for S1. In both cases, S1 may be replaced by
78 S1': X = Y + (i - i') * S,
80 where (i - i') * S is folded to the extent possible.
82 All gimple statements are visited in dominator order, and each
83 statement that may contribute to one of the forms of S1 above is
84 given at least one entry in the candidate table. Such statements
85 include addition, pointer addition, subtraction, multiplication,
86 negation, copies, and nontrivial type casts. If a statement may
87 represent more than one expression of the forms of S1 above,
88 multiple "interpretations" are stored in the table and chained
89 together. Examples:
91 * An add of two SSA names may treat either operand as the base.
92 * A multiply of two SSA names, likewise.
93 * A copy or cast may be thought of as either a CAND_MULT with
94 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
96 Candidate records are allocated from an obstack. They are addressed
97 both from a hash table keyed on S1, and from a vector of candidate
98 pointers arranged in predominator order.
100 Opportunity note
101 ----------------
102 Currently we don't recognize:
104 S0: Y = (S * i') - B
105 S1: X = (S * i) - B
107 as a strength reduction opportunity, even though this S1 would
108 also be replaceable by the S1' above. This can be added if it
109 comes up in practice.
111 Strength reduction in addressing
112 --------------------------------
113 There is another kind of candidate known as CAND_REF. A CAND_REF
114 describes a statement containing a memory reference having
115 complex addressing that might benefit from strength reduction.
116 Specifically, we are interested in references for which
117 get_inner_reference returns a base address, offset, and bitpos as
118 follows:
120 base: MEM_REF (T1, C1)
121 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
122 bitpos: C4 * BITS_PER_UNIT
124 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
125 arbitrary integer constants. Note that C2 may be zero, in which
126 case the offset will be MULT_EXPR (T2, C3).
128 When this pattern is recognized, the original memory reference
129 can be replaced with:
131 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
132 C1 + (C2 * C3) + C4)
134 which distributes the multiply to allow constant folding. When
135 two or more addressing expressions can be represented by MEM_REFs
136 of this form, differing only in the constants C1, C2, and C4,
137 making this substitution produces more efficient addressing during
138 the RTL phases. When there are not at least two expressions with
139 the same values of T1, T2, and C3, there is nothing to be gained
140 by the replacement.
142 Strength reduction of CAND_REFs uses the same infrastructure as
143 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
144 field, MULT_EXPR (T2, C3) in the stride (S) field, and
145 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
146 is thus another CAND_REF with the same B and S values. When at
147 least two CAND_REFs are chained together using the basis relation,
148 each of them is replaced as above, resulting in improved code
149 generation for addressing. */
152 /* Index into the candidate vector, offset by 1. VECs are zero-based,
153 while cand_idx's are one-based, with zero indicating null. */
154 typedef unsigned cand_idx;
156 /* The kind of candidate. */
157 enum cand_kind
159 CAND_MULT,
160 CAND_ADD,
161 CAND_REF
164 struct slsr_cand_d
166 /* The candidate statement S1. */
167 gimple cand_stmt;
169 /* The base expression B: often an SSA name, but not always. */
170 tree base_expr;
172 /* The stride S. */
173 tree stride;
175 /* The index constant i. */
176 double_int index;
178 /* The type of the candidate. This is normally the type of base_expr,
179 but casts may have occurred when combining feeding instructions.
180 A candidate can only be a basis for candidates of the same final type.
181 (For CAND_REFs, this is the type to be used for operand 1 of the
182 replacement MEM_REF.) */
183 tree cand_type;
185 /* The kind of candidate (CAND_MULT, etc.). */
186 enum cand_kind kind;
188 /* Index of this candidate in the candidate vector. */
189 cand_idx cand_num;
191 /* Index of the next candidate record for the same statement.
192 A statement may be useful in more than one way (e.g., due to
193 commutativity). So we can have multiple "interpretations"
194 of a statement. */
195 cand_idx next_interp;
197 /* Index of the basis statement S0, if any, in the candidate vector. */
198 cand_idx basis;
200 /* First candidate for which this candidate is a basis, if one exists. */
201 cand_idx dependent;
203 /* Next candidate having the same basis as this one. */
204 cand_idx sibling;
206 /* If this is a conditional candidate, the defining PHI statement
207 for the base SSA name B. For future use; always NULL for now. */
208 gimple def_phi;
210 /* Savings that can be expected from eliminating dead code if this
211 candidate is replaced. */
212 int dead_savings;
215 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
216 typedef const struct slsr_cand_d *const_slsr_cand_t;
218 /* Pointers to candidates are chained together as part of a mapping
219 from base expressions to the candidates that use them. */
221 struct cand_chain_d
223 /* Base expression for the chain of candidates: often, but not
224 always, an SSA name. */
225 tree base_expr;
227 /* Pointer to a candidate. */
228 slsr_cand_t cand;
230 /* Chain pointer. */
231 struct cand_chain_d *next;
235 typedef struct cand_chain_d cand_chain, *cand_chain_t;
236 typedef const struct cand_chain_d *const_cand_chain_t;
238 /* Information about a unique "increment" associated with candidates
239 having an SSA name for a stride. An increment is the difference
240 between the index of the candidate and the index of its basis,
241 i.e., (i - i') as discussed in the module commentary.
243 When we are not going to generate address arithmetic we treat
244 increments that differ only in sign as the same, allowing sharing
245 of the cost of initializers. The absolute value of the increment
246 is stored in the incr_info. */
248 struct incr_info_d
250 /* The increment that relates a candidate to its basis. */
251 double_int incr;
253 /* How many times the increment occurs in the candidate tree. */
254 unsigned count;
256 /* Cost of replacing candidates using this increment. Negative and
257 zero costs indicate replacement should be performed. */
258 int cost;
260 /* If this increment is profitable but is not -1, 0, or 1, it requires
261 an initializer T_0 = stride * incr to be found or introduced in the
262 nearest common dominator of all candidates. This field holds T_0
263 for subsequent use. */
264 tree initializer;
266 /* If the initializer was found to already exist, this is the block
267 where it was found. */
268 basic_block init_bb;
271 typedef struct incr_info_d incr_info, *incr_info_t;
273 /* Candidates are maintained in a vector. If candidate X dominates
274 candidate Y, then X appears before Y in the vector; but the
275 converse does not necessarily hold. */
276 static vec<slsr_cand_t> cand_vec;
278 enum cost_consts
280 COST_NEUTRAL = 0,
281 COST_INFINITE = 1000
284 /* Pointer map embodying a mapping from statements to candidates. */
285 static struct pointer_map_t *stmt_cand_map;
287 /* Obstack for candidates. */
288 static struct obstack cand_obstack;
290 /* Hash table embodying a mapping from base exprs to chains of candidates. */
291 static htab_t base_cand_map;
293 /* Obstack for candidate chains. */
294 static struct obstack chain_obstack;
296 /* An array INCR_VEC of incr_infos is used during analysis of related
297 candidates having an SSA name for a stride. INCR_VEC_LEN describes
298 its current length. */
299 static incr_info_t incr_vec;
300 static unsigned incr_vec_len;
302 /* For a chain of candidates with unknown stride, indicates whether or not
303 we must generate pointer arithmetic when replacing statements. */
304 static bool address_arithmetic_p;
306 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
308 static slsr_cand_t
309 lookup_cand (cand_idx idx)
311 return cand_vec[idx - 1];
314 /* Callback to produce a hash value for a candidate chain header. */
316 static hashval_t
317 base_cand_hash (const void *p)
319 tree base_expr = ((const_cand_chain_t) p)->base_expr;
320 return iterative_hash_expr (base_expr, 0);
323 /* Callback when an element is removed from the hash table.
324 We never remove entries until the entire table is released. */
326 static void
327 base_cand_free (void *p ATTRIBUTE_UNUSED)
331 /* Callback to return true if two candidate chain headers are equal. */
333 static int
334 base_cand_eq (const void *p1, const void *p2)
336 const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
337 const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
338 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
341 /* Use the base expr from candidate C to look for possible candidates
342 that can serve as a basis for C. Each potential basis must also
343 appear in a block that dominates the candidate statement and have
344 the same stride and type. If more than one possible basis exists,
345 the one with highest index in the vector is chosen; this will be
346 the most immediately dominating basis. */
348 static int
349 find_basis_for_candidate (slsr_cand_t c)
351 cand_chain mapping_key;
352 cand_chain_t chain;
353 slsr_cand_t basis = NULL;
355 // Limit potential of N^2 behavior for long candidate chains.
356 int iters = 0;
357 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
359 mapping_key.base_expr = c->base_expr;
360 chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
362 for (; chain && iters < max_iters; chain = chain->next, ++iters)
364 slsr_cand_t one_basis = chain->cand;
366 if (one_basis->kind != c->kind
367 || one_basis->cand_stmt == c->cand_stmt
368 || !operand_equal_p (one_basis->stride, c->stride, 0)
369 || !types_compatible_p (one_basis->cand_type, c->cand_type)
370 || !dominated_by_p (CDI_DOMINATORS,
371 gimple_bb (c->cand_stmt),
372 gimple_bb (one_basis->cand_stmt)))
373 continue;
375 if (!basis || basis->cand_num < one_basis->cand_num)
376 basis = one_basis;
379 if (basis)
381 c->sibling = basis->dependent;
382 basis->dependent = c->cand_num;
383 return basis->cand_num;
386 return 0;
389 /* Record a mapping from the base expression of C to C itself, indicating that
390 C may potentially serve as a basis using that base expression. */
392 static void
393 record_potential_basis (slsr_cand_t c)
395 cand_chain_t node;
396 void **slot;
398 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
399 node->base_expr = c->base_expr;
400 node->cand = c;
401 node->next = NULL;
402 slot = htab_find_slot (base_cand_map, node, INSERT);
404 if (*slot)
406 cand_chain_t head = (cand_chain_t) (*slot);
407 node->next = head->next;
408 head->next = node;
410 else
411 *slot = node;
414 /* Allocate storage for a new candidate and initialize its fields.
415 Attempt to find a basis for the candidate. */
417 static slsr_cand_t
418 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
419 double_int index, tree stride, tree ctype,
420 unsigned savings)
422 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
423 sizeof (slsr_cand));
424 c->cand_stmt = gs;
425 c->base_expr = base;
426 c->stride = stride;
427 c->index = index;
428 c->cand_type = ctype;
429 c->kind = kind;
430 c->cand_num = cand_vec.length () + 1;
431 c->next_interp = 0;
432 c->dependent = 0;
433 c->sibling = 0;
434 c->def_phi = NULL;
435 c->dead_savings = savings;
437 cand_vec.safe_push (c);
438 c->basis = find_basis_for_candidate (c);
439 record_potential_basis (c);
441 return c;
444 /* Determine the target cost of statement GS when compiling according
445 to SPEED. */
447 static int
448 stmt_cost (gimple gs, bool speed)
450 tree lhs, rhs1, rhs2;
451 enum machine_mode lhs_mode;
453 gcc_assert (is_gimple_assign (gs));
454 lhs = gimple_assign_lhs (gs);
455 rhs1 = gimple_assign_rhs1 (gs);
456 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
458 switch (gimple_assign_rhs_code (gs))
460 case MULT_EXPR:
461 rhs2 = gimple_assign_rhs2 (gs);
463 if (host_integerp (rhs2, 0))
464 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
466 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
467 return mul_cost (speed, lhs_mode);
469 case PLUS_EXPR:
470 case POINTER_PLUS_EXPR:
471 case MINUS_EXPR:
472 return add_cost (speed, lhs_mode);
474 case NEGATE_EXPR:
475 return neg_cost (speed, lhs_mode);
477 case NOP_EXPR:
478 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
480 /* Note that we don't assign costs to copies that in most cases
481 will go away. */
482 default:
486 gcc_unreachable ();
487 return 0;
490 /* Look up the defining statement for BASE_IN and return a pointer
491 to its candidate in the candidate table, if any; otherwise NULL.
492 Only CAND_ADD and CAND_MULT candidates are returned. */
494 static slsr_cand_t
495 base_cand_from_table (tree base_in)
497 slsr_cand_t *result;
499 gimple def = SSA_NAME_DEF_STMT (base_in);
500 if (!def)
501 return (slsr_cand_t) NULL;
503 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
505 if (result && (*result)->kind != CAND_REF)
506 return *result;
508 return (slsr_cand_t) NULL;
511 /* Add an entry to the statement-to-candidate mapping. */
513 static void
514 add_cand_for_stmt (gimple gs, slsr_cand_t c)
516 void **slot = pointer_map_insert (stmt_cand_map, gs);
517 gcc_assert (!*slot);
518 *slot = c;
521 /* Look for the following pattern:
523 *PBASE: MEM_REF (T1, C1)
525 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
527 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
529 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
531 *PINDEX: C4 * BITS_PER_UNIT
533 If not present, leave the input values unchanged and return FALSE.
534 Otherwise, modify the input values as follows and return TRUE:
536 *PBASE: T1
537 *POFFSET: MULT_EXPR (T2, C3)
538 *PINDEX: C1 + (C2 * C3) + C4 */
540 static bool
541 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
542 tree *ptype)
544 tree base = *pbase, offset = *poffset;
545 double_int index = *pindex;
546 double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
547 tree mult_op0, mult_op1, t1, t2, type;
548 double_int c1, c2, c3, c4;
550 if (!base
551 || !offset
552 || TREE_CODE (base) != MEM_REF
553 || TREE_CODE (offset) != MULT_EXPR
554 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
555 || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
556 return false;
558 t1 = TREE_OPERAND (base, 0);
559 c1 = mem_ref_offset (base);
560 type = TREE_TYPE (TREE_OPERAND (base, 1));
562 mult_op0 = TREE_OPERAND (offset, 0);
563 mult_op1 = TREE_OPERAND (offset, 1);
565 c3 = tree_to_double_int (mult_op1);
567 if (TREE_CODE (mult_op0) == PLUS_EXPR)
569 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
571 t2 = TREE_OPERAND (mult_op0, 0);
572 c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
574 else
575 return false;
577 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
579 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
581 t2 = TREE_OPERAND (mult_op0, 0);
582 c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
584 else
585 return false;
587 else
589 t2 = mult_op0;
590 c2 = double_int_zero;
593 c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
595 *pbase = t1;
596 *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
597 double_int_to_tree (sizetype, c3));
598 *pindex = c1 + c2 * c3 + c4;
599 *ptype = type;
601 return true;
604 /* Given GS which contains a data reference, create a CAND_REF entry in
605 the candidate table and attempt to find a basis. */
607 static void
608 slsr_process_ref (gimple gs)
610 tree ref_expr, base, offset, type;
611 HOST_WIDE_INT bitsize, bitpos;
612 enum machine_mode mode;
613 int unsignedp, volatilep;
614 double_int index;
615 slsr_cand_t c;
617 if (gimple_vdef (gs))
618 ref_expr = gimple_assign_lhs (gs);
619 else
620 ref_expr = gimple_assign_rhs1 (gs);
622 if (!handled_component_p (ref_expr)
623 || TREE_CODE (ref_expr) == BIT_FIELD_REF
624 || (TREE_CODE (ref_expr) == COMPONENT_REF
625 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
626 return;
628 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
629 &unsignedp, &volatilep, false);
630 index = double_int::from_uhwi (bitpos);
632 if (!restructure_reference (&base, &offset, &index, &type))
633 return;
635 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
636 type, 0);
638 /* Add the candidate to the statement-candidate mapping. */
639 add_cand_for_stmt (gs, c);
642 /* Create a candidate entry for a statement GS, where GS multiplies
643 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
644 about the two SSA names into the new candidate. Return the new
645 candidate. */
647 static slsr_cand_t
648 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
650 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
651 double_int index;
652 unsigned savings = 0;
653 slsr_cand_t c;
654 slsr_cand_t base_cand = base_cand_from_table (base_in);
656 /* Look at all interpretations of the base candidate, if necessary,
657 to find information to propagate into this candidate. */
658 while (base_cand && !base)
661 if (base_cand->kind == CAND_MULT
662 && operand_equal_p (base_cand->stride, integer_one_node, 0))
664 /* Y = (B + i') * 1
665 X = Y * Z
666 ================
667 X = (B + i') * Z */
668 base = base_cand->base_expr;
669 index = base_cand->index;
670 stride = stride_in;
671 ctype = base_cand->cand_type;
672 if (has_single_use (base_in))
673 savings = (base_cand->dead_savings
674 + stmt_cost (base_cand->cand_stmt, speed));
676 else if (base_cand->kind == CAND_ADD
677 && TREE_CODE (base_cand->stride) == INTEGER_CST)
679 /* Y = B + (i' * S), S constant
680 X = Y * Z
681 ============================
682 X = B + ((i' * S) * Z) */
683 base = base_cand->base_expr;
684 index = base_cand->index * tree_to_double_int (base_cand->stride);
685 stride = stride_in;
686 ctype = base_cand->cand_type;
687 if (has_single_use (base_in))
688 savings = (base_cand->dead_savings
689 + stmt_cost (base_cand->cand_stmt, speed));
692 if (base_cand->next_interp)
693 base_cand = lookup_cand (base_cand->next_interp);
694 else
695 base_cand = NULL;
698 if (!base)
700 /* No interpretations had anything useful to propagate, so
701 produce X = (Y + 0) * Z. */
702 base = base_in;
703 index = double_int_zero;
704 stride = stride_in;
705 ctype = TREE_TYPE (base_in);
708 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
709 ctype, savings);
710 return c;
713 /* Create a candidate entry for a statement GS, where GS multiplies
714 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
715 information about BASE_IN into the new candidate. Return the new
716 candidate. */
718 static slsr_cand_t
719 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
721 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
722 double_int index, temp;
723 unsigned savings = 0;
724 slsr_cand_t c;
725 slsr_cand_t base_cand = base_cand_from_table (base_in);
727 /* Look at all interpretations of the base candidate, if necessary,
728 to find information to propagate into this candidate. */
729 while (base_cand && !base)
731 if (base_cand->kind == CAND_MULT
732 && TREE_CODE (base_cand->stride) == INTEGER_CST)
734 /* Y = (B + i') * S, S constant
735 X = Y * c
736 ============================
737 X = (B + i') * (S * c) */
738 base = base_cand->base_expr;
739 index = base_cand->index;
740 temp = tree_to_double_int (base_cand->stride)
741 * tree_to_double_int (stride_in);
742 stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
743 ctype = base_cand->cand_type;
744 if (has_single_use (base_in))
745 savings = (base_cand->dead_savings
746 + stmt_cost (base_cand->cand_stmt, speed));
748 else if (base_cand->kind == CAND_ADD
749 && operand_equal_p (base_cand->stride, integer_one_node, 0))
751 /* Y = B + (i' * 1)
752 X = Y * c
753 ===========================
754 X = (B + i') * c */
755 base = base_cand->base_expr;
756 index = base_cand->index;
757 stride = stride_in;
758 ctype = base_cand->cand_type;
759 if (has_single_use (base_in))
760 savings = (base_cand->dead_savings
761 + stmt_cost (base_cand->cand_stmt, speed));
763 else if (base_cand->kind == CAND_ADD
764 && base_cand->index.is_one ()
765 && TREE_CODE (base_cand->stride) == INTEGER_CST)
767 /* Y = B + (1 * S), S constant
768 X = Y * c
769 ===========================
770 X = (B + S) * c */
771 base = base_cand->base_expr;
772 index = tree_to_double_int (base_cand->stride);
773 stride = stride_in;
774 ctype = base_cand->cand_type;
775 if (has_single_use (base_in))
776 savings = (base_cand->dead_savings
777 + stmt_cost (base_cand->cand_stmt, speed));
780 if (base_cand->next_interp)
781 base_cand = lookup_cand (base_cand->next_interp);
782 else
783 base_cand = NULL;
786 if (!base)
788 /* No interpretations had anything useful to propagate, so
789 produce X = (Y + 0) * c. */
790 base = base_in;
791 index = double_int_zero;
792 stride = stride_in;
793 ctype = TREE_TYPE (base_in);
796 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
797 ctype, savings);
798 return c;
801 /* Given GS which is a multiply of scalar integers, make an appropriate
802 entry in the candidate table. If this is a multiply of two SSA names,
803 create two CAND_MULT interpretations and attempt to find a basis for
804 each of them. Otherwise, create a single CAND_MULT and attempt to
805 find a basis. */
807 static void
808 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
810 slsr_cand_t c, c2;
812 /* If this is a multiply of an SSA name with itself, it is highly
813 unlikely that we will get a strength reduction opportunity, so
814 don't record it as a candidate. This simplifies the logic for
815 finding a basis, so if this is removed that must be considered. */
816 if (rhs1 == rhs2)
817 return;
819 if (TREE_CODE (rhs2) == SSA_NAME)
821 /* Record an interpretation of this statement in the candidate table
822 assuming RHS1 is the base expression and RHS2 is the stride. */
823 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
825 /* Add the first interpretation to the statement-candidate mapping. */
826 add_cand_for_stmt (gs, c);
828 /* Record another interpretation of this statement assuming RHS1
829 is the stride and RHS2 is the base expression. */
830 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
831 c->next_interp = c2->cand_num;
833 else
835 /* Record an interpretation for the multiply-immediate. */
836 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
838 /* Add the interpretation to the statement-candidate mapping. */
839 add_cand_for_stmt (gs, c);
843 /* Create a candidate entry for a statement GS, where GS adds two
844 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
845 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
846 information about the two SSA names into the new candidate.
847 Return the new candidate. */
849 static slsr_cand_t
850 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
851 bool subtract_p, bool speed)
853 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
854 double_int index;
855 unsigned savings = 0;
856 slsr_cand_t c;
857 slsr_cand_t base_cand = base_cand_from_table (base_in);
858 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
860 /* The most useful transformation is a multiply-immediate feeding
861 an add or subtract. Look for that first. */
862 while (addend_cand && !base)
864 if (addend_cand->kind == CAND_MULT
865 && addend_cand->index.is_zero ()
866 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
868 /* Z = (B + 0) * S, S constant
869 X = Y +/- Z
870 ===========================
871 X = Y + ((+/-1 * S) * B) */
872 base = base_in;
873 index = tree_to_double_int (addend_cand->stride);
874 if (subtract_p)
875 index = -index;
876 stride = addend_cand->base_expr;
877 ctype = TREE_TYPE (base_in);
878 if (has_single_use (addend_in))
879 savings = (addend_cand->dead_savings
880 + stmt_cost (addend_cand->cand_stmt, speed));
883 if (addend_cand->next_interp)
884 addend_cand = lookup_cand (addend_cand->next_interp);
885 else
886 addend_cand = NULL;
889 while (base_cand && !base)
891 if (base_cand->kind == CAND_ADD
892 && (base_cand->index.is_zero ()
893 || operand_equal_p (base_cand->stride,
894 integer_zero_node, 0)))
896 /* Y = B + (i' * S), i' * S = 0
897 X = Y +/- Z
898 ============================
899 X = B + (+/-1 * Z) */
900 base = base_cand->base_expr;
901 index = subtract_p ? double_int_minus_one : double_int_one;
902 stride = addend_in;
903 ctype = base_cand->cand_type;
904 if (has_single_use (base_in))
905 savings = (base_cand->dead_savings
906 + stmt_cost (base_cand->cand_stmt, speed));
908 else if (subtract_p)
910 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
912 while (subtrahend_cand && !base)
914 if (subtrahend_cand->kind == CAND_MULT
915 && subtrahend_cand->index.is_zero ()
916 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
918 /* Z = (B + 0) * S, S constant
919 X = Y - Z
920 ===========================
921 Value: X = Y + ((-1 * S) * B) */
922 base = base_in;
923 index = tree_to_double_int (subtrahend_cand->stride);
924 index = -index;
925 stride = subtrahend_cand->base_expr;
926 ctype = TREE_TYPE (base_in);
927 if (has_single_use (addend_in))
928 savings = (subtrahend_cand->dead_savings
929 + stmt_cost (subtrahend_cand->cand_stmt, speed));
932 if (subtrahend_cand->next_interp)
933 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
934 else
935 subtrahend_cand = NULL;
939 if (base_cand->next_interp)
940 base_cand = lookup_cand (base_cand->next_interp);
941 else
942 base_cand = NULL;
945 if (!base)
947 /* No interpretations had anything useful to propagate, so
948 produce X = Y + (1 * Z). */
949 base = base_in;
950 index = subtract_p ? double_int_minus_one : double_int_one;
951 stride = addend_in;
952 ctype = TREE_TYPE (base_in);
955 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
956 ctype, savings);
957 return c;
960 /* Create a candidate entry for a statement GS, where GS adds SSA
961 name BASE_IN to constant INDEX_IN. Propagate any known information
962 about BASE_IN into the new candidate. Return the new candidate. */
964 static slsr_cand_t
965 create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
967 enum cand_kind kind = CAND_ADD;
968 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
969 double_int index, multiple;
970 unsigned savings = 0;
971 slsr_cand_t c;
972 slsr_cand_t base_cand = base_cand_from_table (base_in);
974 while (base_cand && !base)
976 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
978 if (TREE_CODE (base_cand->stride) == INTEGER_CST
979 && index_in.multiple_of (tree_to_double_int (base_cand->stride),
980 unsigned_p, &multiple))
982 /* Y = (B + i') * S, S constant, c = kS for some integer k
983 X = Y + c
984 ============================
985 X = (B + (i'+ k)) * S
987 Y = B + (i' * S), S constant, c = kS for some integer k
988 X = Y + c
989 ============================
990 X = (B + (i'+ k)) * S */
991 kind = base_cand->kind;
992 base = base_cand->base_expr;
993 index = base_cand->index + multiple;
994 stride = base_cand->stride;
995 ctype = base_cand->cand_type;
996 if (has_single_use (base_in))
997 savings = (base_cand->dead_savings
998 + stmt_cost (base_cand->cand_stmt, speed));
1001 if (base_cand->next_interp)
1002 base_cand = lookup_cand (base_cand->next_interp);
1003 else
1004 base_cand = NULL;
1007 if (!base)
1009 /* No interpretations had anything useful to propagate, so
1010 produce X = Y + (c * 1). */
1011 kind = CAND_ADD;
1012 base = base_in;
1013 index = index_in;
1014 stride = integer_one_node;
1015 ctype = TREE_TYPE (base_in);
1018 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1019 ctype, savings);
1020 return c;
1023 /* Given GS which is an add or subtract of scalar integers or pointers,
1024 make at least one appropriate entry in the candidate table. */
1026 static void
1027 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1029 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1030 slsr_cand_t c = NULL, c2;
1032 if (TREE_CODE (rhs2) == SSA_NAME)
1034 /* First record an interpretation assuming RHS1 is the base expression
1035 and RHS2 is the stride. But it doesn't make sense for the
1036 stride to be a pointer, so don't record a candidate in that case. */
1037 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1039 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1041 /* Add the first interpretation to the statement-candidate
1042 mapping. */
1043 add_cand_for_stmt (gs, c);
1046 /* If the two RHS operands are identical, or this is a subtract,
1047 we're done. */
1048 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1049 return;
1051 /* Otherwise, record another interpretation assuming RHS2 is the
1052 base expression and RHS1 is the stride, again provided that the
1053 stride is not a pointer. */
1054 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1056 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1057 if (c)
1058 c->next_interp = c2->cand_num;
1059 else
1060 add_cand_for_stmt (gs, c2);
1063 else
1065 double_int index;
1067 /* Record an interpretation for the add-immediate. */
1068 index = tree_to_double_int (rhs2);
1069 if (subtract_p)
1070 index = -index;
1072 c = create_add_imm_cand (gs, rhs1, index, speed);
1074 /* Add the interpretation to the statement-candidate mapping. */
1075 add_cand_for_stmt (gs, c);
1079 /* Given GS which is a negate of a scalar integer, make an appropriate
1080 entry in the candidate table. A negate is equivalent to a multiply
1081 by -1. */
1083 static void
1084 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1086 /* Record a CAND_MULT interpretation for the multiply by -1. */
1087 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1089 /* Add the interpretation to the statement-candidate mapping. */
1090 add_cand_for_stmt (gs, c);
1093 /* Help function for legal_cast_p, operating on two trees. Checks
1094 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1095 for more details. */
1097 static bool
1098 legal_cast_p_1 (tree lhs, tree rhs)
1100 tree lhs_type, rhs_type;
1101 unsigned lhs_size, rhs_size;
1102 bool lhs_wraps, rhs_wraps;
1104 lhs_type = TREE_TYPE (lhs);
1105 rhs_type = TREE_TYPE (rhs);
1106 lhs_size = TYPE_PRECISION (lhs_type);
1107 rhs_size = TYPE_PRECISION (rhs_type);
1108 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1109 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1111 if (lhs_size < rhs_size
1112 || (rhs_wraps && !lhs_wraps)
1113 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1114 return false;
1116 return true;
1119 /* Return TRUE if GS is a statement that defines an SSA name from
1120 a conversion and is legal for us to combine with an add and multiply
1121 in the candidate table. For example, suppose we have:
1123 A = B + i;
1124 C = (type) A;
1125 D = C * S;
1127 Without the type-cast, we would create a CAND_MULT for D with base B,
1128 index i, and stride S. We want to record this candidate only if it
1129 is equivalent to apply the type cast following the multiply:
1131 A = B + i;
1132 E = A * S;
1133 D = (type) E;
1135 We will record the type with the candidate for D. This allows us
1136 to use a similar previous candidate as a basis. If we have earlier seen
1138 A' = B + i';
1139 C' = (type) A';
1140 D' = C' * S;
1142 we can replace D with
1144 D = D' + (i - i') * S;
1146 But if moving the type-cast would change semantics, we mustn't do this.
1148 This is legitimate for casts from a non-wrapping integral type to
1149 any integral type of the same or larger size. It is not legitimate
1150 to convert a wrapping type to a non-wrapping type, or to a wrapping
1151 type of a different size. I.e., with a wrapping type, we must
1152 assume that the addition B + i could wrap, in which case performing
1153 the multiply before or after one of the "illegal" type casts will
1154 have different semantics. */
1156 static bool
1157 legal_cast_p (gimple gs, tree rhs)
1159 if (!is_gimple_assign (gs)
1160 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1161 return false;
1163 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1166 /* Given GS which is a cast to a scalar integer type, determine whether
1167 the cast is legal for strength reduction. If so, make at least one
1168 appropriate entry in the candidate table. */
1170 static void
1171 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1173 tree lhs, ctype;
1174 slsr_cand_t base_cand, c, c2;
1175 unsigned savings = 0;
1177 if (!legal_cast_p (gs, rhs1))
1178 return;
1180 lhs = gimple_assign_lhs (gs);
1181 base_cand = base_cand_from_table (rhs1);
1182 ctype = TREE_TYPE (lhs);
1184 if (base_cand)
1186 while (base_cand)
1188 /* Propagate all data from the base candidate except the type,
1189 which comes from the cast, and the base candidate's cast,
1190 which is no longer applicable. */
1191 if (has_single_use (rhs1))
1192 savings = (base_cand->dead_savings
1193 + stmt_cost (base_cand->cand_stmt, speed));
1195 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1196 base_cand->base_expr,
1197 base_cand->index, base_cand->stride,
1198 ctype, savings);
1199 if (base_cand->next_interp)
1200 base_cand = lookup_cand (base_cand->next_interp);
1201 else
1202 base_cand = NULL;
1205 else
1207 /* If nothing is known about the RHS, create fresh CAND_ADD and
1208 CAND_MULT interpretations:
1210 X = Y + (0 * 1)
1211 X = (Y + 0) * 1
1213 The first of these is somewhat arbitrary, but the choice of
1214 1 for the stride simplifies the logic for propagating casts
1215 into their uses. */
1216 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1217 integer_one_node, ctype, 0);
1218 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1219 integer_one_node, ctype, 0);
1220 c->next_interp = c2->cand_num;
1223 /* Add the first (or only) interpretation to the statement-candidate
1224 mapping. */
1225 add_cand_for_stmt (gs, c);
1228 /* Given GS which is a copy of a scalar integer type, make at least one
1229 appropriate entry in the candidate table.
1231 This interface is included for completeness, but is unnecessary
1232 if this pass immediately follows a pass that performs copy
1233 propagation, such as DOM. */
1235 static void
1236 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1238 slsr_cand_t base_cand, c, c2;
1239 unsigned savings = 0;
1241 base_cand = base_cand_from_table (rhs1);
1243 if (base_cand)
1245 while (base_cand)
1247 /* Propagate all data from the base candidate. */
1248 if (has_single_use (rhs1))
1249 savings = (base_cand->dead_savings
1250 + stmt_cost (base_cand->cand_stmt, speed));
1252 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1253 base_cand->base_expr,
1254 base_cand->index, base_cand->stride,
1255 base_cand->cand_type, savings);
1256 if (base_cand->next_interp)
1257 base_cand = lookup_cand (base_cand->next_interp);
1258 else
1259 base_cand = NULL;
1262 else
1264 /* If nothing is known about the RHS, create fresh CAND_ADD and
1265 CAND_MULT interpretations:
1267 X = Y + (0 * 1)
1268 X = (Y + 0) * 1
1270 The first of these is somewhat arbitrary, but the choice of
1271 1 for the stride simplifies the logic for propagating casts
1272 into their uses. */
1273 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1274 integer_one_node, TREE_TYPE (rhs1), 0);
1275 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1276 integer_one_node, TREE_TYPE (rhs1), 0);
1277 c->next_interp = c2->cand_num;
1280 /* Add the first (or only) interpretation to the statement-candidate
1281 mapping. */
1282 add_cand_for_stmt (gs, c);
1285 /* Find strength-reduction candidates in block BB. */
1287 static void
1288 find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
1289 basic_block bb)
1291 bool speed = optimize_bb_for_speed_p (bb);
1292 gimple_stmt_iterator gsi;
1294 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1296 gimple gs = gsi_stmt (gsi);
1298 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1299 slsr_process_ref (gs);
1301 else if (is_gimple_assign (gs)
1302 && SCALAR_INT_MODE_P
1303 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1305 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1307 switch (gimple_assign_rhs_code (gs))
1309 case MULT_EXPR:
1310 case PLUS_EXPR:
1311 rhs1 = gimple_assign_rhs1 (gs);
1312 rhs2 = gimple_assign_rhs2 (gs);
1313 /* Should never happen, but currently some buggy situations
1314 in earlier phases put constants in rhs1. */
1315 if (TREE_CODE (rhs1) != SSA_NAME)
1316 continue;
1317 break;
1319 /* Possible future opportunity: rhs1 of a ptr+ can be
1320 an ADDR_EXPR. */
1321 case POINTER_PLUS_EXPR:
1322 case MINUS_EXPR:
1323 rhs2 = gimple_assign_rhs2 (gs);
1324 /* Fall-through. */
1326 case NOP_EXPR:
1327 case MODIFY_EXPR:
1328 case NEGATE_EXPR:
1329 rhs1 = gimple_assign_rhs1 (gs);
1330 if (TREE_CODE (rhs1) != SSA_NAME)
1331 continue;
1332 break;
1334 default:
1338 switch (gimple_assign_rhs_code (gs))
1340 case MULT_EXPR:
1341 slsr_process_mul (gs, rhs1, rhs2, speed);
1342 break;
1344 case PLUS_EXPR:
1345 case POINTER_PLUS_EXPR:
1346 case MINUS_EXPR:
1347 slsr_process_add (gs, rhs1, rhs2, speed);
1348 break;
1350 case NEGATE_EXPR:
1351 slsr_process_neg (gs, rhs1, speed);
1352 break;
1354 case NOP_EXPR:
1355 slsr_process_cast (gs, rhs1, speed);
1356 break;
1358 case MODIFY_EXPR:
1359 slsr_process_copy (gs, rhs1, speed);
1360 break;
1362 default:
1369 /* Dump a candidate for debug. */
1371 static void
1372 dump_candidate (slsr_cand_t c)
1374 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1375 gimple_bb (c->cand_stmt)->index);
1376 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1377 switch (c->kind)
1379 case CAND_MULT:
1380 fputs (" MULT : (", dump_file);
1381 print_generic_expr (dump_file, c->base_expr, 0);
1382 fputs (" + ", dump_file);
1383 dump_double_int (dump_file, c->index, false);
1384 fputs (") * ", dump_file);
1385 print_generic_expr (dump_file, c->stride, 0);
1386 fputs (" : ", dump_file);
1387 break;
1388 case CAND_ADD:
1389 fputs (" ADD : ", dump_file);
1390 print_generic_expr (dump_file, c->base_expr, 0);
1391 fputs (" + (", dump_file);
1392 dump_double_int (dump_file, c->index, false);
1393 fputs (" * ", dump_file);
1394 print_generic_expr (dump_file, c->stride, 0);
1395 fputs (") : ", dump_file);
1396 break;
1397 case CAND_REF:
1398 fputs (" REF : ", dump_file);
1399 print_generic_expr (dump_file, c->base_expr, 0);
1400 fputs (" + (", dump_file);
1401 print_generic_expr (dump_file, c->stride, 0);
1402 fputs (") + ", dump_file);
1403 dump_double_int (dump_file, c->index, false);
1404 fputs (" : ", dump_file);
1405 break;
1406 default:
1407 gcc_unreachable ();
1409 print_generic_expr (dump_file, c->cand_type, 0);
1410 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1411 c->basis, c->dependent, c->sibling);
1412 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1413 c->next_interp, c->dead_savings);
1414 if (c->def_phi)
1416 fputs (" phi: ", dump_file);
1417 print_gimple_stmt (dump_file, c->def_phi, 0, 0);
1419 fputs ("\n", dump_file);
1422 /* Dump the candidate vector for debug. */
1424 static void
1425 dump_cand_vec (void)
1427 unsigned i;
1428 slsr_cand_t c;
1430 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1432 FOR_EACH_VEC_ELT (cand_vec, i, c)
1433 dump_candidate (c);
1436 /* Callback used to dump the candidate chains hash table. */
1438 static int
1439 base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
1441 const_cand_chain_t chain = *((const_cand_chain_t *) slot);
1442 cand_chain_t p;
1444 print_generic_expr (dump_file, chain->base_expr, 0);
1445 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1447 for (p = chain->next; p; p = p->next)
1448 fprintf (dump_file, " -> %d", p->cand->cand_num);
1450 fputs ("\n", dump_file);
1451 return 1;
1454 /* Dump the candidate chains. */
1456 static void
1457 dump_cand_chains (void)
1459 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1460 htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
1461 fputs ("\n", dump_file);
1464 /* Dump the increment vector for debug. */
1466 static void
1467 dump_incr_vec (void)
1469 if (dump_file && (dump_flags & TDF_DETAILS))
1471 unsigned i;
1473 fprintf (dump_file, "\nIncrement vector:\n\n");
1475 for (i = 0; i < incr_vec_len; i++)
1477 fprintf (dump_file, "%3d increment: ", i);
1478 dump_double_int (dump_file, incr_vec[i].incr, false);
1479 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1480 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1481 fputs ("\n initializer: ", dump_file);
1482 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1483 fputs ("\n\n", dump_file);
1488 /* Recursive helper for unconditional_cands_with_known_stride_p.
1489 Returns TRUE iff C, its siblings, and its dependents are all
1490 unconditional candidates. */
1492 static bool
1493 unconditional_cands (slsr_cand_t c)
1495 if (c->def_phi)
1496 return false;
1498 if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
1499 return false;
1501 if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
1502 return false;
1504 return true;
1507 /* Determine whether or not the tree of candidates rooted at
1508 ROOT consists entirely of unconditional increments with
1509 an INTEGER_CST stride. */
1511 static bool
1512 unconditional_cands_with_known_stride_p (slsr_cand_t root)
1514 /* The stride is identical for all related candidates, so
1515 check it once. */
1516 if (TREE_CODE (root->stride) != INTEGER_CST)
1517 return false;
1519 return unconditional_cands (lookup_cand (root->dependent));
1522 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1523 data reference. */
1525 static void
1526 replace_ref (tree *expr, slsr_cand_t c)
1528 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1529 unsigned HOST_WIDE_INT misalign;
1530 unsigned align;
1532 /* Ensure the memory reference carries the minimum alignment
1533 requirement for the data type. See PR58041. */
1534 get_object_alignment_1 (*expr, &align, &misalign);
1535 if (misalign != 0)
1536 align = (misalign & -misalign);
1537 if (align < TYPE_ALIGN (acc_type))
1538 acc_type = build_aligned_type (acc_type, align);
1540 add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1541 c->base_expr, c->stride);
1542 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1543 double_int_to_tree (c->cand_type, c->index));
1545 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1546 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1547 TREE_OPERAND (mem_ref, 0)
1548 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1549 /*simple_p=*/true, NULL,
1550 /*before=*/true, GSI_SAME_STMT);
1551 copy_ref_info (mem_ref, *expr);
1552 *expr = mem_ref;
1553 update_stmt (c->cand_stmt);
1556 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1557 dependent of candidate C with an equivalent strength-reduced data
1558 reference. */
1560 static void
1561 replace_refs (slsr_cand_t c)
1563 if (gimple_vdef (c->cand_stmt))
1565 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1566 replace_ref (lhs, c);
1568 else
1570 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1571 replace_ref (rhs, c);
1574 if (c->sibling)
1575 replace_refs (lookup_cand (c->sibling));
1577 if (c->dependent)
1578 replace_refs (lookup_cand (c->dependent));
1581 /* Calculate the increment required for candidate C relative to
1582 its basis. */
1584 static double_int
1585 cand_increment (slsr_cand_t c)
1587 slsr_cand_t basis;
1589 /* If the candidate doesn't have a basis, just return its own
1590 index. This is useful in record_increments to help us find
1591 an existing initializer. */
1592 if (!c->basis)
1593 return c->index;
1595 basis = lookup_cand (c->basis);
1596 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1597 return c->index - basis->index;
1600 /* Calculate the increment required for candidate C relative to
1601 its basis. If we aren't going to generate pointer arithmetic
1602 for this candidate, return the absolute value of that increment
1603 instead. */
1605 static inline double_int
1606 cand_abs_increment (slsr_cand_t c)
1608 double_int increment = cand_increment (c);
1610 if (!address_arithmetic_p && increment.is_negative ())
1611 increment = -increment;
1613 return increment;
1616 /* If *VAR is NULL or is not of a compatible type with TYPE, create a
1617 new temporary reg of type TYPE and store it in *VAR. */
1619 static inline void
1620 lazy_create_slsr_reg (tree *var, tree type)
1622 if (!*var || !types_compatible_p (TREE_TYPE (*var), type))
1623 *var = create_tmp_reg (type, "slsr");
1626 /* Return TRUE iff candidate C has already been replaced under
1627 another interpretation. */
1629 static inline bool
1630 cand_already_replaced (slsr_cand_t c)
1632 return (gimple_bb (c->cand_stmt) == 0);
1635 /* Helper routine for replace_dependents, doing the work for a
1636 single candidate C. */
1638 static void
1639 replace_dependent (slsr_cand_t c, enum tree_code cand_code)
1641 double_int stride = tree_to_double_int (c->stride);
1642 double_int bump = cand_increment (c) * stride;
1643 gimple stmt_to_print = NULL;
1644 slsr_cand_t basis;
1645 tree basis_name, incr_type, bump_tree;
1646 enum tree_code code;
1648 /* It is highly unlikely, but possible, that the resulting
1649 bump doesn't fit in a HWI. Abandon the replacement
1650 in this case. Restriction to signed HWI is conservative
1651 for unsigned types but allows for safe negation without
1652 twisted logic. */
1653 if (!bump.fits_shwi ())
1654 return;
1656 basis = lookup_cand (c->basis);
1657 basis_name = gimple_assign_lhs (basis->cand_stmt);
1658 if (cand_code == POINTER_PLUS_EXPR)
1660 incr_type = sizetype;
1661 code = cand_code;
1663 else
1665 incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
1666 code = PLUS_EXPR;
1669 if (bump.is_negative ()
1670 && cand_code != POINTER_PLUS_EXPR)
1672 code = MINUS_EXPR;
1673 bump = -bump;
1676 bump_tree = double_int_to_tree (incr_type, bump);
1678 if (dump_file && (dump_flags & TDF_DETAILS))
1680 fputs ("Replacing: ", dump_file);
1681 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1684 if (bump.is_zero ())
1686 tree lhs = gimple_assign_lhs (c->cand_stmt);
1687 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
1688 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1689 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
1690 gsi_replace (&gsi, copy_stmt, false);
1691 if (dump_file && (dump_flags & TDF_DETAILS))
1692 stmt_to_print = copy_stmt;
1694 else
1696 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1697 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1698 if (cand_code != NEGATE_EXPR
1699 && ((operand_equal_p (rhs1, basis_name, 0)
1700 && operand_equal_p (rhs2, bump_tree, 0))
1701 || (operand_equal_p (rhs1, bump_tree, 0)
1702 && operand_equal_p (rhs2, basis_name, 0))))
1704 if (dump_file && (dump_flags & TDF_DETAILS))
1706 fputs ("(duplicate, not actually replacing)", dump_file);
1707 stmt_to_print = c->cand_stmt;
1710 else
1712 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1713 gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
1714 update_stmt (gsi_stmt (gsi));
1715 if (dump_file && (dump_flags & TDF_DETAILS))
1716 stmt_to_print = gsi_stmt (gsi);
1720 if (dump_file && (dump_flags & TDF_DETAILS))
1722 fputs ("With: ", dump_file);
1723 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
1724 fputs ("\n", dump_file);
1728 /* Replace candidate C, each sibling of candidate C, and each
1729 dependent of candidate C with an add or subtract. Note that we
1730 only operate on CAND_MULTs with known strides, so we will never
1731 generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is
1732 replaced by X = Y + ((i - i') * S), as described in the module
1733 commentary. The folded value ((i - i') * S) is referred to here
1734 as the "bump." */
1736 static void
1737 replace_dependents (slsr_cand_t c)
1739 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1741 /* It is not useful to replace casts, copies, or adds of an SSA name
1742 and a constant. Also skip candidates that have already been
1743 replaced under another interpretation. */
1744 if (cand_code != MODIFY_EXPR
1745 && cand_code != NOP_EXPR
1746 && c->kind == CAND_MULT
1747 && !cand_already_replaced (c))
1748 replace_dependent (c, cand_code);
1750 if (c->sibling)
1751 replace_dependents (lookup_cand (c->sibling));
1753 if (c->dependent)
1754 replace_dependents (lookup_cand (c->dependent));
1757 /* Return the index in the increment vector of the given INCREMENT. */
1759 static inline unsigned
1760 incr_vec_index (double_int increment)
1762 unsigned i;
1764 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
1767 gcc_assert (i < incr_vec_len);
1768 return i;
1771 /* Count the number of candidates in the tree rooted at C that have
1772 not already been replaced under other interpretations. */
1774 static unsigned
1775 count_candidates (slsr_cand_t c)
1777 unsigned count = cand_already_replaced (c) ? 0 : 1;
1779 if (c->sibling)
1780 count += count_candidates (lookup_cand (c->sibling));
1782 if (c->dependent)
1783 count += count_candidates (lookup_cand (c->dependent));
1785 return count;
1788 /* Increase the count of INCREMENT by one in the increment vector.
1789 INCREMENT is associated with candidate C. If an initializer
1790 T_0 = stride * I is provided by a candidate that dominates all
1791 candidates with the same increment, also record T_0 for subsequent use. */
1793 static void
1794 record_increment (slsr_cand_t c, double_int increment)
1796 bool found = false;
1797 unsigned i;
1799 /* Treat increments that differ only in sign as identical so as to
1800 share initializers, unless we are generating pointer arithmetic. */
1801 if (!address_arithmetic_p && increment.is_negative ())
1802 increment = -increment;
1804 for (i = 0; i < incr_vec_len; i++)
1806 if (incr_vec[i].incr == increment)
1808 incr_vec[i].count++;
1809 found = true;
1811 /* If we previously recorded an initializer that doesn't
1812 dominate this candidate, it's not going to be useful to
1813 us after all. */
1814 if (incr_vec[i].initializer
1815 && !dominated_by_p (CDI_DOMINATORS,
1816 gimple_bb (c->cand_stmt),
1817 incr_vec[i].init_bb))
1819 incr_vec[i].initializer = NULL_TREE;
1820 incr_vec[i].init_bb = NULL;
1823 break;
1827 if (!found)
1829 /* The first time we see an increment, create the entry for it.
1830 If this is the root candidate which doesn't have a basis, set
1831 the count to zero. We're only processing it so it can possibly
1832 provide an initializer for other candidates. */
1833 incr_vec[incr_vec_len].incr = increment;
1834 incr_vec[incr_vec_len].count = c->basis ? 1 : 0;
1835 incr_vec[incr_vec_len].cost = COST_INFINITE;
1837 /* Optimistically record the first occurrence of this increment
1838 as providing an initializer (if it does); we will revise this
1839 opinion later if it doesn't dominate all other occurrences.
1840 Exception: increments of -1, 0, 1 never need initializers. */
1841 if (c->kind == CAND_ADD
1842 && c->index == increment
1843 && (increment.sgt (double_int_one)
1844 || increment.slt (double_int_minus_one))
1845 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
1846 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
1848 tree t0 = NULL_TREE;
1849 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1850 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1851 if (operand_equal_p (rhs1, c->base_expr, 0))
1852 t0 = rhs2;
1853 else if (operand_equal_p (rhs2, c->base_expr, 0))
1854 t0 = rhs1;
1855 if (t0
1856 && SSA_NAME_DEF_STMT (t0)
1857 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
1859 incr_vec[incr_vec_len].initializer = t0;
1860 incr_vec[incr_vec_len++].init_bb
1861 = gimple_bb (SSA_NAME_DEF_STMT (t0));
1863 else
1865 incr_vec[incr_vec_len].initializer = NULL_TREE;
1866 incr_vec[incr_vec_len++].init_bb = NULL;
1869 else
1871 incr_vec[incr_vec_len].initializer = NULL_TREE;
1872 incr_vec[incr_vec_len++].init_bb = NULL;
1877 /* Determine how many times each unique increment occurs in the set
1878 of candidates rooted at C's parent, recording the data in the
1879 increment vector. For each unique increment I, if an initializer
1880 T_0 = stride * I is provided by a candidate that dominates all
1881 candidates with the same increment, also record T_0 for subsequent
1882 use. */
1884 static void
1885 record_increments (slsr_cand_t c)
1887 if (!cand_already_replaced (c))
1888 record_increment (c, cand_increment (c));
1890 if (c->sibling)
1891 record_increments (lookup_cand (c->sibling));
1893 if (c->dependent)
1894 record_increments (lookup_cand (c->dependent));
1897 /* Return the first candidate in the tree rooted at C that has not
1898 already been replaced, favoring siblings over dependents. */
1900 static slsr_cand_t
1901 unreplaced_cand_in_tree (slsr_cand_t c)
1903 if (!cand_already_replaced (c))
1904 return c;
1906 if (c->sibling)
1908 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
1909 if (sib)
1910 return sib;
1913 if (c->dependent)
1915 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
1916 if (dep)
1917 return dep;
1920 return NULL;
1923 /* Return TRUE if the candidates in the tree rooted at C should be
1924 optimized for speed, else FALSE. We estimate this based on the block
1925 containing the most dominant candidate in the tree that has not yet
1926 been replaced. */
1928 static bool
1929 optimize_cands_for_speed_p (slsr_cand_t c)
1931 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
1932 gcc_assert (c2);
1933 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
1936 /* Add COST_IN to the lowest cost of any dependent path starting at
1937 candidate C or any of its siblings, counting only candidates along
1938 such paths with increment INCR. Assume that replacing a candidate
1939 reduces cost by REPL_SAVINGS. Also account for savings from any
1940 statements that would go dead. */
1942 static int
1943 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, double_int incr)
1945 int local_cost, sib_cost;
1946 double_int cand_incr = cand_abs_increment (c);
1948 if (cand_already_replaced (c))
1949 local_cost = cost_in;
1950 else if (incr == cand_incr)
1951 local_cost = cost_in - repl_savings - c->dead_savings;
1952 else
1953 local_cost = cost_in - c->dead_savings;
1955 if (c->dependent)
1956 local_cost = lowest_cost_path (local_cost, repl_savings,
1957 lookup_cand (c->dependent), incr);
1959 if (c->sibling)
1961 sib_cost = lowest_cost_path (cost_in, repl_savings,
1962 lookup_cand (c->sibling), incr);
1963 local_cost = MIN (local_cost, sib_cost);
1966 return local_cost;
1969 /* Compute the total savings that would accrue from all replacements
1970 in the candidate tree rooted at C, counting only candidates with
1971 increment INCR. Assume that replacing a candidate reduces cost
1972 by REPL_SAVINGS. Also account for savings from statements that
1973 would go dead. */
1975 static int
1976 total_savings (int repl_savings, slsr_cand_t c, double_int incr)
1978 int savings = 0;
1979 double_int cand_incr = cand_abs_increment (c);
1981 if (incr == cand_incr && !cand_already_replaced (c))
1982 savings += repl_savings + c->dead_savings;
1984 if (c->dependent)
1985 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr);
1987 if (c->sibling)
1988 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr);
1990 return savings;
1993 /* Use target-specific costs to determine and record which increments
1994 in the current candidate tree are profitable to replace, assuming
1995 MODE and SPEED. FIRST_DEP is the first dependent of the root of
1996 the candidate tree.
1998 One slight limitation here is that we don't account for the possible
1999 introduction of casts in some cases. See replace_one_candidate for
2000 the cases where these are introduced. This should probably be cleaned
2001 up sometime. */
2003 static void
2004 analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
2006 unsigned i;
2008 for (i = 0; i < incr_vec_len; i++)
2010 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2012 /* If somehow this increment is bigger than a HWI, we won't
2013 be optimizing candidates that use it. And if the increment
2014 has a count of zero, nothing will be done with it. */
2015 if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
2016 incr_vec[i].cost = COST_INFINITE;
2018 /* Increments of 0, 1, and -1 are always profitable to replace,
2019 because they always replace a multiply or add with an add or
2020 copy, and may cause one or more existing instructions to go
2021 dead. Exception: -1 can't be assumed to be profitable for
2022 pointer addition. */
2023 else if (incr == 0
2024 || incr == 1
2025 || (incr == -1
2026 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2027 != POINTER_PLUS_EXPR)))
2028 incr_vec[i].cost = COST_NEUTRAL;
2030 /* FORNOW: If we need to add an initializer, give up if a cast from
2031 the candidate's type to its stride's type can lose precision.
2032 This could eventually be handled better by expressly retaining the
2033 result of a cast to a wider type in the stride. Example:
2035 short int _1;
2036 _2 = (int) _1;
2037 _3 = _2 * 10;
2038 _4 = x + _3; ADD: x + (10 * _1) : int
2039 _5 = _2 * 15;
2040 _6 = x + _3; ADD: x + (15 * _1) : int
2042 Right now replacing _6 would cause insertion of an initializer
2043 of the form "short int T = _1 * 5;" followed by a cast to
2044 int, which could overflow incorrectly. Had we recorded _2 or
2045 (int)_1 as the stride, this wouldn't happen. However, doing
2046 this breaks other opportunities, so this will require some
2047 care. */
2048 else if (!incr_vec[i].initializer
2049 && TREE_CODE (first_dep->stride) != INTEGER_CST
2050 && !legal_cast_p_1 (first_dep->stride,
2051 gimple_assign_lhs (first_dep->cand_stmt)))
2053 incr_vec[i].cost = COST_INFINITE;
2055 /* If we need to add an initializer, make sure we don't introduce
2056 a multiply by a pointer type, which can happen in certain cast
2057 scenarios. FIXME: When cleaning up these cast issues, we can
2058 afford to introduce the multiply provided we cast out to an
2059 unsigned int of appropriate size. */
2060 else if (!incr_vec[i].initializer
2061 && TREE_CODE (first_dep->stride) != INTEGER_CST
2062 && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))
2064 incr_vec[i].cost = COST_INFINITE;
2066 /* For any other increment, if this is a multiply candidate, we
2067 must introduce a temporary T and initialize it with
2068 T_0 = stride * increment. When optimizing for speed, walk the
2069 candidate tree to calculate the best cost reduction along any
2070 path; if it offsets the fixed cost of inserting the initializer,
2071 replacing the increment is profitable. When optimizing for
2072 size, instead calculate the total cost reduction from replacing
2073 all candidates with this increment. */
2074 else if (first_dep->kind == CAND_MULT)
2076 int cost = mult_by_coeff_cost (incr, mode, speed);
2077 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2078 if (speed)
2079 cost = lowest_cost_path (cost, repl_savings, first_dep,
2080 incr_vec[i].incr);
2081 else
2082 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr);
2084 incr_vec[i].cost = cost;
2087 /* If this is an add candidate, the initializer may already
2088 exist, so only calculate the cost of the initializer if it
2089 doesn't. We are replacing one add with another here, so the
2090 known replacement savings is zero. We will account for removal
2091 of dead instructions in lowest_cost_path or total_savings. */
2092 else
2094 int cost = 0;
2095 if (!incr_vec[i].initializer)
2096 cost = mult_by_coeff_cost (incr, mode, speed);
2098 if (speed)
2099 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr);
2100 else
2101 cost -= total_savings (0, first_dep, incr_vec[i].incr);
2103 incr_vec[i].cost = cost;
2108 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2109 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2110 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2111 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2112 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2114 static basic_block
2115 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2116 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2118 basic_block ncd;
2120 if (!bb1)
2122 *where = c2;
2123 return bb2;
2126 if (!bb2)
2128 *where = c1;
2129 return bb1;
2132 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2134 /* If both candidates are in the same block, the earlier
2135 candidate wins. */
2136 if (bb1 == ncd && bb2 == ncd)
2138 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2139 *where = c2;
2140 else
2141 *where = c1;
2144 /* Otherwise, if one of them produced a candidate in the
2145 dominator, that one wins. */
2146 else if (bb1 == ncd)
2147 *where = c1;
2149 else if (bb2 == ncd)
2150 *where = c2;
2152 /* If neither matches the dominator, neither wins. */
2153 else
2154 *where = NULL;
2156 return ncd;
2159 /* Consider all candidates in the tree rooted at C for which INCR
2160 represents the required increment of C relative to its basis.
2161 Find and return the basic block that most nearly dominates all
2162 such candidates. If the returned block contains one or more of
2163 the candidates, return the earliest candidate in the block in
2164 *WHERE. */
2166 static basic_block
2167 nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
2168 slsr_cand_t *where)
2170 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
2171 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
2172 double_int cand_incr;
2174 /* First find the NCD of all siblings and dependents. */
2175 if (c->sibling)
2176 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
2177 incr, &sib_where);
2178 if (c->dependent)
2179 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
2180 incr, &dep_where);
2181 if (!sib_ncd && !dep_ncd)
2183 new_where = NULL;
2184 ncd = NULL;
2186 else if (sib_ncd && !dep_ncd)
2188 new_where = sib_where;
2189 ncd = sib_ncd;
2191 else if (dep_ncd && !sib_ncd)
2193 new_where = dep_where;
2194 ncd = dep_ncd;
2196 else
2197 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
2198 dep_where, &new_where);
2200 /* If the candidate's increment doesn't match the one we're interested
2201 in, then the result depends only on siblings and dependents. */
2202 cand_incr = cand_abs_increment (c);
2204 if (cand_incr != incr || cand_already_replaced (c))
2206 *where = new_where;
2207 return ncd;
2210 /* Otherwise, compare this candidate with the result from all siblings
2211 and dependents. */
2212 this_where = c;
2213 this_ncd = gimple_bb (c->cand_stmt);
2214 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
2216 return ncd;
2219 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
2221 static inline bool
2222 profitable_increment_p (unsigned index)
2224 return (incr_vec[index].cost <= COST_NEUTRAL);
2227 /* For each profitable increment in the increment vector not equal to
2228 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
2229 dominator of all statements in the candidate chain rooted at C
2230 that require that increment, and insert an initializer
2231 T_0 = stride * increment at that location. Record T_0 with the
2232 increment record. */
2234 static void
2235 insert_initializers (slsr_cand_t c)
2237 unsigned i;
2238 tree new_var = NULL_TREE;
2240 for (i = 0; i < incr_vec_len; i++)
2242 basic_block bb;
2243 slsr_cand_t where = NULL;
2244 gimple init_stmt;
2245 tree stride_type, new_name, incr_tree;
2246 double_int incr = incr_vec[i].incr;
2248 if (!profitable_increment_p (i)
2249 || incr.is_one ()
2250 || (incr.is_minus_one ()
2251 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
2252 || incr.is_zero ())
2253 continue;
2255 /* We may have already identified an existing initializer that
2256 will suffice. */
2257 if (incr_vec[i].initializer)
2259 if (dump_file && (dump_flags & TDF_DETAILS))
2261 fputs ("Using existing initializer: ", dump_file);
2262 print_gimple_stmt (dump_file,
2263 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
2264 0, 0);
2266 continue;
2269 /* Find the block that most closely dominates all candidates
2270 with this increment. If there is at least one candidate in
2271 that block, the earliest one will be returned in WHERE. */
2272 bb = nearest_common_dominator_for_cands (c, incr, &where);
2274 /* Create a new SSA name to hold the initializer's value. */
2275 stride_type = TREE_TYPE (c->stride);
2276 lazy_create_slsr_reg (&new_var, stride_type);
2277 new_name = make_ssa_name (new_var, NULL);
2278 incr_vec[i].initializer = new_name;
2280 /* Create the initializer and insert it in the latest possible
2281 dominating position. */
2282 incr_tree = double_int_to_tree (stride_type, incr);
2283 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
2284 c->stride, incr_tree);
2285 if (where)
2287 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
2288 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2289 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
2291 else
2293 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2294 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
2296 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2297 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2298 else
2299 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
2301 gimple_set_location (init_stmt, gimple_location (basis_stmt));
2304 if (dump_file && (dump_flags & TDF_DETAILS))
2306 fputs ("Inserting initializer: ", dump_file);
2307 print_gimple_stmt (dump_file, init_stmt, 0, 0);
2312 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
2313 type TO_TYPE, and insert it in front of the statement represented
2314 by candidate C. Use *NEW_VAR to create the new SSA name. Return
2315 the new SSA name. */
2317 static tree
2318 introduce_cast_before_cand (slsr_cand_t c, tree to_type,
2319 tree from_expr, tree *new_var)
2321 tree cast_lhs;
2322 gimple cast_stmt;
2323 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2325 lazy_create_slsr_reg (new_var, to_type);
2326 cast_lhs = make_ssa_name (*new_var, NULL);
2327 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
2328 from_expr, NULL_TREE);
2329 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2330 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
2332 if (dump_file && (dump_flags & TDF_DETAILS))
2334 fputs (" Inserting: ", dump_file);
2335 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
2338 return cast_lhs;
2341 /* Replace the RHS of the statement represented by candidate C with
2342 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
2343 leave C unchanged or just interchange its operands. The original
2344 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
2345 If the replacement was made and we are doing a details dump,
2346 return the revised statement, else NULL. */
2348 static gimple
2349 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
2350 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
2351 slsr_cand_t c)
2353 if (new_code != old_code
2354 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
2355 || !operand_equal_p (new_rhs2, old_rhs2, 0))
2356 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
2357 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
2359 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2360 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
2361 update_stmt (gsi_stmt (gsi));
2363 if (dump_file && (dump_flags & TDF_DETAILS))
2364 return gsi_stmt (gsi);
2367 else if (dump_file && (dump_flags & TDF_DETAILS))
2368 fputs (" (duplicate, not actually replacing)\n", dump_file);
2370 return NULL;
2373 /* Strength-reduce the statement represented by candidate C by replacing
2374 it with an equivalent addition or subtraction. I is the index into
2375 the increment vector identifying C's increment. NEW_VAR is used to
2376 create a new SSA name if a cast needs to be introduced. BASIS_NAME
2377 is the rhs1 to use in creating the add/subtract. */
2379 static void
2380 replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var,
2381 tree basis_name)
2383 gimple stmt_to_print = NULL;
2384 tree orig_rhs1, orig_rhs2;
2385 tree rhs2;
2386 enum tree_code orig_code, repl_code;
2387 double_int cand_incr;
2389 orig_code = gimple_assign_rhs_code (c->cand_stmt);
2390 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2391 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2392 cand_incr = cand_increment (c);
2394 if (dump_file && (dump_flags & TDF_DETAILS))
2396 fputs ("Replacing: ", dump_file);
2397 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
2398 stmt_to_print = c->cand_stmt;
2401 if (address_arithmetic_p)
2402 repl_code = POINTER_PLUS_EXPR;
2403 else
2404 repl_code = PLUS_EXPR;
2406 /* If the increment has an initializer T_0, replace the candidate
2407 statement with an add of the basis name and the initializer. */
2408 if (incr_vec[i].initializer)
2410 tree init_type = TREE_TYPE (incr_vec[i].initializer);
2411 tree orig_type = TREE_TYPE (orig_rhs2);
2413 if (types_compatible_p (orig_type, init_type))
2414 rhs2 = incr_vec[i].initializer;
2415 else
2416 rhs2 = introduce_cast_before_cand (c, orig_type,
2417 incr_vec[i].initializer,
2418 new_var);
2420 if (incr_vec[i].incr != cand_incr)
2422 gcc_assert (repl_code == PLUS_EXPR);
2423 repl_code = MINUS_EXPR;
2426 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
2427 orig_code, orig_rhs1, orig_rhs2,
2431 /* Otherwise, the increment is one of -1, 0, and 1. Replace
2432 with a subtract of the stride from the basis name, a copy
2433 from the basis name, or an add of the stride to the basis
2434 name, respectively. It may be necessary to introduce a
2435 cast (or reuse an existing cast). */
2436 else if (cand_incr.is_one ())
2438 tree stride_type = TREE_TYPE (c->stride);
2439 tree orig_type = TREE_TYPE (orig_rhs2);
2441 if (types_compatible_p (orig_type, stride_type))
2442 rhs2 = c->stride;
2443 else
2444 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
2446 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
2447 orig_code, orig_rhs1, orig_rhs2,
2451 else if (cand_incr.is_minus_one ())
2453 tree stride_type = TREE_TYPE (c->stride);
2454 tree orig_type = TREE_TYPE (orig_rhs2);
2455 gcc_assert (repl_code != POINTER_PLUS_EXPR);
2457 if (types_compatible_p (orig_type, stride_type))
2458 rhs2 = c->stride;
2459 else
2460 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
2462 if (orig_code != MINUS_EXPR
2463 || !operand_equal_p (basis_name, orig_rhs1, 0)
2464 || !operand_equal_p (rhs2, orig_rhs2, 0))
2466 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2467 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
2468 update_stmt (gsi_stmt (gsi));
2470 if (dump_file && (dump_flags & TDF_DETAILS))
2471 stmt_to_print = gsi_stmt (gsi);
2473 else if (dump_file && (dump_flags & TDF_DETAILS))
2474 fputs (" (duplicate, not actually replacing)\n", dump_file);
2477 else if (cand_incr.is_zero ())
2479 tree lhs = gimple_assign_lhs (c->cand_stmt);
2480 tree lhs_type = TREE_TYPE (lhs);
2481 tree basis_type = TREE_TYPE (basis_name);
2483 if (types_compatible_p (lhs_type, basis_type))
2485 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
2486 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2487 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2488 gsi_replace (&gsi, copy_stmt, false);
2490 if (dump_file && (dump_flags & TDF_DETAILS))
2491 stmt_to_print = copy_stmt;
2493 else
2495 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2496 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
2497 basis_name,
2498 NULL_TREE);
2499 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2500 gsi_replace (&gsi, cast_stmt, false);
2502 if (dump_file && (dump_flags & TDF_DETAILS))
2503 stmt_to_print = cast_stmt;
2506 else
2507 gcc_unreachable ();
2509 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
2511 fputs ("With: ", dump_file);
2512 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2513 fputs ("\n", dump_file);
2517 /* For each candidate in the tree rooted at C, replace it with
2518 an increment if such has been shown to be profitable. */
2520 static void
2521 replace_profitable_candidates (slsr_cand_t c)
2523 if (!cand_already_replaced (c))
2525 double_int increment = cand_abs_increment (c);
2526 tree new_var = NULL;
2527 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
2528 unsigned i;
2530 i = incr_vec_index (increment);
2532 /* Only process profitable increments. Nothing useful can be done
2533 to a cast or copy. */
2534 if (profitable_increment_p (i)
2535 && orig_code != MODIFY_EXPR
2536 && orig_code != NOP_EXPR)
2538 slsr_cand_t basis = lookup_cand (c->basis);
2539 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
2540 replace_one_candidate (c, i, &new_var, basis_name);
2544 if (c->sibling)
2545 replace_profitable_candidates (lookup_cand (c->sibling));
2547 if (c->dependent)
2548 replace_profitable_candidates (lookup_cand (c->dependent));
2551 /* Analyze costs of related candidates in the candidate vector,
2552 and make beneficial replacements. */
2554 static void
2555 analyze_candidates_and_replace (void)
2557 unsigned i;
2558 slsr_cand_t c;
2560 /* Each candidate that has a null basis and a non-null
2561 dependent is the root of a tree of related statements.
2562 Analyze each tree to determine a subset of those
2563 statements that can be replaced with maximum benefit. */
2564 FOR_EACH_VEC_ELT (cand_vec, i, c)
2566 slsr_cand_t first_dep;
2568 if (c->basis != 0 || c->dependent == 0)
2569 continue;
2571 if (dump_file && (dump_flags & TDF_DETAILS))
2572 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
2573 c->cand_num);
2575 first_dep = lookup_cand (c->dependent);
2577 /* If this is a chain of CAND_REFs, unconditionally replace
2578 each of them with a strength-reduced data reference. */
2579 if (c->kind == CAND_REF)
2580 replace_refs (c);
2582 /* If the common stride of all related candidates is a
2583 known constant, and none of these has a phi-dependence,
2584 then all replacements are considered profitable.
2585 Each replaces a multiply by a single add, with the
2586 possibility that a feeding add also goes dead as a
2587 result. */
2588 else if (unconditional_cands_with_known_stride_p (c))
2589 replace_dependents (first_dep);
2591 /* When the stride is an SSA name, it may still be profitable
2592 to replace some or all of the dependent candidates, depending
2593 on whether the introduced increments can be reused, or are
2594 less expensive to calculate than the replaced statements. */
2595 else
2597 unsigned length;
2598 enum machine_mode mode;
2599 bool speed;
2601 /* Determine whether we'll be generating pointer arithmetic
2602 when replacing candidates. */
2603 address_arithmetic_p = (c->kind == CAND_ADD
2604 && POINTER_TYPE_P (c->cand_type));
2606 /* If all candidates have already been replaced under other
2607 interpretations, nothing remains to be done. */
2608 length = count_candidates (c);
2609 if (!length)
2610 continue;
2612 /* Construct an array of increments for this candidate chain. */
2613 incr_vec = XNEWVEC (incr_info, length);
2614 incr_vec_len = 0;
2615 record_increments (c);
2617 /* Determine which increments are profitable to replace. */
2618 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
2619 speed = optimize_cands_for_speed_p (c);
2620 analyze_increments (first_dep, mode, speed);
2622 /* Insert initializers of the form T_0 = stride * increment
2623 for use in profitable replacements. */
2624 insert_initializers (first_dep);
2625 dump_incr_vec ();
2627 /* Perform the replacements. */
2628 replace_profitable_candidates (first_dep);
2629 free (incr_vec);
2632 /* TODO: When conditional increments occur so that a
2633 candidate is dependent upon a phi-basis, the cost of
2634 introducing a temporary must be accounted for. */
2638 static unsigned
2639 execute_strength_reduction (void)
2641 struct dom_walk_data walk_data;
2643 /* Create the obstack where candidates will reside. */
2644 gcc_obstack_init (&cand_obstack);
2646 /* Allocate the candidate vector. */
2647 cand_vec.create (128);
2649 /* Allocate the mapping from statements to candidate indices. */
2650 stmt_cand_map = pointer_map_create ();
2652 /* Create the obstack where candidate chains will reside. */
2653 gcc_obstack_init (&chain_obstack);
2655 /* Allocate the mapping from base expressions to candidate chains. */
2656 base_cand_map = htab_create (500, base_cand_hash,
2657 base_cand_eq, base_cand_free);
2659 /* Initialize the loop optimizer. We need to detect flow across
2660 back edges, and this gives us dominator information as well. */
2661 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2663 /* Set up callbacks for the generic dominator tree walker. */
2664 walk_data.dom_direction = CDI_DOMINATORS;
2665 walk_data.initialize_block_local_data = NULL;
2666 walk_data.before_dom_children = find_candidates_in_block;
2667 walk_data.after_dom_children = NULL;
2668 walk_data.global_data = NULL;
2669 walk_data.block_local_data_size = 0;
2670 init_walk_dominator_tree (&walk_data);
2672 /* Walk the CFG in predominator order looking for strength reduction
2673 candidates. */
2674 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
2676 if (dump_file && (dump_flags & TDF_DETAILS))
2678 dump_cand_vec ();
2679 dump_cand_chains ();
2682 /* Analyze costs and make appropriate replacements. */
2683 analyze_candidates_and_replace ();
2685 /* Free resources. */
2686 fini_walk_dominator_tree (&walk_data);
2687 loop_optimizer_finalize ();
2688 htab_delete (base_cand_map);
2689 obstack_free (&chain_obstack, NULL);
2690 pointer_map_destroy (stmt_cand_map);
2691 cand_vec.release ();
2692 obstack_free (&cand_obstack, NULL);
2694 return 0;
2697 static bool
2698 gate_strength_reduction (void)
2700 return flag_tree_slsr;
2703 struct gimple_opt_pass pass_strength_reduction =
2706 GIMPLE_PASS,
2707 "slsr", /* name */
2708 OPTGROUP_NONE, /* optinfo_flags */
2709 gate_strength_reduction, /* gate */
2710 execute_strength_reduction, /* execute */
2711 NULL, /* sub */
2712 NULL, /* next */
2713 0, /* static_pass_number */
2714 TV_GIMPLE_SLSR, /* tv_id */
2715 PROP_cfg | PROP_ssa, /* properties_required */
2716 0, /* properties_provided */
2717 0, /* properties_destroyed */
2718 0, /* todo_flags_start */
2719 TODO_verify_ssa /* todo_flags_finish */