var-tracking.c (vt_add_function_parameter): Adjust for VEC changes.
[official-gcc.git] / gcc / gimple-ssa-strength-reduction.c
blob87684b3054362c3bceb050478d01cd8bfff81f9c
1 /* Straight-line strength reduction.
2 Copyright (C) 2012 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction will be implemented in four stages, gradually
28 adding more complex candidates:
30 1) Explicit multiplies, known constant multipliers, no
31 conditional increments. (complete)
32 2) Explicit multiplies, unknown constant multipliers,
33 no conditional increments. (complete)
34 3) Implicit multiplies in addressing expressions. (complete)
35 4) Explicit multiplies, conditional increments. (pending)
37 It would also be possible to apply strength reduction to divisions
38 and modulos, but such opportunities are relatively uncommon.
40 Strength reduction is also currently restricted to integer operations.
41 If desired, it could be extended to floating-point operations under
42 control of something like -funsafe-math-optimizations. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "basic-block.h"
50 #include "tree-pass.h"
51 #include "cfgloop.h"
52 #include "gimple-pretty-print.h"
53 #include "tree-flow.h"
54 #include "domwalk.h"
55 #include "pointer-set.h"
56 #include "expmed.h"
57 #include "params.h"
59 /* Information about a strength reduction candidate. Each statement
60 in the candidate table represents an expression of one of the
61 following forms (the special case of CAND_REF will be described
62 later):
64 (CAND_MULT) S1: X = (B + i) * S
65 (CAND_ADD) S1: X = B + (i * S)
67 Here X and B are SSA names, i is an integer constant, and S is
68 either an SSA name or a constant. We call B the "base," i the
69 "index", and S the "stride."
71 Any statement S0 that dominates S1 and is of the form:
73 (CAND_MULT) S0: Y = (B + i') * S
74 (CAND_ADD) S0: Y = B + (i' * S)
76 is called a "basis" for S1. In both cases, S1 may be replaced by
78 S1': X = Y + (i - i') * S,
80 where (i - i') * S is folded to the extent possible.
82 All gimple statements are visited in dominator order, and each
83 statement that may contribute to one of the forms of S1 above is
84 given at least one entry in the candidate table. Such statements
85 include addition, pointer addition, subtraction, multiplication,
86 negation, copies, and nontrivial type casts. If a statement may
87 represent more than one expression of the forms of S1 above,
88 multiple "interpretations" are stored in the table and chained
89 together. Examples:
91 * An add of two SSA names may treat either operand as the base.
92 * A multiply of two SSA names, likewise.
93 * A copy or cast may be thought of as either a CAND_MULT with
94 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
96 Candidate records are allocated from an obstack. They are addressed
97 both from a hash table keyed on S1, and from a vector of candidate
98 pointers arranged in predominator order.
100 Opportunity note
101 ----------------
102 Currently we don't recognize:
104 S0: Y = (S * i') - B
105 S1: X = (S * i) - B
107 as a strength reduction opportunity, even though this S1 would
108 also be replaceable by the S1' above. This can be added if it
109 comes up in practice.
111 Strength reduction in addressing
112 --------------------------------
113 There is another kind of candidate known as CAND_REF. A CAND_REF
114 describes a statement containing a memory reference having
115 complex addressing that might benefit from strength reduction.
116 Specifically, we are interested in references for which
117 get_inner_reference returns a base address, offset, and bitpos as
118 follows:
120 base: MEM_REF (T1, C1)
121 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
122 bitpos: C4 * BITS_PER_UNIT
124 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
125 arbitrary integer constants. Note that C2 may be zero, in which
126 case the offset will be MULT_EXPR (T2, C3).
128 When this pattern is recognized, the original memory reference
129 can be replaced with:
131 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
132 C1 + (C2 * C3) + C4)
134 which distributes the multiply to allow constant folding. When
135 two or more addressing expressions can be represented by MEM_REFs
136 of this form, differing only in the constants C1, C2, and C4,
137 making this substitution produces more efficient addressing during
138 the RTL phases. When there are not at least two expressions with
139 the same values of T1, T2, and C3, there is nothing to be gained
140 by the replacement.
142 Strength reduction of CAND_REFs uses the same infrastructure as
143 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
144 field, MULT_EXPR (T2, C3) in the stride (S) field, and
145 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
146 is thus another CAND_REF with the same B and S values. When at
147 least two CAND_REFs are chained together using the basis relation,
148 each of them is replaced as above, resulting in improved code
149 generation for addressing. */
152 /* Index into the candidate vector, offset by 1. VECs are zero-based,
153 while cand_idx's are one-based, with zero indicating null. */
154 typedef unsigned cand_idx;
156 /* The kind of candidate. */
157 enum cand_kind
159 CAND_MULT,
160 CAND_ADD,
161 CAND_REF
164 struct slsr_cand_d
166 /* The candidate statement S1. */
167 gimple cand_stmt;
169 /* The base expression B: often an SSA name, but not always. */
170 tree base_expr;
172 /* The stride S. */
173 tree stride;
175 /* The index constant i. */
176 double_int index;
178 /* The type of the candidate. This is normally the type of base_expr,
179 but casts may have occurred when combining feeding instructions.
180 A candidate can only be a basis for candidates of the same final type.
181 (For CAND_REFs, this is the type to be used for operand 1 of the
182 replacement MEM_REF.) */
183 tree cand_type;
185 /* The kind of candidate (CAND_MULT, etc.). */
186 enum cand_kind kind;
188 /* Index of this candidate in the candidate vector. */
189 cand_idx cand_num;
191 /* Index of the next candidate record for the same statement.
192 A statement may be useful in more than one way (e.g., due to
193 commutativity). So we can have multiple "interpretations"
194 of a statement. */
195 cand_idx next_interp;
197 /* Index of the basis statement S0, if any, in the candidate vector. */
198 cand_idx basis;
200 /* First candidate for which this candidate is a basis, if one exists. */
201 cand_idx dependent;
203 /* Next candidate having the same basis as this one. */
204 cand_idx sibling;
206 /* If this is a conditional candidate, the defining PHI statement
207 for the base SSA name B. For future use; always NULL for now. */
208 gimple def_phi;
210 /* Savings that can be expected from eliminating dead code if this
211 candidate is replaced. */
212 int dead_savings;
215 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
216 typedef const struct slsr_cand_d *const_slsr_cand_t;
218 /* Pointers to candidates are chained together as part of a mapping
219 from base expressions to the candidates that use them. */
221 struct cand_chain_d
223 /* Base expression for the chain of candidates: often, but not
224 always, an SSA name. */
225 tree base_expr;
227 /* Pointer to a candidate. */
228 slsr_cand_t cand;
230 /* Chain pointer. */
231 struct cand_chain_d *next;
235 typedef struct cand_chain_d cand_chain, *cand_chain_t;
236 typedef const struct cand_chain_d *const_cand_chain_t;
238 /* Information about a unique "increment" associated with candidates
239 having an SSA name for a stride. An increment is the difference
240 between the index of the candidate and the index of its basis,
241 i.e., (i - i') as discussed in the module commentary.
243 When we are not going to generate address arithmetic we treat
244 increments that differ only in sign as the same, allowing sharing
245 of the cost of initializers. The absolute value of the increment
246 is stored in the incr_info. */
248 struct incr_info_d
250 /* The increment that relates a candidate to its basis. */
251 double_int incr;
253 /* How many times the increment occurs in the candidate tree. */
254 unsigned count;
256 /* Cost of replacing candidates using this increment. Negative and
257 zero costs indicate replacement should be performed. */
258 int cost;
260 /* If this increment is profitable but is not -1, 0, or 1, it requires
261 an initializer T_0 = stride * incr to be found or introduced in the
262 nearest common dominator of all candidates. This field holds T_0
263 for subsequent use. */
264 tree initializer;
266 /* If the initializer was found to already exist, this is the block
267 where it was found. */
268 basic_block init_bb;
271 typedef struct incr_info_d incr_info, *incr_info_t;
273 /* Candidates are maintained in a vector. If candidate X dominates
274 candidate Y, then X appears before Y in the vector; but the
275 converse does not necessarily hold. */
276 DEF_VEC_P (slsr_cand_t);
277 DEF_VEC_ALLOC_P (slsr_cand_t, heap);
278 static VEC (slsr_cand_t, heap) *cand_vec;
280 enum cost_consts
282 COST_NEUTRAL = 0,
283 COST_INFINITE = 1000
286 /* Pointer map embodying a mapping from statements to candidates. */
287 static struct pointer_map_t *stmt_cand_map;
289 /* Obstack for candidates. */
290 static struct obstack cand_obstack;
292 /* Hash table embodying a mapping from base exprs to chains of candidates. */
293 static htab_t base_cand_map;
295 /* Obstack for candidate chains. */
296 static struct obstack chain_obstack;
298 /* An array INCR_VEC of incr_infos is used during analysis of related
299 candidates having an SSA name for a stride. INCR_VEC_LEN describes
300 its current length. */
301 static incr_info_t incr_vec;
302 static unsigned incr_vec_len;
304 /* For a chain of candidates with unknown stride, indicates whether or not
305 we must generate pointer arithmetic when replacing statements. */
306 static bool address_arithmetic_p;
308 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
310 static slsr_cand_t
311 lookup_cand (cand_idx idx)
313 return VEC_index (slsr_cand_t, cand_vec, idx - 1);
316 /* Callback to produce a hash value for a candidate chain header. */
318 static hashval_t
319 base_cand_hash (const void *p)
321 tree base_expr = ((const_cand_chain_t) p)->base_expr;
322 return iterative_hash_expr (base_expr, 0);
325 /* Callback when an element is removed from the hash table.
326 We never remove entries until the entire table is released. */
328 static void
329 base_cand_free (void *p ATTRIBUTE_UNUSED)
333 /* Callback to return true if two candidate chain headers are equal. */
335 static int
336 base_cand_eq (const void *p1, const void *p2)
338 const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
339 const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
340 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
343 /* Use the base expr from candidate C to look for possible candidates
344 that can serve as a basis for C. Each potential basis must also
345 appear in a block that dominates the candidate statement and have
346 the same stride and type. If more than one possible basis exists,
347 the one with highest index in the vector is chosen; this will be
348 the most immediately dominating basis. */
350 static int
351 find_basis_for_candidate (slsr_cand_t c)
353 cand_chain mapping_key;
354 cand_chain_t chain;
355 slsr_cand_t basis = NULL;
357 // Limit potential of N^2 behavior for long candidate chains.
358 int iters = 0;
359 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
361 mapping_key.base_expr = c->base_expr;
362 chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
364 for (; chain && iters < max_iters; chain = chain->next, ++iters)
366 slsr_cand_t one_basis = chain->cand;
368 if (one_basis->kind != c->kind
369 || !operand_equal_p (one_basis->stride, c->stride, 0)
370 || !types_compatible_p (one_basis->cand_type, c->cand_type)
371 || !dominated_by_p (CDI_DOMINATORS,
372 gimple_bb (c->cand_stmt),
373 gimple_bb (one_basis->cand_stmt)))
374 continue;
376 if (!basis || basis->cand_num < one_basis->cand_num)
377 basis = one_basis;
380 if (basis)
382 c->sibling = basis->dependent;
383 basis->dependent = c->cand_num;
384 return basis->cand_num;
387 return 0;
390 /* Record a mapping from the base expression of C to C itself, indicating that
391 C may potentially serve as a basis using that base expression. */
393 static void
394 record_potential_basis (slsr_cand_t c)
396 cand_chain_t node;
397 void **slot;
399 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
400 node->base_expr = c->base_expr;
401 node->cand = c;
402 node->next = NULL;
403 slot = htab_find_slot (base_cand_map, node, INSERT);
405 if (*slot)
407 cand_chain_t head = (cand_chain_t) (*slot);
408 node->next = head->next;
409 head->next = node;
411 else
412 *slot = node;
415 /* Allocate storage for a new candidate and initialize its fields.
416 Attempt to find a basis for the candidate. */
418 static slsr_cand_t
419 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
420 double_int index, tree stride, tree ctype,
421 unsigned savings)
423 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
424 sizeof (slsr_cand));
425 c->cand_stmt = gs;
426 c->base_expr = base;
427 c->stride = stride;
428 c->index = index;
429 c->cand_type = ctype;
430 c->kind = kind;
431 c->cand_num = VEC_length (slsr_cand_t, cand_vec) + 1;
432 c->next_interp = 0;
433 c->dependent = 0;
434 c->sibling = 0;
435 c->def_phi = NULL;
436 c->dead_savings = savings;
438 VEC_safe_push (slsr_cand_t, heap, cand_vec, c);
439 c->basis = find_basis_for_candidate (c);
440 record_potential_basis (c);
442 return c;
445 /* Determine the target cost of statement GS when compiling according
446 to SPEED. */
448 static int
449 stmt_cost (gimple gs, bool speed)
451 tree lhs, rhs1, rhs2;
452 enum machine_mode lhs_mode;
454 gcc_assert (is_gimple_assign (gs));
455 lhs = gimple_assign_lhs (gs);
456 rhs1 = gimple_assign_rhs1 (gs);
457 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
459 switch (gimple_assign_rhs_code (gs))
461 case MULT_EXPR:
462 rhs2 = gimple_assign_rhs2 (gs);
464 if (host_integerp (rhs2, 0))
465 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
467 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
468 return mul_cost (speed, lhs_mode);
470 case PLUS_EXPR:
471 case POINTER_PLUS_EXPR:
472 case MINUS_EXPR:
473 return add_cost (speed, lhs_mode);
475 case NEGATE_EXPR:
476 return neg_cost (speed, lhs_mode);
478 case NOP_EXPR:
479 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
481 /* Note that we don't assign costs to copies that in most cases
482 will go away. */
483 default:
487 gcc_unreachable ();
488 return 0;
491 /* Look up the defining statement for BASE_IN and return a pointer
492 to its candidate in the candidate table, if any; otherwise NULL.
493 Only CAND_ADD and CAND_MULT candidates are returned. */
495 static slsr_cand_t
496 base_cand_from_table (tree base_in)
498 slsr_cand_t *result;
500 gimple def = SSA_NAME_DEF_STMT (base_in);
501 if (!def)
502 return (slsr_cand_t) NULL;
504 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
506 if (result && (*result)->kind != CAND_REF)
507 return *result;
509 return (slsr_cand_t) NULL;
512 /* Add an entry to the statement-to-candidate mapping. */
514 static void
515 add_cand_for_stmt (gimple gs, slsr_cand_t c)
517 void **slot = pointer_map_insert (stmt_cand_map, gs);
518 gcc_assert (!*slot);
519 *slot = c;
522 /* Look for the following pattern:
524 *PBASE: MEM_REF (T1, C1)
526 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
528 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
530 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
532 *PINDEX: C4 * BITS_PER_UNIT
534 If not present, leave the input values unchanged and return FALSE.
535 Otherwise, modify the input values as follows and return TRUE:
537 *PBASE: T1
538 *POFFSET: MULT_EXPR (T2, C3)
539 *PINDEX: C1 + (C2 * C3) + C4 */
541 static bool
542 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
543 tree *ptype)
545 tree base = *pbase, offset = *poffset;
546 double_int index = *pindex;
547 double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
548 tree mult_op0, mult_op1, t1, t2, type;
549 double_int c1, c2, c3, c4;
551 if (!base
552 || !offset
553 || TREE_CODE (base) != MEM_REF
554 || TREE_CODE (offset) != MULT_EXPR
555 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
556 || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
557 return false;
559 t1 = TREE_OPERAND (base, 0);
560 c1 = mem_ref_offset (base);
561 type = TREE_TYPE (TREE_OPERAND (base, 1));
563 mult_op0 = TREE_OPERAND (offset, 0);
564 mult_op1 = TREE_OPERAND (offset, 1);
566 c3 = tree_to_double_int (mult_op1);
568 if (TREE_CODE (mult_op0) == PLUS_EXPR)
570 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
572 t2 = TREE_OPERAND (mult_op0, 0);
573 c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
575 else
576 return false;
578 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
580 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
582 t2 = TREE_OPERAND (mult_op0, 0);
583 c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
585 else
586 return false;
588 else
590 t2 = mult_op0;
591 c2 = double_int_zero;
594 c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
596 *pbase = t1;
597 *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
598 double_int_to_tree (sizetype, c3));
599 *pindex = c1 + c2 * c3 + c4;
600 *ptype = type;
602 return true;
605 /* Given GS which contains a data reference, create a CAND_REF entry in
606 the candidate table and attempt to find a basis. */
608 static void
609 slsr_process_ref (gimple gs)
611 tree ref_expr, base, offset, type;
612 HOST_WIDE_INT bitsize, bitpos;
613 enum machine_mode mode;
614 int unsignedp, volatilep;
615 double_int index;
616 slsr_cand_t c;
618 if (gimple_vdef (gs))
619 ref_expr = gimple_assign_lhs (gs);
620 else
621 ref_expr = gimple_assign_rhs1 (gs);
623 if (!handled_component_p (ref_expr)
624 || TREE_CODE (ref_expr) == BIT_FIELD_REF
625 || (TREE_CODE (ref_expr) == COMPONENT_REF
626 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
627 return;
629 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
630 &unsignedp, &volatilep, false);
631 index = double_int::from_uhwi (bitpos);
633 if (!restructure_reference (&base, &offset, &index, &type))
634 return;
636 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
637 type, 0);
639 /* Add the candidate to the statement-candidate mapping. */
640 add_cand_for_stmt (gs, c);
643 /* Create a candidate entry for a statement GS, where GS multiplies
644 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
645 about the two SSA names into the new candidate. Return the new
646 candidate. */
648 static slsr_cand_t
649 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
651 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
652 double_int index;
653 unsigned savings = 0;
654 slsr_cand_t c;
655 slsr_cand_t base_cand = base_cand_from_table (base_in);
657 /* Look at all interpretations of the base candidate, if necessary,
658 to find information to propagate into this candidate. */
659 while (base_cand && !base)
662 if (base_cand->kind == CAND_MULT
663 && operand_equal_p (base_cand->stride, integer_one_node, 0))
665 /* Y = (B + i') * 1
666 X = Y * Z
667 ================
668 X = (B + i') * Z */
669 base = base_cand->base_expr;
670 index = base_cand->index;
671 stride = stride_in;
672 ctype = base_cand->cand_type;
673 if (has_single_use (base_in))
674 savings = (base_cand->dead_savings
675 + stmt_cost (base_cand->cand_stmt, speed));
677 else if (base_cand->kind == CAND_ADD
678 && TREE_CODE (base_cand->stride) == INTEGER_CST)
680 /* Y = B + (i' * S), S constant
681 X = Y * Z
682 ============================
683 X = B + ((i' * S) * Z) */
684 base = base_cand->base_expr;
685 index = base_cand->index * tree_to_double_int (base_cand->stride);
686 stride = stride_in;
687 ctype = base_cand->cand_type;
688 if (has_single_use (base_in))
689 savings = (base_cand->dead_savings
690 + stmt_cost (base_cand->cand_stmt, speed));
693 if (base_cand->next_interp)
694 base_cand = lookup_cand (base_cand->next_interp);
695 else
696 base_cand = NULL;
699 if (!base)
701 /* No interpretations had anything useful to propagate, so
702 produce X = (Y + 0) * Z. */
703 base = base_in;
704 index = double_int_zero;
705 stride = stride_in;
706 ctype = TREE_TYPE (base_in);
709 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
710 ctype, savings);
711 return c;
714 /* Create a candidate entry for a statement GS, where GS multiplies
715 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
716 information about BASE_IN into the new candidate. Return the new
717 candidate. */
719 static slsr_cand_t
720 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
722 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
723 double_int index, temp;
724 unsigned savings = 0;
725 slsr_cand_t c;
726 slsr_cand_t base_cand = base_cand_from_table (base_in);
728 /* Look at all interpretations of the base candidate, if necessary,
729 to find information to propagate into this candidate. */
730 while (base_cand && !base)
732 if (base_cand->kind == CAND_MULT
733 && TREE_CODE (base_cand->stride) == INTEGER_CST)
735 /* Y = (B + i') * S, S constant
736 X = Y * c
737 ============================
738 X = (B + i') * (S * c) */
739 base = base_cand->base_expr;
740 index = base_cand->index;
741 temp = tree_to_double_int (base_cand->stride)
742 * tree_to_double_int (stride_in);
743 stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
744 ctype = base_cand->cand_type;
745 if (has_single_use (base_in))
746 savings = (base_cand->dead_savings
747 + stmt_cost (base_cand->cand_stmt, speed));
749 else if (base_cand->kind == CAND_ADD
750 && operand_equal_p (base_cand->stride, integer_one_node, 0))
752 /* Y = B + (i' * 1)
753 X = Y * c
754 ===========================
755 X = (B + i') * c */
756 base = base_cand->base_expr;
757 index = base_cand->index;
758 stride = stride_in;
759 ctype = base_cand->cand_type;
760 if (has_single_use (base_in))
761 savings = (base_cand->dead_savings
762 + stmt_cost (base_cand->cand_stmt, speed));
764 else if (base_cand->kind == CAND_ADD
765 && base_cand->index.is_one ()
766 && TREE_CODE (base_cand->stride) == INTEGER_CST)
768 /* Y = B + (1 * S), S constant
769 X = Y * c
770 ===========================
771 X = (B + S) * c */
772 base = base_cand->base_expr;
773 index = tree_to_double_int (base_cand->stride);
774 stride = stride_in;
775 ctype = base_cand->cand_type;
776 if (has_single_use (base_in))
777 savings = (base_cand->dead_savings
778 + stmt_cost (base_cand->cand_stmt, speed));
781 if (base_cand->next_interp)
782 base_cand = lookup_cand (base_cand->next_interp);
783 else
784 base_cand = NULL;
787 if (!base)
789 /* No interpretations had anything useful to propagate, so
790 produce X = (Y + 0) * c. */
791 base = base_in;
792 index = double_int_zero;
793 stride = stride_in;
794 ctype = TREE_TYPE (base_in);
797 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
798 ctype, savings);
799 return c;
802 /* Given GS which is a multiply of scalar integers, make an appropriate
803 entry in the candidate table. If this is a multiply of two SSA names,
804 create two CAND_MULT interpretations and attempt to find a basis for
805 each of them. Otherwise, create a single CAND_MULT and attempt to
806 find a basis. */
808 static void
809 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
811 slsr_cand_t c, c2;
813 /* If this is a multiply of an SSA name with itself, it is highly
814 unlikely that we will get a strength reduction opportunity, so
815 don't record it as a candidate. This simplifies the logic for
816 finding a basis, so if this is removed that must be considered. */
817 if (rhs1 == rhs2)
818 return;
820 if (TREE_CODE (rhs2) == SSA_NAME)
822 /* Record an interpretation of this statement in the candidate table
823 assuming RHS1 is the base expression and RHS2 is the stride. */
824 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
826 /* Add the first interpretation to the statement-candidate mapping. */
827 add_cand_for_stmt (gs, c);
829 /* Record another interpretation of this statement assuming RHS1
830 is the stride and RHS2 is the base expression. */
831 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
832 c->next_interp = c2->cand_num;
834 else
836 /* Record an interpretation for the multiply-immediate. */
837 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
839 /* Add the interpretation to the statement-candidate mapping. */
840 add_cand_for_stmt (gs, c);
844 /* Create a candidate entry for a statement GS, where GS adds two
845 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
846 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
847 information about the two SSA names into the new candidate.
848 Return the new candidate. */
850 static slsr_cand_t
851 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
852 bool subtract_p, bool speed)
854 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
855 double_int index;
856 unsigned savings = 0;
857 slsr_cand_t c;
858 slsr_cand_t base_cand = base_cand_from_table (base_in);
859 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
861 /* The most useful transformation is a multiply-immediate feeding
862 an add or subtract. Look for that first. */
863 while (addend_cand && !base)
865 if (addend_cand->kind == CAND_MULT
866 && addend_cand->index.is_zero ()
867 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
869 /* Z = (B + 0) * S, S constant
870 X = Y +/- Z
871 ===========================
872 X = Y + ((+/-1 * S) * B) */
873 base = base_in;
874 index = tree_to_double_int (addend_cand->stride);
875 if (subtract_p)
876 index = -index;
877 stride = addend_cand->base_expr;
878 ctype = TREE_TYPE (base_in);
879 if (has_single_use (addend_in))
880 savings = (addend_cand->dead_savings
881 + stmt_cost (addend_cand->cand_stmt, speed));
884 if (addend_cand->next_interp)
885 addend_cand = lookup_cand (addend_cand->next_interp);
886 else
887 addend_cand = NULL;
890 while (base_cand && !base)
892 if (base_cand->kind == CAND_ADD
893 && (base_cand->index.is_zero ()
894 || operand_equal_p (base_cand->stride,
895 integer_zero_node, 0)))
897 /* Y = B + (i' * S), i' * S = 0
898 X = Y +/- Z
899 ============================
900 X = B + (+/-1 * Z) */
901 base = base_cand->base_expr;
902 index = subtract_p ? double_int_minus_one : double_int_one;
903 stride = addend_in;
904 ctype = base_cand->cand_type;
905 if (has_single_use (base_in))
906 savings = (base_cand->dead_savings
907 + stmt_cost (base_cand->cand_stmt, speed));
909 else if (subtract_p)
911 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
913 while (subtrahend_cand && !base)
915 if (subtrahend_cand->kind == CAND_MULT
916 && subtrahend_cand->index.is_zero ()
917 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
919 /* Z = (B + 0) * S, S constant
920 X = Y - Z
921 ===========================
922 Value: X = Y + ((-1 * S) * B) */
923 base = base_in;
924 index = tree_to_double_int (subtrahend_cand->stride);
925 index = -index;
926 stride = subtrahend_cand->base_expr;
927 ctype = TREE_TYPE (base_in);
928 if (has_single_use (addend_in))
929 savings = (subtrahend_cand->dead_savings
930 + stmt_cost (subtrahend_cand->cand_stmt, speed));
933 if (subtrahend_cand->next_interp)
934 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
935 else
936 subtrahend_cand = NULL;
940 if (base_cand->next_interp)
941 base_cand = lookup_cand (base_cand->next_interp);
942 else
943 base_cand = NULL;
946 if (!base)
948 /* No interpretations had anything useful to propagate, so
949 produce X = Y + (1 * Z). */
950 base = base_in;
951 index = subtract_p ? double_int_minus_one : double_int_one;
952 stride = addend_in;
953 ctype = TREE_TYPE (base_in);
956 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
957 ctype, savings);
958 return c;
961 /* Create a candidate entry for a statement GS, where GS adds SSA
962 name BASE_IN to constant INDEX_IN. Propagate any known information
963 about BASE_IN into the new candidate. Return the new candidate. */
965 static slsr_cand_t
966 create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
968 enum cand_kind kind = CAND_ADD;
969 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
970 double_int index, multiple;
971 unsigned savings = 0;
972 slsr_cand_t c;
973 slsr_cand_t base_cand = base_cand_from_table (base_in);
975 while (base_cand && !base)
977 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
979 if (TREE_CODE (base_cand->stride) == INTEGER_CST
980 && index_in.multiple_of (tree_to_double_int (base_cand->stride),
981 unsigned_p, &multiple))
983 /* Y = (B + i') * S, S constant, c = kS for some integer k
984 X = Y + c
985 ============================
986 X = (B + (i'+ k)) * S
988 Y = B + (i' * S), S constant, c = kS for some integer k
989 X = Y + c
990 ============================
991 X = (B + (i'+ k)) * S */
992 kind = base_cand->kind;
993 base = base_cand->base_expr;
994 index = base_cand->index + multiple;
995 stride = base_cand->stride;
996 ctype = base_cand->cand_type;
997 if (has_single_use (base_in))
998 savings = (base_cand->dead_savings
999 + stmt_cost (base_cand->cand_stmt, speed));
1002 if (base_cand->next_interp)
1003 base_cand = lookup_cand (base_cand->next_interp);
1004 else
1005 base_cand = NULL;
1008 if (!base)
1010 /* No interpretations had anything useful to propagate, so
1011 produce X = Y + (c * 1). */
1012 kind = CAND_ADD;
1013 base = base_in;
1014 index = index_in;
1015 stride = integer_one_node;
1016 ctype = TREE_TYPE (base_in);
1019 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1020 ctype, savings);
1021 return c;
1024 /* Given GS which is an add or subtract of scalar integers or pointers,
1025 make at least one appropriate entry in the candidate table. */
1027 static void
1028 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1030 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1031 slsr_cand_t c = NULL, c2;
1033 if (TREE_CODE (rhs2) == SSA_NAME)
1035 /* First record an interpretation assuming RHS1 is the base expression
1036 and RHS2 is the stride. But it doesn't make sense for the
1037 stride to be a pointer, so don't record a candidate in that case. */
1038 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1040 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1042 /* Add the first interpretation to the statement-candidate
1043 mapping. */
1044 add_cand_for_stmt (gs, c);
1047 /* If the two RHS operands are identical, or this is a subtract,
1048 we're done. */
1049 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1050 return;
1052 /* Otherwise, record another interpretation assuming RHS2 is the
1053 base expression and RHS1 is the stride, again provided that the
1054 stride is not a pointer. */
1055 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1057 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1058 if (c)
1059 c->next_interp = c2->cand_num;
1060 else
1061 add_cand_for_stmt (gs, c2);
1064 else
1066 double_int index;
1068 /* Record an interpretation for the add-immediate. */
1069 index = tree_to_double_int (rhs2);
1070 if (subtract_p)
1071 index = -index;
1073 c = create_add_imm_cand (gs, rhs1, index, speed);
1075 /* Add the interpretation to the statement-candidate mapping. */
1076 add_cand_for_stmt (gs, c);
1080 /* Given GS which is a negate of a scalar integer, make an appropriate
1081 entry in the candidate table. A negate is equivalent to a multiply
1082 by -1. */
1084 static void
1085 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1087 /* Record a CAND_MULT interpretation for the multiply by -1. */
1088 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1090 /* Add the interpretation to the statement-candidate mapping. */
1091 add_cand_for_stmt (gs, c);
1094 /* Help function for legal_cast_p, operating on two trees. Checks
1095 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1096 for more details. */
1098 static bool
1099 legal_cast_p_1 (tree lhs, tree rhs)
1101 tree lhs_type, rhs_type;
1102 unsigned lhs_size, rhs_size;
1103 bool lhs_wraps, rhs_wraps;
1105 lhs_type = TREE_TYPE (lhs);
1106 rhs_type = TREE_TYPE (rhs);
1107 lhs_size = TYPE_PRECISION (lhs_type);
1108 rhs_size = TYPE_PRECISION (rhs_type);
1109 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1110 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1112 if (lhs_size < rhs_size
1113 || (rhs_wraps && !lhs_wraps)
1114 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1115 return false;
1117 return true;
1120 /* Return TRUE if GS is a statement that defines an SSA name from
1121 a conversion and is legal for us to combine with an add and multiply
1122 in the candidate table. For example, suppose we have:
1124 A = B + i;
1125 C = (type) A;
1126 D = C * S;
1128 Without the type-cast, we would create a CAND_MULT for D with base B,
1129 index i, and stride S. We want to record this candidate only if it
1130 is equivalent to apply the type cast following the multiply:
1132 A = B + i;
1133 E = A * S;
1134 D = (type) E;
1136 We will record the type with the candidate for D. This allows us
1137 to use a similar previous candidate as a basis. If we have earlier seen
1139 A' = B + i';
1140 C' = (type) A';
1141 D' = C' * S;
1143 we can replace D with
1145 D = D' + (i - i') * S;
1147 But if moving the type-cast would change semantics, we mustn't do this.
1149 This is legitimate for casts from a non-wrapping integral type to
1150 any integral type of the same or larger size. It is not legitimate
1151 to convert a wrapping type to a non-wrapping type, or to a wrapping
1152 type of a different size. I.e., with a wrapping type, we must
1153 assume that the addition B + i could wrap, in which case performing
1154 the multiply before or after one of the "illegal" type casts will
1155 have different semantics. */
1157 static bool
1158 legal_cast_p (gimple gs, tree rhs)
1160 if (!is_gimple_assign (gs)
1161 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1162 return false;
1164 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1167 /* Given GS which is a cast to a scalar integer type, determine whether
1168 the cast is legal for strength reduction. If so, make at least one
1169 appropriate entry in the candidate table. */
1171 static void
1172 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1174 tree lhs, ctype;
1175 slsr_cand_t base_cand, c, c2;
1176 unsigned savings = 0;
1178 if (!legal_cast_p (gs, rhs1))
1179 return;
1181 lhs = gimple_assign_lhs (gs);
1182 base_cand = base_cand_from_table (rhs1);
1183 ctype = TREE_TYPE (lhs);
1185 if (base_cand)
1187 while (base_cand)
1189 /* Propagate all data from the base candidate except the type,
1190 which comes from the cast, and the base candidate's cast,
1191 which is no longer applicable. */
1192 if (has_single_use (rhs1))
1193 savings = (base_cand->dead_savings
1194 + stmt_cost (base_cand->cand_stmt, speed));
1196 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1197 base_cand->base_expr,
1198 base_cand->index, base_cand->stride,
1199 ctype, savings);
1200 if (base_cand->next_interp)
1201 base_cand = lookup_cand (base_cand->next_interp);
1202 else
1203 base_cand = NULL;
1206 else
1208 /* If nothing is known about the RHS, create fresh CAND_ADD and
1209 CAND_MULT interpretations:
1211 X = Y + (0 * 1)
1212 X = (Y + 0) * 1
1214 The first of these is somewhat arbitrary, but the choice of
1215 1 for the stride simplifies the logic for propagating casts
1216 into their uses. */
1217 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1218 integer_one_node, ctype, 0);
1219 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1220 integer_one_node, ctype, 0);
1221 c->next_interp = c2->cand_num;
1224 /* Add the first (or only) interpretation to the statement-candidate
1225 mapping. */
1226 add_cand_for_stmt (gs, c);
1229 /* Given GS which is a copy of a scalar integer type, make at least one
1230 appropriate entry in the candidate table.
1232 This interface is included for completeness, but is unnecessary
1233 if this pass immediately follows a pass that performs copy
1234 propagation, such as DOM. */
1236 static void
1237 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1239 slsr_cand_t base_cand, c, c2;
1240 unsigned savings = 0;
1242 base_cand = base_cand_from_table (rhs1);
1244 if (base_cand)
1246 while (base_cand)
1248 /* Propagate all data from the base candidate. */
1249 if (has_single_use (rhs1))
1250 savings = (base_cand->dead_savings
1251 + stmt_cost (base_cand->cand_stmt, speed));
1253 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1254 base_cand->base_expr,
1255 base_cand->index, base_cand->stride,
1256 base_cand->cand_type, savings);
1257 if (base_cand->next_interp)
1258 base_cand = lookup_cand (base_cand->next_interp);
1259 else
1260 base_cand = NULL;
1263 else
1265 /* If nothing is known about the RHS, create fresh CAND_ADD and
1266 CAND_MULT interpretations:
1268 X = Y + (0 * 1)
1269 X = (Y + 0) * 1
1271 The first of these is somewhat arbitrary, but the choice of
1272 1 for the stride simplifies the logic for propagating casts
1273 into their uses. */
1274 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1275 integer_one_node, TREE_TYPE (rhs1), 0);
1276 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1277 integer_one_node, TREE_TYPE (rhs1), 0);
1278 c->next_interp = c2->cand_num;
1281 /* Add the first (or only) interpretation to the statement-candidate
1282 mapping. */
1283 add_cand_for_stmt (gs, c);
1286 /* Find strength-reduction candidates in block BB. */
1288 static void
1289 find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
1290 basic_block bb)
1292 bool speed = optimize_bb_for_speed_p (bb);
1293 gimple_stmt_iterator gsi;
1295 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1297 gimple gs = gsi_stmt (gsi);
1299 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1300 slsr_process_ref (gs);
1302 else if (is_gimple_assign (gs)
1303 && SCALAR_INT_MODE_P
1304 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1306 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1308 switch (gimple_assign_rhs_code (gs))
1310 case MULT_EXPR:
1311 case PLUS_EXPR:
1312 rhs1 = gimple_assign_rhs1 (gs);
1313 rhs2 = gimple_assign_rhs2 (gs);
1314 /* Should never happen, but currently some buggy situations
1315 in earlier phases put constants in rhs1. */
1316 if (TREE_CODE (rhs1) != SSA_NAME)
1317 continue;
1318 break;
1320 /* Possible future opportunity: rhs1 of a ptr+ can be
1321 an ADDR_EXPR. */
1322 case POINTER_PLUS_EXPR:
1323 case MINUS_EXPR:
1324 rhs2 = gimple_assign_rhs2 (gs);
1325 /* Fall-through. */
1327 case NOP_EXPR:
1328 case MODIFY_EXPR:
1329 case NEGATE_EXPR:
1330 rhs1 = gimple_assign_rhs1 (gs);
1331 if (TREE_CODE (rhs1) != SSA_NAME)
1332 continue;
1333 break;
1335 default:
1339 switch (gimple_assign_rhs_code (gs))
1341 case MULT_EXPR:
1342 slsr_process_mul (gs, rhs1, rhs2, speed);
1343 break;
1345 case PLUS_EXPR:
1346 case POINTER_PLUS_EXPR:
1347 case MINUS_EXPR:
1348 slsr_process_add (gs, rhs1, rhs2, speed);
1349 break;
1351 case NEGATE_EXPR:
1352 slsr_process_neg (gs, rhs1, speed);
1353 break;
1355 case NOP_EXPR:
1356 slsr_process_cast (gs, rhs1, speed);
1357 break;
1359 case MODIFY_EXPR:
1360 slsr_process_copy (gs, rhs1, speed);
1361 break;
1363 default:
1370 /* Dump a candidate for debug. */
1372 static void
1373 dump_candidate (slsr_cand_t c)
1375 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1376 gimple_bb (c->cand_stmt)->index);
1377 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1378 switch (c->kind)
1380 case CAND_MULT:
1381 fputs (" MULT : (", dump_file);
1382 print_generic_expr (dump_file, c->base_expr, 0);
1383 fputs (" + ", dump_file);
1384 dump_double_int (dump_file, c->index, false);
1385 fputs (") * ", dump_file);
1386 print_generic_expr (dump_file, c->stride, 0);
1387 fputs (" : ", dump_file);
1388 break;
1389 case CAND_ADD:
1390 fputs (" ADD : ", dump_file);
1391 print_generic_expr (dump_file, c->base_expr, 0);
1392 fputs (" + (", dump_file);
1393 dump_double_int (dump_file, c->index, false);
1394 fputs (" * ", dump_file);
1395 print_generic_expr (dump_file, c->stride, 0);
1396 fputs (") : ", dump_file);
1397 break;
1398 case CAND_REF:
1399 fputs (" REF : ", dump_file);
1400 print_generic_expr (dump_file, c->base_expr, 0);
1401 fputs (" + (", dump_file);
1402 print_generic_expr (dump_file, c->stride, 0);
1403 fputs (") + ", dump_file);
1404 dump_double_int (dump_file, c->index, false);
1405 fputs (" : ", dump_file);
1406 break;
1407 default:
1408 gcc_unreachable ();
1410 print_generic_expr (dump_file, c->cand_type, 0);
1411 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1412 c->basis, c->dependent, c->sibling);
1413 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1414 c->next_interp, c->dead_savings);
1415 if (c->def_phi)
1417 fputs (" phi: ", dump_file);
1418 print_gimple_stmt (dump_file, c->def_phi, 0, 0);
1420 fputs ("\n", dump_file);
1423 /* Dump the candidate vector for debug. */
1425 static void
1426 dump_cand_vec (void)
1428 unsigned i;
1429 slsr_cand_t c;
1431 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1433 FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
1434 dump_candidate (c);
1437 /* Callback used to dump the candidate chains hash table. */
1439 static int
1440 base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
1442 const_cand_chain_t chain = *((const_cand_chain_t *) slot);
1443 cand_chain_t p;
1445 print_generic_expr (dump_file, chain->base_expr, 0);
1446 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1448 for (p = chain->next; p; p = p->next)
1449 fprintf (dump_file, " -> %d", p->cand->cand_num);
1451 fputs ("\n", dump_file);
1452 return 1;
1455 /* Dump the candidate chains. */
1457 static void
1458 dump_cand_chains (void)
1460 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1461 htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
1462 fputs ("\n", dump_file);
1465 /* Dump the increment vector for debug. */
1467 static void
1468 dump_incr_vec (void)
1470 if (dump_file && (dump_flags & TDF_DETAILS))
1472 unsigned i;
1474 fprintf (dump_file, "\nIncrement vector:\n\n");
1476 for (i = 0; i < incr_vec_len; i++)
1478 fprintf (dump_file, "%3d increment: ", i);
1479 dump_double_int (dump_file, incr_vec[i].incr, false);
1480 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1481 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1482 fputs ("\n initializer: ", dump_file);
1483 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1484 fputs ("\n\n", dump_file);
1489 /* Recursive helper for unconditional_cands_with_known_stride_p.
1490 Returns TRUE iff C, its siblings, and its dependents are all
1491 unconditional candidates. */
1493 static bool
1494 unconditional_cands (slsr_cand_t c)
1496 if (c->def_phi)
1497 return false;
1499 if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
1500 return false;
1502 if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
1503 return false;
1505 return true;
1508 /* Determine whether or not the tree of candidates rooted at
1509 ROOT consists entirely of unconditional increments with
1510 an INTEGER_CST stride. */
1512 static bool
1513 unconditional_cands_with_known_stride_p (slsr_cand_t root)
1515 /* The stride is identical for all related candidates, so
1516 check it once. */
1517 if (TREE_CODE (root->stride) != INTEGER_CST)
1518 return false;
1520 return unconditional_cands (lookup_cand (root->dependent));
1523 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1524 data reference. */
1526 static void
1527 replace_ref (tree *expr, slsr_cand_t c)
1529 tree add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1530 c->base_expr, c->stride);
1531 tree mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr,
1532 double_int_to_tree (c->cand_type, c->index));
1534 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1535 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1536 TREE_OPERAND (mem_ref, 0)
1537 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1538 /*simple_p=*/true, NULL,
1539 /*before=*/true, GSI_SAME_STMT);
1540 copy_ref_info (mem_ref, *expr);
1541 *expr = mem_ref;
1542 update_stmt (c->cand_stmt);
1545 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1546 dependent of candidate C with an equivalent strength-reduced data
1547 reference. */
1549 static void
1550 replace_refs (slsr_cand_t c)
1552 if (gimple_vdef (c->cand_stmt))
1554 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1555 replace_ref (lhs, c);
1557 else
1559 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1560 replace_ref (rhs, c);
1563 if (c->sibling)
1564 replace_refs (lookup_cand (c->sibling));
1566 if (c->dependent)
1567 replace_refs (lookup_cand (c->dependent));
1570 /* Calculate the increment required for candidate C relative to
1571 its basis. */
1573 static double_int
1574 cand_increment (slsr_cand_t c)
1576 slsr_cand_t basis;
1578 /* If the candidate doesn't have a basis, just return its own
1579 index. This is useful in record_increments to help us find
1580 an existing initializer. */
1581 if (!c->basis)
1582 return c->index;
1584 basis = lookup_cand (c->basis);
1585 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1586 return c->index - basis->index;
1589 /* Calculate the increment required for candidate C relative to
1590 its basis. If we aren't going to generate pointer arithmetic
1591 for this candidate, return the absolute value of that increment
1592 instead. */
1594 static inline double_int
1595 cand_abs_increment (slsr_cand_t c)
1597 double_int increment = cand_increment (c);
1599 if (!address_arithmetic_p && increment.is_negative ())
1600 increment = -increment;
1602 return increment;
1605 /* If *VAR is NULL or is not of a compatible type with TYPE, create a
1606 new temporary reg of type TYPE and store it in *VAR. */
1608 static inline void
1609 lazy_create_slsr_reg (tree *var, tree type)
1611 if (!*var || !types_compatible_p (TREE_TYPE (*var), type))
1612 *var = create_tmp_reg (type, "slsr");
1615 /* Return TRUE iff candidate C has already been replaced under
1616 another interpretation. */
1618 static inline bool
1619 cand_already_replaced (slsr_cand_t c)
1621 return (gimple_bb (c->cand_stmt) == 0);
1624 /* Helper routine for replace_dependents, doing the work for a
1625 single candidate C. */
1627 static void
1628 replace_dependent (slsr_cand_t c, enum tree_code cand_code)
1630 double_int stride = tree_to_double_int (c->stride);
1631 double_int bump = cand_increment (c) * stride;
1632 gimple stmt_to_print = NULL;
1633 slsr_cand_t basis;
1634 tree basis_name, incr_type, bump_tree;
1635 enum tree_code code;
1637 /* It is highly unlikely, but possible, that the resulting
1638 bump doesn't fit in a HWI. Abandon the replacement
1639 in this case. Restriction to signed HWI is conservative
1640 for unsigned types but allows for safe negation without
1641 twisted logic. */
1642 if (!bump.fits_shwi ())
1643 return;
1645 basis = lookup_cand (c->basis);
1646 basis_name = gimple_assign_lhs (basis->cand_stmt);
1647 incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
1648 code = PLUS_EXPR;
1650 if (bump.is_negative ())
1652 code = MINUS_EXPR;
1653 bump = -bump;
1656 bump_tree = double_int_to_tree (incr_type, bump);
1658 if (dump_file && (dump_flags & TDF_DETAILS))
1660 fputs ("Replacing: ", dump_file);
1661 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1664 if (bump.is_zero ())
1666 tree lhs = gimple_assign_lhs (c->cand_stmt);
1667 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
1668 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1669 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
1670 gsi_replace (&gsi, copy_stmt, false);
1671 if (dump_file && (dump_flags & TDF_DETAILS))
1672 stmt_to_print = copy_stmt;
1674 else
1676 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1677 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1678 if (cand_code != NEGATE_EXPR
1679 && ((operand_equal_p (rhs1, basis_name, 0)
1680 && operand_equal_p (rhs2, bump_tree, 0))
1681 || (operand_equal_p (rhs1, bump_tree, 0)
1682 && operand_equal_p (rhs2, basis_name, 0))))
1684 if (dump_file && (dump_flags & TDF_DETAILS))
1686 fputs ("(duplicate, not actually replacing)", dump_file);
1687 stmt_to_print = c->cand_stmt;
1690 else
1692 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1693 gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
1694 update_stmt (gsi_stmt (gsi));
1695 if (dump_file && (dump_flags & TDF_DETAILS))
1696 stmt_to_print = gsi_stmt (gsi);
1700 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fputs ("With: ", dump_file);
1703 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
1704 fputs ("\n", dump_file);
1708 /* Replace candidate C, each sibling of candidate C, and each
1709 dependent of candidate C with an add or subtract. Note that we
1710 only operate on CAND_MULTs with known strides, so we will never
1711 generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is
1712 replaced by X = Y + ((i - i') * S), as described in the module
1713 commentary. The folded value ((i - i') * S) is referred to here
1714 as the "bump." */
1716 static void
1717 replace_dependents (slsr_cand_t c)
1719 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1721 /* It is not useful to replace casts, copies, or adds of an SSA name
1722 and a constant. Also skip candidates that have already been
1723 replaced under another interpretation. */
1724 if (cand_code != MODIFY_EXPR
1725 && cand_code != NOP_EXPR
1726 && c->kind == CAND_MULT
1727 && !cand_already_replaced (c))
1728 replace_dependent (c, cand_code);
1730 if (c->sibling)
1731 replace_dependents (lookup_cand (c->sibling));
1733 if (c->dependent)
1734 replace_dependents (lookup_cand (c->dependent));
1737 /* Return the index in the increment vector of the given INCREMENT. */
1739 static inline unsigned
1740 incr_vec_index (double_int increment)
1742 unsigned i;
1744 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
1747 gcc_assert (i < incr_vec_len);
1748 return i;
1751 /* Count the number of candidates in the tree rooted at C that have
1752 not already been replaced under other interpretations. */
1754 static unsigned
1755 count_candidates (slsr_cand_t c)
1757 unsigned count = cand_already_replaced (c) ? 0 : 1;
1759 if (c->sibling)
1760 count += count_candidates (lookup_cand (c->sibling));
1762 if (c->dependent)
1763 count += count_candidates (lookup_cand (c->dependent));
1765 return count;
1768 /* Increase the count of INCREMENT by one in the increment vector.
1769 INCREMENT is associated with candidate C. If an initializer
1770 T_0 = stride * I is provided by a candidate that dominates all
1771 candidates with the same increment, also record T_0 for subsequent use. */
1773 static void
1774 record_increment (slsr_cand_t c, double_int increment)
1776 bool found = false;
1777 unsigned i;
1779 /* Treat increments that differ only in sign as identical so as to
1780 share initializers, unless we are generating pointer arithmetic. */
1781 if (!address_arithmetic_p && increment.is_negative ())
1782 increment = -increment;
1784 for (i = 0; i < incr_vec_len; i++)
1786 if (incr_vec[i].incr == increment)
1788 incr_vec[i].count++;
1789 found = true;
1791 /* If we previously recorded an initializer that doesn't
1792 dominate this candidate, it's not going to be useful to
1793 us after all. */
1794 if (incr_vec[i].initializer
1795 && !dominated_by_p (CDI_DOMINATORS,
1796 gimple_bb (c->cand_stmt),
1797 incr_vec[i].init_bb))
1799 incr_vec[i].initializer = NULL_TREE;
1800 incr_vec[i].init_bb = NULL;
1803 break;
1807 if (!found)
1809 /* The first time we see an increment, create the entry for it.
1810 If this is the root candidate which doesn't have a basis, set
1811 the count to zero. We're only processing it so it can possibly
1812 provide an initializer for other candidates. */
1813 incr_vec[incr_vec_len].incr = increment;
1814 incr_vec[incr_vec_len].count = c->basis ? 1 : 0;
1815 incr_vec[incr_vec_len].cost = COST_INFINITE;
1817 /* Optimistically record the first occurrence of this increment
1818 as providing an initializer (if it does); we will revise this
1819 opinion later if it doesn't dominate all other occurrences.
1820 Exception: increments of -1, 0, 1 never need initializers. */
1821 if (c->kind == CAND_ADD
1822 && c->index == increment
1823 && (increment.sgt (double_int_one)
1824 || increment.slt (double_int_minus_one)))
1826 tree t0;
1827 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1828 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1829 if (operand_equal_p (rhs1, c->base_expr, 0))
1830 t0 = rhs2;
1831 else
1832 t0 = rhs1;
1833 if (SSA_NAME_DEF_STMT (t0) && gimple_bb (SSA_NAME_DEF_STMT (t0)))
1835 incr_vec[incr_vec_len].initializer = t0;
1836 incr_vec[incr_vec_len++].init_bb
1837 = gimple_bb (SSA_NAME_DEF_STMT (t0));
1839 else
1841 incr_vec[incr_vec_len].initializer = NULL_TREE;
1842 incr_vec[incr_vec_len++].init_bb = NULL;
1845 else
1847 incr_vec[incr_vec_len].initializer = NULL_TREE;
1848 incr_vec[incr_vec_len++].init_bb = NULL;
1853 /* Determine how many times each unique increment occurs in the set
1854 of candidates rooted at C's parent, recording the data in the
1855 increment vector. For each unique increment I, if an initializer
1856 T_0 = stride * I is provided by a candidate that dominates all
1857 candidates with the same increment, also record T_0 for subsequent
1858 use. */
1860 static void
1861 record_increments (slsr_cand_t c)
1863 if (!cand_already_replaced (c))
1864 record_increment (c, cand_increment (c));
1866 if (c->sibling)
1867 record_increments (lookup_cand (c->sibling));
1869 if (c->dependent)
1870 record_increments (lookup_cand (c->dependent));
1873 /* Return the first candidate in the tree rooted at C that has not
1874 already been replaced, favoring siblings over dependents. */
1876 static slsr_cand_t
1877 unreplaced_cand_in_tree (slsr_cand_t c)
1879 if (!cand_already_replaced (c))
1880 return c;
1882 if (c->sibling)
1884 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
1885 if (sib)
1886 return sib;
1889 if (c->dependent)
1891 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
1892 if (dep)
1893 return dep;
1896 return NULL;
1899 /* Return TRUE if the candidates in the tree rooted at C should be
1900 optimized for speed, else FALSE. We estimate this based on the block
1901 containing the most dominant candidate in the tree that has not yet
1902 been replaced. */
1904 static bool
1905 optimize_cands_for_speed_p (slsr_cand_t c)
1907 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
1908 gcc_assert (c2);
1909 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
1912 /* Add COST_IN to the lowest cost of any dependent path starting at
1913 candidate C or any of its siblings, counting only candidates along
1914 such paths with increment INCR. Assume that replacing a candidate
1915 reduces cost by REPL_SAVINGS. Also account for savings from any
1916 statements that would go dead. */
1918 static int
1919 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, double_int incr)
1921 int local_cost, sib_cost;
1922 double_int cand_incr = cand_abs_increment (c);
1924 if (cand_already_replaced (c))
1925 local_cost = cost_in;
1926 else if (incr == cand_incr)
1927 local_cost = cost_in - repl_savings - c->dead_savings;
1928 else
1929 local_cost = cost_in - c->dead_savings;
1931 if (c->dependent)
1932 local_cost = lowest_cost_path (local_cost, repl_savings,
1933 lookup_cand (c->dependent), incr);
1935 if (c->sibling)
1937 sib_cost = lowest_cost_path (cost_in, repl_savings,
1938 lookup_cand (c->sibling), incr);
1939 local_cost = MIN (local_cost, sib_cost);
1942 return local_cost;
1945 /* Compute the total savings that would accrue from all replacements
1946 in the candidate tree rooted at C, counting only candidates with
1947 increment INCR. Assume that replacing a candidate reduces cost
1948 by REPL_SAVINGS. Also account for savings from statements that
1949 would go dead. */
1951 static int
1952 total_savings (int repl_savings, slsr_cand_t c, double_int incr)
1954 int savings = 0;
1955 double_int cand_incr = cand_abs_increment (c);
1957 if (incr == cand_incr && !cand_already_replaced (c))
1958 savings += repl_savings + c->dead_savings;
1960 if (c->dependent)
1961 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr);
1963 if (c->sibling)
1964 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr);
1966 return savings;
1969 /* Use target-specific costs to determine and record which increments
1970 in the current candidate tree are profitable to replace, assuming
1971 MODE and SPEED. FIRST_DEP is the first dependent of the root of
1972 the candidate tree.
1974 One slight limitation here is that we don't account for the possible
1975 introduction of casts in some cases. See replace_one_candidate for
1976 the cases where these are introduced. This should probably be cleaned
1977 up sometime. */
1979 static void
1980 analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
1982 unsigned i;
1984 for (i = 0; i < incr_vec_len; i++)
1986 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
1988 /* If somehow this increment is bigger than a HWI, we won't
1989 be optimizing candidates that use it. And if the increment
1990 has a count of zero, nothing will be done with it. */
1991 if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
1992 incr_vec[i].cost = COST_INFINITE;
1994 /* Increments of 0, 1, and -1 are always profitable to replace,
1995 because they always replace a multiply or add with an add or
1996 copy, and may cause one or more existing instructions to go
1997 dead. Exception: -1 can't be assumed to be profitable for
1998 pointer addition. */
1999 else if (incr == 0
2000 || incr == 1
2001 || (incr == -1
2002 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2003 != POINTER_PLUS_EXPR)))
2004 incr_vec[i].cost = COST_NEUTRAL;
2006 /* FORNOW: If we need to add an initializer, give up if a cast from
2007 the candidate's type to its stride's type can lose precision.
2008 This could eventually be handled better by expressly retaining the
2009 result of a cast to a wider type in the stride. Example:
2011 short int _1;
2012 _2 = (int) _1;
2013 _3 = _2 * 10;
2014 _4 = x + _3; ADD: x + (10 * _1) : int
2015 _5 = _2 * 15;
2016 _6 = x + _3; ADD: x + (15 * _1) : int
2018 Right now replacing _6 would cause insertion of an initializer
2019 of the form "short int T = _1 * 5;" followed by a cast to
2020 int, which could overflow incorrectly. Had we recorded _2 or
2021 (int)_1 as the stride, this wouldn't happen. However, doing
2022 this breaks other opportunities, so this will require some
2023 care. */
2024 else if (!incr_vec[i].initializer
2025 && TREE_CODE (first_dep->stride) != INTEGER_CST
2026 && !legal_cast_p_1 (first_dep->stride,
2027 gimple_assign_lhs (first_dep->cand_stmt)))
2029 incr_vec[i].cost = COST_INFINITE;
2031 /* For any other increment, if this is a multiply candidate, we
2032 must introduce a temporary T and initialize it with
2033 T_0 = stride * increment. When optimizing for speed, walk the
2034 candidate tree to calculate the best cost reduction along any
2035 path; if it offsets the fixed cost of inserting the initializer,
2036 replacing the increment is profitable. When optimizing for
2037 size, instead calculate the total cost reduction from replacing
2038 all candidates with this increment. */
2039 else if (first_dep->kind == CAND_MULT)
2041 int cost = mult_by_coeff_cost (incr, mode, speed);
2042 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2043 if (speed)
2044 cost = lowest_cost_path (cost, repl_savings, first_dep,
2045 incr_vec[i].incr);
2046 else
2047 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr);
2049 incr_vec[i].cost = cost;
2052 /* If this is an add candidate, the initializer may already
2053 exist, so only calculate the cost of the initializer if it
2054 doesn't. We are replacing one add with another here, so the
2055 known replacement savings is zero. We will account for removal
2056 of dead instructions in lowest_cost_path or total_savings. */
2057 else
2059 int cost = 0;
2060 if (!incr_vec[i].initializer)
2061 cost = mult_by_coeff_cost (incr, mode, speed);
2063 if (speed)
2064 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr);
2065 else
2066 cost -= total_savings (0, first_dep, incr_vec[i].incr);
2068 incr_vec[i].cost = cost;
2073 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2074 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2075 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2076 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2077 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2079 static basic_block
2080 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2081 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2083 basic_block ncd;
2085 if (!bb1)
2087 *where = c2;
2088 return bb2;
2091 if (!bb2)
2093 *where = c1;
2094 return bb1;
2097 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2099 /* If both candidates are in the same block, the earlier
2100 candidate wins. */
2101 if (bb1 == ncd && bb2 == ncd)
2103 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2104 *where = c2;
2105 else
2106 *where = c1;
2109 /* Otherwise, if one of them produced a candidate in the
2110 dominator, that one wins. */
2111 else if (bb1 == ncd)
2112 *where = c1;
2114 else if (bb2 == ncd)
2115 *where = c2;
2117 /* If neither matches the dominator, neither wins. */
2118 else
2119 *where = NULL;
2121 return ncd;
2124 /* Consider all candidates in the tree rooted at C for which INCR
2125 represents the required increment of C relative to its basis.
2126 Find and return the basic block that most nearly dominates all
2127 such candidates. If the returned block contains one or more of
2128 the candidates, return the earliest candidate in the block in
2129 *WHERE. */
2131 static basic_block
2132 nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
2133 slsr_cand_t *where)
2135 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
2136 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
2137 double_int cand_incr;
2139 /* First find the NCD of all siblings and dependents. */
2140 if (c->sibling)
2141 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
2142 incr, &sib_where);
2143 if (c->dependent)
2144 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
2145 incr, &dep_where);
2146 if (!sib_ncd && !dep_ncd)
2148 new_where = NULL;
2149 ncd = NULL;
2151 else if (sib_ncd && !dep_ncd)
2153 new_where = sib_where;
2154 ncd = sib_ncd;
2156 else if (dep_ncd && !sib_ncd)
2158 new_where = dep_where;
2159 ncd = dep_ncd;
2161 else
2162 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
2163 dep_where, &new_where);
2165 /* If the candidate's increment doesn't match the one we're interested
2166 in, then the result depends only on siblings and dependents. */
2167 cand_incr = cand_abs_increment (c);
2169 if (cand_incr != incr || cand_already_replaced (c))
2171 *where = new_where;
2172 return ncd;
2175 /* Otherwise, compare this candidate with the result from all siblings
2176 and dependents. */
2177 this_where = c;
2178 this_ncd = gimple_bb (c->cand_stmt);
2179 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
2181 return ncd;
2184 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
2186 static inline bool
2187 profitable_increment_p (unsigned index)
2189 return (incr_vec[index].cost <= COST_NEUTRAL);
2192 /* For each profitable increment in the increment vector not equal to
2193 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
2194 dominator of all statements in the candidate chain rooted at C
2195 that require that increment, and insert an initializer
2196 T_0 = stride * increment at that location. Record T_0 with the
2197 increment record. */
2199 static void
2200 insert_initializers (slsr_cand_t c)
2202 unsigned i;
2203 tree new_var = NULL_TREE;
2205 for (i = 0; i < incr_vec_len; i++)
2207 basic_block bb;
2208 slsr_cand_t where = NULL;
2209 gimple init_stmt;
2210 tree stride_type, new_name, incr_tree;
2211 double_int incr = incr_vec[i].incr;
2213 if (!profitable_increment_p (i)
2214 || incr.is_one ()
2215 || (incr.is_minus_one ()
2216 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
2217 || incr.is_zero ())
2218 continue;
2220 /* We may have already identified an existing initializer that
2221 will suffice. */
2222 if (incr_vec[i].initializer)
2224 if (dump_file && (dump_flags & TDF_DETAILS))
2226 fputs ("Using existing initializer: ", dump_file);
2227 print_gimple_stmt (dump_file,
2228 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
2229 0, 0);
2231 continue;
2234 /* Find the block that most closely dominates all candidates
2235 with this increment. If there is at least one candidate in
2236 that block, the earliest one will be returned in WHERE. */
2237 bb = nearest_common_dominator_for_cands (c, incr, &where);
2239 /* Create a new SSA name to hold the initializer's value. */
2240 stride_type = TREE_TYPE (c->stride);
2241 lazy_create_slsr_reg (&new_var, stride_type);
2242 new_name = make_ssa_name (new_var, NULL);
2243 incr_vec[i].initializer = new_name;
2245 /* Create the initializer and insert it in the latest possible
2246 dominating position. */
2247 incr_tree = double_int_to_tree (stride_type, incr);
2248 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
2249 c->stride, incr_tree);
2250 if (where)
2252 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
2253 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2254 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
2256 else
2258 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2259 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
2261 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2262 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2263 else
2264 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
2266 gimple_set_location (init_stmt, gimple_location (basis_stmt));
2269 if (dump_file && (dump_flags & TDF_DETAILS))
2271 fputs ("Inserting initializer: ", dump_file);
2272 print_gimple_stmt (dump_file, init_stmt, 0, 0);
2277 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
2278 type TO_TYPE, and insert it in front of the statement represented
2279 by candidate C. Use *NEW_VAR to create the new SSA name. Return
2280 the new SSA name. */
2282 static tree
2283 introduce_cast_before_cand (slsr_cand_t c, tree to_type,
2284 tree from_expr, tree *new_var)
2286 tree cast_lhs;
2287 gimple cast_stmt;
2288 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2290 lazy_create_slsr_reg (new_var, to_type);
2291 cast_lhs = make_ssa_name (*new_var, NULL);
2292 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
2293 from_expr, NULL_TREE);
2294 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2295 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
2297 if (dump_file && (dump_flags & TDF_DETAILS))
2299 fputs (" Inserting: ", dump_file);
2300 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
2303 return cast_lhs;
2306 /* Replace the RHS of the statement represented by candidate C with
2307 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
2308 leave C unchanged or just interchange its operands. The original
2309 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
2310 If the replacement was made and we are doing a details dump,
2311 return the revised statement, else NULL. */
2313 static gimple
2314 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
2315 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
2316 slsr_cand_t c)
2318 if (new_code != old_code
2319 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
2320 || !operand_equal_p (new_rhs2, old_rhs2, 0))
2321 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
2322 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
2324 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2325 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
2326 update_stmt (gsi_stmt (gsi));
2328 if (dump_file && (dump_flags & TDF_DETAILS))
2329 return gsi_stmt (gsi);
2332 else if (dump_file && (dump_flags & TDF_DETAILS))
2333 fputs (" (duplicate, not actually replacing)\n", dump_file);
2335 return NULL;
2338 /* Strength-reduce the statement represented by candidate C by replacing
2339 it with an equivalent addition or subtraction. I is the index into
2340 the increment vector identifying C's increment. NEW_VAR is used to
2341 create a new SSA name if a cast needs to be introduced. BASIS_NAME
2342 is the rhs1 to use in creating the add/subtract. */
2344 static void
2345 replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var,
2346 tree basis_name)
2348 gimple stmt_to_print = NULL;
2349 tree orig_rhs1, orig_rhs2;
2350 tree rhs2;
2351 enum tree_code orig_code, repl_code;
2352 double_int cand_incr;
2354 orig_code = gimple_assign_rhs_code (c->cand_stmt);
2355 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2356 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2357 cand_incr = cand_increment (c);
2359 if (dump_file && (dump_flags & TDF_DETAILS))
2361 fputs ("Replacing: ", dump_file);
2362 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
2363 stmt_to_print = c->cand_stmt;
2366 if (address_arithmetic_p)
2367 repl_code = POINTER_PLUS_EXPR;
2368 else
2369 repl_code = PLUS_EXPR;
2371 /* If the increment has an initializer T_0, replace the candidate
2372 statement with an add of the basis name and the initializer. */
2373 if (incr_vec[i].initializer)
2375 tree init_type = TREE_TYPE (incr_vec[i].initializer);
2376 tree orig_type = TREE_TYPE (orig_rhs2);
2378 if (types_compatible_p (orig_type, init_type))
2379 rhs2 = incr_vec[i].initializer;
2380 else
2381 rhs2 = introduce_cast_before_cand (c, orig_type,
2382 incr_vec[i].initializer,
2383 new_var);
2385 if (incr_vec[i].incr != cand_incr)
2387 gcc_assert (repl_code == PLUS_EXPR);
2388 repl_code = MINUS_EXPR;
2391 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
2392 orig_code, orig_rhs1, orig_rhs2,
2396 /* Otherwise, the increment is one of -1, 0, and 1. Replace
2397 with a subtract of the stride from the basis name, a copy
2398 from the basis name, or an add of the stride to the basis
2399 name, respectively. It may be necessary to introduce a
2400 cast (or reuse an existing cast). */
2401 else if (cand_incr.is_one ())
2403 tree stride_type = TREE_TYPE (c->stride);
2404 tree orig_type = TREE_TYPE (orig_rhs2);
2406 if (types_compatible_p (orig_type, stride_type))
2407 rhs2 = c->stride;
2408 else
2409 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
2411 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
2412 orig_code, orig_rhs1, orig_rhs2,
2416 else if (cand_incr.is_minus_one ())
2418 tree stride_type = TREE_TYPE (c->stride);
2419 tree orig_type = TREE_TYPE (orig_rhs2);
2420 gcc_assert (repl_code != POINTER_PLUS_EXPR);
2422 if (types_compatible_p (orig_type, stride_type))
2423 rhs2 = c->stride;
2424 else
2425 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
2427 if (orig_code != MINUS_EXPR
2428 || !operand_equal_p (basis_name, orig_rhs1, 0)
2429 || !operand_equal_p (rhs2, orig_rhs2, 0))
2431 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2432 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
2433 update_stmt (gsi_stmt (gsi));
2435 if (dump_file && (dump_flags & TDF_DETAILS))
2436 stmt_to_print = gsi_stmt (gsi);
2438 else if (dump_file && (dump_flags & TDF_DETAILS))
2439 fputs (" (duplicate, not actually replacing)\n", dump_file);
2442 else if (cand_incr.is_zero ())
2444 tree lhs = gimple_assign_lhs (c->cand_stmt);
2445 tree lhs_type = TREE_TYPE (lhs);
2446 tree basis_type = TREE_TYPE (basis_name);
2448 if (types_compatible_p (lhs_type, basis_type))
2450 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
2451 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2452 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2453 gsi_replace (&gsi, copy_stmt, false);
2455 if (dump_file && (dump_flags & TDF_DETAILS))
2456 stmt_to_print = copy_stmt;
2458 else
2460 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2461 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
2462 basis_name,
2463 NULL_TREE);
2464 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2465 gsi_replace (&gsi, cast_stmt, false);
2467 if (dump_file && (dump_flags & TDF_DETAILS))
2468 stmt_to_print = cast_stmt;
2471 else
2472 gcc_unreachable ();
2474 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
2476 fputs ("With: ", dump_file);
2477 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2478 fputs ("\n", dump_file);
2482 /* For each candidate in the tree rooted at C, replace it with
2483 an increment if such has been shown to be profitable. */
2485 static void
2486 replace_profitable_candidates (slsr_cand_t c)
2488 if (!cand_already_replaced (c))
2490 double_int increment = cand_abs_increment (c);
2491 tree new_var = NULL;
2492 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
2493 unsigned i;
2495 i = incr_vec_index (increment);
2497 /* Only process profitable increments. Nothing useful can be done
2498 to a cast or copy. */
2499 if (profitable_increment_p (i)
2500 && orig_code != MODIFY_EXPR
2501 && orig_code != NOP_EXPR)
2503 slsr_cand_t basis = lookup_cand (c->basis);
2504 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
2505 replace_one_candidate (c, i, &new_var, basis_name);
2509 if (c->sibling)
2510 replace_profitable_candidates (lookup_cand (c->sibling));
2512 if (c->dependent)
2513 replace_profitable_candidates (lookup_cand (c->dependent));
2516 /* Analyze costs of related candidates in the candidate vector,
2517 and make beneficial replacements. */
2519 static void
2520 analyze_candidates_and_replace (void)
2522 unsigned i;
2523 slsr_cand_t c;
2525 /* Each candidate that has a null basis and a non-null
2526 dependent is the root of a tree of related statements.
2527 Analyze each tree to determine a subset of those
2528 statements that can be replaced with maximum benefit. */
2529 FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
2531 slsr_cand_t first_dep;
2533 if (c->basis != 0 || c->dependent == 0)
2534 continue;
2536 if (dump_file && (dump_flags & TDF_DETAILS))
2537 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
2538 c->cand_num);
2540 first_dep = lookup_cand (c->dependent);
2542 /* If this is a chain of CAND_REFs, unconditionally replace
2543 each of them with a strength-reduced data reference. */
2544 if (c->kind == CAND_REF)
2545 replace_refs (c);
2547 /* If the common stride of all related candidates is a
2548 known constant, and none of these has a phi-dependence,
2549 then all replacements are considered profitable.
2550 Each replaces a multiply by a single add, with the
2551 possibility that a feeding add also goes dead as a
2552 result. */
2553 else if (unconditional_cands_with_known_stride_p (c))
2554 replace_dependents (first_dep);
2556 /* When the stride is an SSA name, it may still be profitable
2557 to replace some or all of the dependent candidates, depending
2558 on whether the introduced increments can be reused, or are
2559 less expensive to calculate than the replaced statements. */
2560 else
2562 unsigned length;
2563 enum machine_mode mode;
2564 bool speed;
2566 /* Determine whether we'll be generating pointer arithmetic
2567 when replacing candidates. */
2568 address_arithmetic_p = (c->kind == CAND_ADD
2569 && POINTER_TYPE_P (c->cand_type));
2571 /* If all candidates have already been replaced under other
2572 interpretations, nothing remains to be done. */
2573 length = count_candidates (c);
2574 if (!length)
2575 continue;
2577 /* Construct an array of increments for this candidate chain. */
2578 incr_vec = XNEWVEC (incr_info, length);
2579 incr_vec_len = 0;
2580 record_increments (c);
2582 /* Determine which increments are profitable to replace. */
2583 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
2584 speed = optimize_cands_for_speed_p (c);
2585 analyze_increments (first_dep, mode, speed);
2587 /* Insert initializers of the form T_0 = stride * increment
2588 for use in profitable replacements. */
2589 insert_initializers (first_dep);
2590 dump_incr_vec ();
2592 /* Perform the replacements. */
2593 replace_profitable_candidates (first_dep);
2594 free (incr_vec);
2597 /* TODO: When conditional increments occur so that a
2598 candidate is dependent upon a phi-basis, the cost of
2599 introducing a temporary must be accounted for. */
2603 static unsigned
2604 execute_strength_reduction (void)
2606 struct dom_walk_data walk_data;
2608 /* Create the obstack where candidates will reside. */
2609 gcc_obstack_init (&cand_obstack);
2611 /* Allocate the candidate vector. */
2612 cand_vec = VEC_alloc (slsr_cand_t, heap, 128);
2614 /* Allocate the mapping from statements to candidate indices. */
2615 stmt_cand_map = pointer_map_create ();
2617 /* Create the obstack where candidate chains will reside. */
2618 gcc_obstack_init (&chain_obstack);
2620 /* Allocate the mapping from base expressions to candidate chains. */
2621 base_cand_map = htab_create (500, base_cand_hash,
2622 base_cand_eq, base_cand_free);
2624 /* Initialize the loop optimizer. We need to detect flow across
2625 back edges, and this gives us dominator information as well. */
2626 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2628 /* Set up callbacks for the generic dominator tree walker. */
2629 walk_data.dom_direction = CDI_DOMINATORS;
2630 walk_data.initialize_block_local_data = NULL;
2631 walk_data.before_dom_children = find_candidates_in_block;
2632 walk_data.after_dom_children = NULL;
2633 walk_data.global_data = NULL;
2634 walk_data.block_local_data_size = 0;
2635 init_walk_dominator_tree (&walk_data);
2637 /* Walk the CFG in predominator order looking for strength reduction
2638 candidates. */
2639 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
2641 if (dump_file && (dump_flags & TDF_DETAILS))
2643 dump_cand_vec ();
2644 dump_cand_chains ();
2647 /* Analyze costs and make appropriate replacements. */
2648 analyze_candidates_and_replace ();
2650 /* Free resources. */
2651 fini_walk_dominator_tree (&walk_data);
2652 loop_optimizer_finalize ();
2653 htab_delete (base_cand_map);
2654 obstack_free (&chain_obstack, NULL);
2655 pointer_map_destroy (stmt_cand_map);
2656 VEC_free (slsr_cand_t, heap, cand_vec);
2657 obstack_free (&cand_obstack, NULL);
2659 return 0;
2662 static bool
2663 gate_strength_reduction (void)
2665 return flag_tree_slsr;
2668 struct gimple_opt_pass pass_strength_reduction =
2671 GIMPLE_PASS,
2672 "slsr", /* name */
2673 gate_strength_reduction, /* gate */
2674 execute_strength_reduction, /* execute */
2675 NULL, /* sub */
2676 NULL, /* next */
2677 0, /* static_pass_number */
2678 TV_GIMPLE_SLSR, /* tv_id */
2679 PROP_cfg | PROP_ssa, /* properties_required */
2680 0, /* properties_provided */
2681 0, /* properties_destroyed */
2682 0, /* todo_flags_start */
2683 TODO_verify_ssa /* todo_flags_finish */