2012-11-10 Sandra Loosemore <sandra@codesourcery.com>
[official-gcc.git] / gcc / gimple-ssa-strength-reduction.c
blobad02589faee01ab14e8c49d15dd74d3e96302c8f
1 /* Straight-line strength reduction.
2 Copyright (C) 2012 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction will be implemented in four stages, gradually
28 adding more complex candidates:
30 1) Explicit multiplies, known constant multipliers, no
31 conditional increments. (complete)
32 2) Explicit multiplies, unknown constant multipliers,
33 no conditional increments. (complete)
34 3) Implicit multiplies in addressing expressions. (complete)
35 4) Explicit multiplies, conditional increments. (pending)
37 It would also be possible to apply strength reduction to divisions
38 and modulos, but such opportunities are relatively uncommon.
40 Strength reduction is also currently restricted to integer operations.
41 If desired, it could be extended to floating-point operations under
42 control of something like -funsafe-math-optimizations. */
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "gimple.h"
49 #include "basic-block.h"
50 #include "tree-pass.h"
51 #include "cfgloop.h"
52 #include "gimple-pretty-print.h"
53 #include "tree-flow.h"
54 #include "domwalk.h"
55 #include "pointer-set.h"
56 #include "expmed.h"
57 #include "params.h"
59 /* Information about a strength reduction candidate. Each statement
60 in the candidate table represents an expression of one of the
61 following forms (the special case of CAND_REF will be described
62 later):
64 (CAND_MULT) S1: X = (B + i) * S
65 (CAND_ADD) S1: X = B + (i * S)
67 Here X and B are SSA names, i is an integer constant, and S is
68 either an SSA name or a constant. We call B the "base," i the
69 "index", and S the "stride."
71 Any statement S0 that dominates S1 and is of the form:
73 (CAND_MULT) S0: Y = (B + i') * S
74 (CAND_ADD) S0: Y = B + (i' * S)
76 is called a "basis" for S1. In both cases, S1 may be replaced by
78 S1': X = Y + (i - i') * S,
80 where (i - i') * S is folded to the extent possible.
82 All gimple statements are visited in dominator order, and each
83 statement that may contribute to one of the forms of S1 above is
84 given at least one entry in the candidate table. Such statements
85 include addition, pointer addition, subtraction, multiplication,
86 negation, copies, and nontrivial type casts. If a statement may
87 represent more than one expression of the forms of S1 above,
88 multiple "interpretations" are stored in the table and chained
89 together. Examples:
91 * An add of two SSA names may treat either operand as the base.
92 * A multiply of two SSA names, likewise.
93 * A copy or cast may be thought of as either a CAND_MULT with
94 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
96 Candidate records are allocated from an obstack. They are addressed
97 both from a hash table keyed on S1, and from a vector of candidate
98 pointers arranged in predominator order.
100 Opportunity note
101 ----------------
102 Currently we don't recognize:
104 S0: Y = (S * i') - B
105 S1: X = (S * i) - B
107 as a strength reduction opportunity, even though this S1 would
108 also be replaceable by the S1' above. This can be added if it
109 comes up in practice.
111 Strength reduction in addressing
112 --------------------------------
113 There is another kind of candidate known as CAND_REF. A CAND_REF
114 describes a statement containing a memory reference having
115 complex addressing that might benefit from strength reduction.
116 Specifically, we are interested in references for which
117 get_inner_reference returns a base address, offset, and bitpos as
118 follows:
120 base: MEM_REF (T1, C1)
121 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
122 bitpos: C4 * BITS_PER_UNIT
124 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
125 arbitrary integer constants. Note that C2 may be zero, in which
126 case the offset will be MULT_EXPR (T2, C3).
128 When this pattern is recognized, the original memory reference
129 can be replaced with:
131 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
132 C1 + (C2 * C3) + C4)
134 which distributes the multiply to allow constant folding. When
135 two or more addressing expressions can be represented by MEM_REFs
136 of this form, differing only in the constants C1, C2, and C4,
137 making this substitution produces more efficient addressing during
138 the RTL phases. When there are not at least two expressions with
139 the same values of T1, T2, and C3, there is nothing to be gained
140 by the replacement.
142 Strength reduction of CAND_REFs uses the same infrastructure as
143 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
144 field, MULT_EXPR (T2, C3) in the stride (S) field, and
145 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
146 is thus another CAND_REF with the same B and S values. When at
147 least two CAND_REFs are chained together using the basis relation,
148 each of them is replaced as above, resulting in improved code
149 generation for addressing. */
/* Index into the candidate vector, offset by 1.  VECs are zero-based,
   while cand_idx's are one-based, with zero indicating null.  */
typedef unsigned cand_idx;
/* The kind of candidate.  */

enum cand_kind
{
  CAND_MULT,	/* S1: X = (B + i) * S  */
  CAND_ADD,	/* S1: X = B + (i * S)  */
  CAND_REF	/* Memory reference with strength-reducible addressing
		   (see the module commentary).  */
};
/* One interpretation of a statement as a strength reduction
   candidate:  an expression of the form (B + i) * S, B + (i * S),
   or a CAND_REF memory reference.  */

struct slsr_cand_d
{
  /* The candidate statement S1.  */
  gimple cand_stmt;

  /* The base expression B:  often an SSA name, but not always.  */
  tree base_expr;

  /* The stride S.  */
  tree stride;

  /* The index constant i.  */
  double_int index;

  /* The type of the candidate.  This is normally the type of base_expr,
     but casts may have occurred when combining feeding instructions.
     A candidate can only be a basis for candidates of the same final type.
     (For CAND_REFs, this is the type to be used for operand 1 of the
     replacement MEM_REF.)  */
  tree cand_type;

  /* The kind of candidate (CAND_MULT, etc.).  */
  enum cand_kind kind;

  /* Index of this candidate in the candidate vector.  */
  cand_idx cand_num;

  /* Index of the next candidate record for the same statement.
     A statement may be useful in more than one way (e.g., due to
     commutativity).  So we can have multiple "interpretations"
     of a statement.  */
  cand_idx next_interp;

  /* Index of the basis statement S0, if any, in the candidate vector.  */
  cand_idx basis;

  /* First candidate for which this candidate is a basis, if one exists.  */
  cand_idx dependent;

  /* Next candidate having the same basis as this one.  */
  cand_idx sibling;

  /* If this is a conditional candidate, the defining PHI statement
     for the base SSA name B.  For future use; always NULL for now.  */
  gimple def_phi;

  /* Savings that can be expected from eliminating dead code if this
     candidate is replaced.  */
  int dead_savings;
};

typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
typedef const struct slsr_cand_d *const_slsr_cand_t;
/* Pointers to candidates are chained together as part of a mapping
   from base expressions to the candidates that use them.  */

struct cand_chain_d
{
  /* Base expression for the chain of candidates:  often, but not
     always, an SSA name.  */
  tree base_expr;

  /* Pointer to a candidate.  */
  slsr_cand_t cand;

  /* Chain pointer.  */
  struct cand_chain_d *next;
};

typedef struct cand_chain_d cand_chain, *cand_chain_t;
typedef const struct cand_chain_d *const_cand_chain_t;
/* Information about a unique "increment" associated with candidates
   having an SSA name for a stride.  An increment is the difference
   between the index of the candidate and the index of its basis,
   i.e., (i - i') as discussed in the module commentary.

   When we are not going to generate address arithmetic we treat
   increments that differ only in sign as the same, allowing sharing
   of the cost of initializers.  The absolute value of the increment
   is stored in the incr_info.  */

struct incr_info_d
{
  /* The increment that relates a candidate to its basis.  */
  double_int incr;

  /* How many times the increment occurs in the candidate tree.  */
  unsigned count;

  /* Cost of replacing candidates using this increment.  Negative and
     zero costs indicate replacement should be performed.  */
  int cost;

  /* If this increment is profitable but is not -1, 0, or 1, it requires
     an initializer T_0 = stride * incr to be found or introduced in the
     nearest common dominator of all candidates.  This field holds T_0
     for subsequent use.  */
  tree initializer;

  /* If the initializer was found to already exist, this is the block
     where it was found.  */
  basic_block init_bb;
};

typedef struct incr_info_d incr_info, *incr_info_t;
/* Candidates are maintained in a vector.  If candidate X dominates
   candidate Y, then X appears before Y in the vector; but the
   converse does not necessarily hold.  */
DEF_VEC_P (slsr_cand_t);
DEF_VEC_ALLOC_P (slsr_cand_t, heap);
static VEC (slsr_cand_t, heap) *cand_vec;

/* Replacement costs.  Negative and zero costs indicate replacement
   should be performed; COST_INFINITE forbids it.  */
enum cost_consts
{
  COST_NEUTRAL = 0,
  COST_INFINITE = 1000
};

/* Pointer map embodying a mapping from statements to candidates.  */
static struct pointer_map_t *stmt_cand_map;

/* Obstack for candidates.  */
static struct obstack cand_obstack;

/* Hash table embodying a mapping from base exprs to chains of candidates.  */
static htab_t base_cand_map;

/* Obstack for candidate chains.  */
static struct obstack chain_obstack;

/* An array INCR_VEC of incr_infos is used during analysis of related
   candidates having an SSA name for a stride.  INCR_VEC_LEN describes
   its current length.  */
static incr_info_t incr_vec;
static unsigned incr_vec_len;

/* For a chain of candidates with unknown stride, indicates whether or not
   we must generate pointer arithmetic when replacing statements.  */
static bool address_arithmetic_p;
308 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
310 static slsr_cand_t
311 lookup_cand (cand_idx idx)
313 return VEC_index (slsr_cand_t, cand_vec, idx - 1);
316 /* Callback to produce a hash value for a candidate chain header. */
318 static hashval_t
319 base_cand_hash (const void *p)
321 tree base_expr = ((const_cand_chain_t) p)->base_expr;
322 return iterative_hash_expr (base_expr, 0);
/* Callback when an element is removed from the hash table.
   We never remove entries until the entire table is released,
   so this deliberately does nothing; chain nodes live on
   chain_obstack and are freed en masse.  */

static void
base_cand_free (void *p ATTRIBUTE_UNUSED)
{
}
333 /* Callback to return true if two candidate chain headers are equal. */
335 static int
336 base_cand_eq (const void *p1, const void *p2)
338 const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
339 const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
340 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
343 /* Use the base expr from candidate C to look for possible candidates
344 that can serve as a basis for C. Each potential basis must also
345 appear in a block that dominates the candidate statement and have
346 the same stride and type. If more than one possible basis exists,
347 the one with highest index in the vector is chosen; this will be
348 the most immediately dominating basis. */
350 static int
351 find_basis_for_candidate (slsr_cand_t c)
353 cand_chain mapping_key;
354 cand_chain_t chain;
355 slsr_cand_t basis = NULL;
357 // Limit potential of N^2 behavior for long candidate chains.
358 int iters = 0;
359 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
361 mapping_key.base_expr = c->base_expr;
362 chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
364 for (; chain && iters < max_iters; chain = chain->next, ++iters)
366 slsr_cand_t one_basis = chain->cand;
368 if (one_basis->kind != c->kind
369 || one_basis->cand_stmt == c->cand_stmt
370 || !operand_equal_p (one_basis->stride, c->stride, 0)
371 || !types_compatible_p (one_basis->cand_type, c->cand_type)
372 || !dominated_by_p (CDI_DOMINATORS,
373 gimple_bb (c->cand_stmt),
374 gimple_bb (one_basis->cand_stmt)))
375 continue;
377 if (!basis || basis->cand_num < one_basis->cand_num)
378 basis = one_basis;
381 if (basis)
383 c->sibling = basis->dependent;
384 basis->dependent = c->cand_num;
385 return basis->cand_num;
388 return 0;
391 /* Record a mapping from the base expression of C to C itself, indicating that
392 C may potentially serve as a basis using that base expression. */
394 static void
395 record_potential_basis (slsr_cand_t c)
397 cand_chain_t node;
398 void **slot;
400 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
401 node->base_expr = c->base_expr;
402 node->cand = c;
403 node->next = NULL;
404 slot = htab_find_slot (base_cand_map, node, INSERT);
406 if (*slot)
408 cand_chain_t head = (cand_chain_t) (*slot);
409 node->next = head->next;
410 head->next = node;
412 else
413 *slot = node;
416 /* Allocate storage for a new candidate and initialize its fields.
417 Attempt to find a basis for the candidate. */
419 static slsr_cand_t
420 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
421 double_int index, tree stride, tree ctype,
422 unsigned savings)
424 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
425 sizeof (slsr_cand));
426 c->cand_stmt = gs;
427 c->base_expr = base;
428 c->stride = stride;
429 c->index = index;
430 c->cand_type = ctype;
431 c->kind = kind;
432 c->cand_num = VEC_length (slsr_cand_t, cand_vec) + 1;
433 c->next_interp = 0;
434 c->dependent = 0;
435 c->sibling = 0;
436 c->def_phi = NULL;
437 c->dead_savings = savings;
439 VEC_safe_push (slsr_cand_t, heap, cand_vec, c);
440 c->basis = find_basis_for_candidate (c);
441 record_potential_basis (c);
443 return c;
446 /* Determine the target cost of statement GS when compiling according
447 to SPEED. */
449 static int
450 stmt_cost (gimple gs, bool speed)
452 tree lhs, rhs1, rhs2;
453 enum machine_mode lhs_mode;
455 gcc_assert (is_gimple_assign (gs));
456 lhs = gimple_assign_lhs (gs);
457 rhs1 = gimple_assign_rhs1 (gs);
458 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
460 switch (gimple_assign_rhs_code (gs))
462 case MULT_EXPR:
463 rhs2 = gimple_assign_rhs2 (gs);
465 if (host_integerp (rhs2, 0))
466 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
468 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
469 return mul_cost (speed, lhs_mode);
471 case PLUS_EXPR:
472 case POINTER_PLUS_EXPR:
473 case MINUS_EXPR:
474 return add_cost (speed, lhs_mode);
476 case NEGATE_EXPR:
477 return neg_cost (speed, lhs_mode);
479 case NOP_EXPR:
480 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
482 /* Note that we don't assign costs to copies that in most cases
483 will go away. */
484 default:
488 gcc_unreachable ();
489 return 0;
492 /* Look up the defining statement for BASE_IN and return a pointer
493 to its candidate in the candidate table, if any; otherwise NULL.
494 Only CAND_ADD and CAND_MULT candidates are returned. */
496 static slsr_cand_t
497 base_cand_from_table (tree base_in)
499 slsr_cand_t *result;
501 gimple def = SSA_NAME_DEF_STMT (base_in);
502 if (!def)
503 return (slsr_cand_t) NULL;
505 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
507 if (result && (*result)->kind != CAND_REF)
508 return *result;
510 return (slsr_cand_t) NULL;
513 /* Add an entry to the statement-to-candidate mapping. */
515 static void
516 add_cand_for_stmt (gimple gs, slsr_cand_t c)
518 void **slot = pointer_map_insert (stmt_cand_map, gs);
519 gcc_assert (!*slot);
520 *slot = c;
/* Look for the following pattern:

    *PBASE:    MEM_REF (T1, C1)

    *POFFSET:  MULT_EXPR (T2, C3)        [C2 is zero]
                     or
               MULT_EXPR (PLUS_EXPR (T2, C2), C3)
                     or
               MULT_EXPR (MINUS_EXPR (T2, -C2), C3)

    *PINDEX:   C4 * BITS_PER_UNIT

   If not present, leave the input values unchanged and return FALSE.
   Otherwise, modify the input values as follows and return TRUE:

    *PBASE:    T1
    *POFFSET:  MULT_EXPR (T2, C3)
    *PINDEX:   C1 + (C2 * C3) + C4  */

static bool
restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
		       tree *ptype)
{
  tree base = *pbase, offset = *poffset;
  double_int index = *pindex;
  double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
  tree mult_op0, mult_op1, t1, t2, type;
  double_int c1, c2, c3, c4;

  /* The offset must be a multiply by a constant, and the bit position
     must fall on a byte boundary.  */
  if (!base
      || !offset
      || TREE_CODE (base) != MEM_REF
      || TREE_CODE (offset) != MULT_EXPR
      || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
      || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
    return false;

  t1 = TREE_OPERAND (base, 0);
  c1 = mem_ref_offset (base);
  type = TREE_TYPE (TREE_OPERAND (base, 1));

  mult_op0 = TREE_OPERAND (offset, 0);
  mult_op1 = TREE_OPERAND (offset, 1);

  c3 = tree_to_double_int (mult_op1);

  if (TREE_CODE (mult_op0) == PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
	{
	  t2 = TREE_OPERAND (mult_op0, 0);
	  c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
	}
      else
	return false;
    }
  else if (TREE_CODE (mult_op0) == MINUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
	{
	  t2 = TREE_OPERAND (mult_op0, 0);
	  /* Negate the subtrahend so it participates as an addend.  */
	  c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
	}
      else
	return false;
    }
  else
    {
      /* No constant addend is present:  C2 is zero.  */
      t2 = mult_op0;
      c2 = double_int_zero;
    }

  /* Convert the bit position to a byte count.  */
  c4 = index.udiv (bpu, FLOOR_DIV_EXPR);

  *pbase = t1;
  *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
			  double_int_to_tree (sizetype, c3));
  *pindex = c1 + c2 * c3 + c4;
  *ptype = type;

  return true;
}
/* Given GS which contains a data reference, create a CAND_REF entry in
   the candidate table and attempt to find a basis.  */

static void
slsr_process_ref (gimple gs)
{
  tree ref_expr, base, offset, type;
  HOST_WIDE_INT bitsize, bitpos;
  enum machine_mode mode;
  int unsignedp, volatilep;
  double_int index;
  slsr_cand_t c;

  /* A statement with a virtual definition is a store; otherwise the
     reference of interest is the load on the RHS.  */
  if (gimple_vdef (gs))
    ref_expr = gimple_assign_lhs (gs);
  else
    ref_expr = gimple_assign_rhs1 (gs);

  /* Bit-field references don't have byte-aligned addresses, so they
     are not amenable to this transformation.  */
  if (!handled_component_p (ref_expr)
      || TREE_CODE (ref_expr) == BIT_FIELD_REF
      || (TREE_CODE (ref_expr) == COMPONENT_REF
	  && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
    return;

  base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &volatilep, false);
  index = double_int::from_uhwi (bitpos);

  /* Only references matching the pattern documented at
     restructure_reference become candidates.  */
  if (!restructure_reference (&base, &offset, &index, &type))
    return;

  c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
				 type, 0);

  /* Add the candidate to the statement-candidate mapping.  */
  add_cand_for_stmt (gs, c);
}
/* Create a candidate entry for a statement GS, where GS multiplies
   two SSA names BASE_IN and STRIDE_IN.  Propagate any known information
   about the two SSA names into the new candidate.  Return the new
   candidate.  */

static slsr_cand_t
create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  double_int index;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate, if necessary,
     to find information to propagate into this candidate.  */
  while (base_cand && !base)
    {
      if (base_cand->kind == CAND_MULT
	  && operand_equal_p (base_cand->stride, integer_one_node, 0))
	{
	  /* Y = (B + i') * 1
	     X = Y * Z
	     ================
	     X = (B + i') * Z  */
	  base = base_cand->base_expr;
	  index = base_cand->index;
	  stride = stride_in;
	  ctype = base_cand->cand_type;
	  /* If Y feeds only this statement, replacing X will
	     eventually allow Y's statement to die too.  */
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}
      else if (base_cand->kind == CAND_ADD
	       && TREE_CODE (base_cand->stride) == INTEGER_CST)
	{
	  /* Y = B + (i' * S), S constant
	     X = Y * Z
	     ============================
	     X = B + ((i' * S) * Z)  */
	  base = base_cand->base_expr;
	  index = base_cand->index * tree_to_double_int (base_cand->stride);
	  stride = stride_in;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}

      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
	 produce X = (Y + 0) * Z.  */
      base = base_in;
      index = double_int_zero;
      stride = stride_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
				 ctype, savings);
  return c;
}
/* Create a candidate entry for a statement GS, where GS multiplies
   SSA name BASE_IN by constant STRIDE_IN.  Propagate any known
   information about BASE_IN into the new candidate.  Return the new
   candidate.  */

static slsr_cand_t
create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  double_int index, temp;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  /* Look at all interpretations of the base candidate, if necessary,
     to find information to propagate into this candidate.  */
  while (base_cand && !base)
    {
      if (base_cand->kind == CAND_MULT
	  && TREE_CODE (base_cand->stride) == INTEGER_CST)
	{
	  /* Y = (B + i') * S, S constant
	     X = Y * c
	     ============================
	     X = (B + i') * (S * c)  */
	  base = base_cand->base_expr;
	  index = base_cand->index;
	  /* Fold the two constant strides into one.  */
	  temp = tree_to_double_int (base_cand->stride)
		 * tree_to_double_int (stride_in);
	  stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}
      else if (base_cand->kind == CAND_ADD
	       && operand_equal_p (base_cand->stride, integer_one_node, 0))
	{
	  /* Y = B + (i' * 1)
	     X = Y * c
	     ===========================
	     X = (B + i') * c  */
	  base = base_cand->base_expr;
	  index = base_cand->index;
	  stride = stride_in;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}
      else if (base_cand->kind == CAND_ADD
	       && base_cand->index.is_one ()
	       && TREE_CODE (base_cand->stride) == INTEGER_CST)
	{
	  /* Y = B + (1 * S), S constant
	     X = Y * c
	     ===========================
	     X = (B + S) * c  */
	  base = base_cand->base_expr;
	  index = tree_to_double_int (base_cand->stride);
	  stride = stride_in;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}

      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
	 produce X = (Y + 0) * c.  */
      base = base_in;
      index = double_int_zero;
      stride = stride_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
				 ctype, savings);
  return c;
}
803 /* Given GS which is a multiply of scalar integers, make an appropriate
804 entry in the candidate table. If this is a multiply of two SSA names,
805 create two CAND_MULT interpretations and attempt to find a basis for
806 each of them. Otherwise, create a single CAND_MULT and attempt to
807 find a basis. */
809 static void
810 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
812 slsr_cand_t c, c2;
814 /* If this is a multiply of an SSA name with itself, it is highly
815 unlikely that we will get a strength reduction opportunity, so
816 don't record it as a candidate. This simplifies the logic for
817 finding a basis, so if this is removed that must be considered. */
818 if (rhs1 == rhs2)
819 return;
821 if (TREE_CODE (rhs2) == SSA_NAME)
823 /* Record an interpretation of this statement in the candidate table
824 assuming RHS1 is the base expression and RHS2 is the stride. */
825 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
827 /* Add the first interpretation to the statement-candidate mapping. */
828 add_cand_for_stmt (gs, c);
830 /* Record another interpretation of this statement assuming RHS1
831 is the stride and RHS2 is the base expression. */
832 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
833 c->next_interp = c2->cand_num;
835 else
837 /* Record an interpretation for the multiply-immediate. */
838 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
840 /* Add the interpretation to the statement-candidate mapping. */
841 add_cand_for_stmt (gs, c);
/* Create a candidate entry for a statement GS, where GS adds two
   SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
   subtracts ADDEND_IN from BASE_IN otherwise.  Propagate any known
   information about the two SSA names into the new candidate.
   Return the new candidate.  */

static slsr_cand_t
create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
		     bool subtract_p, bool speed)
{
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
  double_int index;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);
  slsr_cand_t addend_cand = base_cand_from_table (addend_in);

  /* The most useful transformation is a multiply-immediate feeding
     an add or subtract.  Look for that first.  */
  while (addend_cand && !base)
    {
      if (addend_cand->kind == CAND_MULT
	  && addend_cand->index.is_zero ()
	  && TREE_CODE (addend_cand->stride) == INTEGER_CST)
	{
	  /* Z = (B + 0) * S, S constant
	     X = Y +/- Z
	     ===========================
	     X = Y + ((+/-1 * S) * B)  */
	  base = base_in;
	  index = tree_to_double_int (addend_cand->stride);
	  if (subtract_p)
	    index = -index;
	  stride = addend_cand->base_expr;
	  ctype = TREE_TYPE (base_in);
	  if (has_single_use (addend_in))
	    savings = (addend_cand->dead_savings
		       + stmt_cost (addend_cand->cand_stmt, speed));
	}

      if (addend_cand->next_interp)
	addend_cand = lookup_cand (addend_cand->next_interp);
      else
	addend_cand = NULL;
    }

  /* Otherwise look for useful interpretations of the base operand.  */
  while (base_cand && !base)
    {
      if (base_cand->kind == CAND_ADD
	  && (base_cand->index.is_zero ()
	      || operand_equal_p (base_cand->stride,
				  integer_zero_node, 0)))
	{
	  /* Y = B + (i' * S), i' * S = 0
	     X = Y +/- Z
	     ============================
	     X = B + (+/-1 * Z)  */
	  base = base_cand->base_expr;
	  index = subtract_p ? double_int_minus_one : double_int_one;
	  stride = addend_in;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}
      else if (subtract_p)
	{
	  /* For a subtract, a multiply-immediate feeding the subtrahend
	     is also useful.  */
	  slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);

	  while (subtrahend_cand && !base)
	    {
	      if (subtrahend_cand->kind == CAND_MULT
		  && subtrahend_cand->index.is_zero ()
		  && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
		{
		  /* Z = (B + 0) * S, S constant
		     X = Y - Z
		     ===========================
		     Value:  X = Y + ((-1 * S) * B)  */
		  base = base_in;
		  index = tree_to_double_int (subtrahend_cand->stride);
		  index = -index;
		  stride = subtrahend_cand->base_expr;
		  ctype = TREE_TYPE (base_in);
		  if (has_single_use (addend_in))
		    savings = (subtrahend_cand->dead_savings
			       + stmt_cost (subtrahend_cand->cand_stmt, speed));
		}

	      if (subtrahend_cand->next_interp)
		subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
	      else
		subtrahend_cand = NULL;
	    }
	}

      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
	 produce X = Y + (1 * Z).  */
      base = base_in;
      index = subtract_p ? double_int_minus_one : double_int_one;
      stride = addend_in;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
				 ctype, savings);
  return c;
}
/* Create a candidate entry for a statement GS, where GS adds SSA
   name BASE_IN to constant INDEX_IN.  Propagate any known information
   about BASE_IN into the new candidate.  Return the new candidate.  */

static slsr_cand_t
create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
{
  enum cand_kind kind = CAND_ADD;
  tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
  double_int index, multiple;
  unsigned savings = 0;
  slsr_cand_t c;
  slsr_cand_t base_cand = base_cand_from_table (base_in);

  while (base_cand && !base)
    {
      bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));

      /* If the constant addend is a multiple of the feeding
	 candidate's constant stride, fold it into the index.  */
      if (TREE_CODE (base_cand->stride) == INTEGER_CST
	  && index_in.multiple_of (tree_to_double_int (base_cand->stride),
				   unsigned_p, &multiple))
	{
	  /* Y = (B + i') * S, S constant, c = kS for some integer k
	     X = Y + c
	     ============================
	     X = (B + (i'+ k)) * S

	  or

	     Y = B + (i' * S), S constant, c = kS for some integer k
	     X = Y + c
	     ============================
	     X = B + ((i' + k) * S)  */
	  kind = base_cand->kind;
	  base = base_cand->base_expr;
	  index = base_cand->index + multiple;
	  stride = base_cand->stride;
	  ctype = base_cand->cand_type;
	  if (has_single_use (base_in))
	    savings = (base_cand->dead_savings
		       + stmt_cost (base_cand->cand_stmt, speed));
	}

      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  if (!base)
    {
      /* No interpretations had anything useful to propagate, so
	 produce X = Y + (c * 1).  */
      kind = CAND_ADD;
      base = base_in;
      index = index_in;
      stride = integer_one_node;
      ctype = TREE_TYPE (base_in);
    }

  c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
				 ctype, savings);
  return c;
}
/* Given GS which is an add or subtract of scalar integers or pointers,
   make at least one appropriate entry in the candidate table.  */

static void
slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
{
  bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
  slsr_cand_t c = NULL, c2;

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      /* First record an interpretation assuming RHS1 is the base expression
	 and RHS2 is the stride.  But it doesn't make sense for the
	 stride to be a pointer, so don't record a candidate in that case.  */
      if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);

	  /* Add the first interpretation to the statement-candidate
	     mapping.  */
	  add_cand_for_stmt (gs, c);
	}

      /* If the two RHS operands are identical, or this is a subtract,
	 we're done.  */
      if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
	return;

      /* Otherwise, record another interpretation assuming RHS2 is the
	 base expression and RHS1 is the stride, again provided that the
	 stride is not a pointer.  */
      if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
	{
	  c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
	  /* Chain to the first interpretation if one was recorded;
	     otherwise this becomes the statement's first entry.  */
	  if (c)
	    c->next_interp = c2->cand_num;
	  else
	    add_cand_for_stmt (gs, c2);
	}
    }
  else
    {
      double_int index;

      /* Record an interpretation for the add-immediate.  */
      index = tree_to_double_int (rhs2);
      if (subtract_p)
	index = -index;

      c = create_add_imm_cand (gs, rhs1, index, speed);

      /* Add the interpretation to the statement-candidate mapping.  */
      add_cand_for_stmt (gs, c);
    }
}
1081 /* Given GS which is a negate of a scalar integer, make an appropriate
1082 entry in the candidate table. A negate is equivalent to a multiply
1083 by -1. */
1085 static void
1086 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1088 /* Record a CAND_MULT interpretation for the multiply by -1. */
1089 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1091 /* Add the interpretation to the statement-candidate mapping. */
1092 add_cand_for_stmt (gs, c);
1095 /* Help function for legal_cast_p, operating on two trees. Checks
1096 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1097 for more details. */
1099 static bool
1100 legal_cast_p_1 (tree lhs, tree rhs)
1102 tree lhs_type, rhs_type;
1103 unsigned lhs_size, rhs_size;
1104 bool lhs_wraps, rhs_wraps;
1106 lhs_type = TREE_TYPE (lhs);
1107 rhs_type = TREE_TYPE (rhs);
1108 lhs_size = TYPE_PRECISION (lhs_type);
1109 rhs_size = TYPE_PRECISION (rhs_type);
1110 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1111 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1113 if (lhs_size < rhs_size
1114 || (rhs_wraps && !lhs_wraps)
1115 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1116 return false;
1118 return true;
1121 /* Return TRUE if GS is a statement that defines an SSA name from
1122 a conversion and is legal for us to combine with an add and multiply
1123 in the candidate table. For example, suppose we have:
1125 A = B + i;
1126 C = (type) A;
1127 D = C * S;
1129 Without the type-cast, we would create a CAND_MULT for D with base B,
1130 index i, and stride S. We want to record this candidate only if it
1131 is equivalent to apply the type cast following the multiply:
1133 A = B + i;
1134 E = A * S;
1135 D = (type) E;
1137 We will record the type with the candidate for D. This allows us
1138 to use a similar previous candidate as a basis. If we have earlier seen
1140 A' = B + i';
1141 C' = (type) A';
1142 D' = C' * S;
1144 we can replace D with
1146 D = D' + (i - i') * S;
1148 But if moving the type-cast would change semantics, we mustn't do this.
1150 This is legitimate for casts from a non-wrapping integral type to
1151 any integral type of the same or larger size. It is not legitimate
1152 to convert a wrapping type to a non-wrapping type, or to a wrapping
1153 type of a different size. I.e., with a wrapping type, we must
1154 assume that the addition B + i could wrap, in which case performing
1155 the multiply before or after one of the "illegal" type casts will
1156 have different semantics. */
1158 static bool
1159 legal_cast_p (gimple gs, tree rhs)
1161 if (!is_gimple_assign (gs)
1162 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1163 return false;
1165 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1168 /* Given GS which is a cast to a scalar integer type, determine whether
1169 the cast is legal for strength reduction. If so, make at least one
1170 appropriate entry in the candidate table. */
1172 static void
1173 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1175 tree lhs, ctype;
1176 slsr_cand_t base_cand, c, c2;
1177 unsigned savings = 0;
1179 if (!legal_cast_p (gs, rhs1))
1180 return;
1182 lhs = gimple_assign_lhs (gs);
1183 base_cand = base_cand_from_table (rhs1);
1184 ctype = TREE_TYPE (lhs);
1186 if (base_cand)
1188 while (base_cand)
1190 /* Propagate all data from the base candidate except the type,
1191 which comes from the cast, and the base candidate's cast,
1192 which is no longer applicable. */
1193 if (has_single_use (rhs1))
1194 savings = (base_cand->dead_savings
1195 + stmt_cost (base_cand->cand_stmt, speed));
1197 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1198 base_cand->base_expr,
1199 base_cand->index, base_cand->stride,
1200 ctype, savings);
1201 if (base_cand->next_interp)
1202 base_cand = lookup_cand (base_cand->next_interp);
1203 else
1204 base_cand = NULL;
1207 else
1209 /* If nothing is known about the RHS, create fresh CAND_ADD and
1210 CAND_MULT interpretations:
1212 X = Y + (0 * 1)
1213 X = (Y + 0) * 1
1215 The first of these is somewhat arbitrary, but the choice of
1216 1 for the stride simplifies the logic for propagating casts
1217 into their uses. */
1218 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1219 integer_one_node, ctype, 0);
1220 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1221 integer_one_node, ctype, 0);
1222 c->next_interp = c2->cand_num;
1225 /* Add the first (or only) interpretation to the statement-candidate
1226 mapping. */
1227 add_cand_for_stmt (gs, c);
1230 /* Given GS which is a copy of a scalar integer type, make at least one
1231 appropriate entry in the candidate table.
1233 This interface is included for completeness, but is unnecessary
1234 if this pass immediately follows a pass that performs copy
1235 propagation, such as DOM. */
1237 static void
1238 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1240 slsr_cand_t base_cand, c, c2;
1241 unsigned savings = 0;
1243 base_cand = base_cand_from_table (rhs1);
1245 if (base_cand)
1247 while (base_cand)
1249 /* Propagate all data from the base candidate. */
1250 if (has_single_use (rhs1))
1251 savings = (base_cand->dead_savings
1252 + stmt_cost (base_cand->cand_stmt, speed));
1254 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1255 base_cand->base_expr,
1256 base_cand->index, base_cand->stride,
1257 base_cand->cand_type, savings);
1258 if (base_cand->next_interp)
1259 base_cand = lookup_cand (base_cand->next_interp);
1260 else
1261 base_cand = NULL;
1264 else
1266 /* If nothing is known about the RHS, create fresh CAND_ADD and
1267 CAND_MULT interpretations:
1269 X = Y + (0 * 1)
1270 X = (Y + 0) * 1
1272 The first of these is somewhat arbitrary, but the choice of
1273 1 for the stride simplifies the logic for propagating casts
1274 into their uses. */
1275 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1276 integer_one_node, TREE_TYPE (rhs1), 0);
1277 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1278 integer_one_node, TREE_TYPE (rhs1), 0);
1279 c->next_interp = c2->cand_num;
1282 /* Add the first (or only) interpretation to the statement-candidate
1283 mapping. */
1284 add_cand_for_stmt (gs, c);
/* Find strength-reduction candidates in block BB.  Called once per
   block by the dominator walk; records candidates for memory
   references and for scalar-integer multiplies, adds, subtracts,
   negates, casts, and copies.  */

static void
find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
			  basic_block bb)
{
  bool speed = optimize_bb_for_speed_p (bb);
  gimple_stmt_iterator gsi;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple gs = gsi_stmt (gsi);

      /* A statement with a virtual use that is a single assignment is
	 a memory reference; handle it separately.  */
      if (gimple_vuse (gs) && gimple_assign_single_p (gs))
	slsr_process_ref (gs);

      else if (is_gimple_assign (gs)
	       && SCALAR_INT_MODE_P
		    (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
	{
	  tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;

	  /* First switch: extract and validate the operands for the
	     codes we handle; skip the statement if the first operand
	     is not an SSA name.  */
	  switch (gimple_assign_rhs_code (gs))
	    {
	    case MULT_EXPR:
	    case PLUS_EXPR:
	      rhs1 = gimple_assign_rhs1 (gs);
	      rhs2 = gimple_assign_rhs2 (gs);
	      /* Should never happen, but currently some buggy situations
		 in earlier phases put constants in rhs1.  */
	      if (TREE_CODE (rhs1) != SSA_NAME)
		continue;
	      break;

	    /* Possible future opportunity: rhs1 of a ptr+ can be
	       an ADDR_EXPR.  */
	    case POINTER_PLUS_EXPR:
	    case MINUS_EXPR:
	      rhs2 = gimple_assign_rhs2 (gs);
	      /* Fall-through.  */

	    case NOP_EXPR:
	    case MODIFY_EXPR:
	    case NEGATE_EXPR:
	      rhs1 = gimple_assign_rhs1 (gs);
	      if (TREE_CODE (rhs1) != SSA_NAME)
		continue;
	      break;

	    default:
	      ;
	    }

	  /* Second switch: dispatch to the appropriate candidate
	     recorder now that operands have been validated.  */
	  switch (gimple_assign_rhs_code (gs))
	    {
	    case MULT_EXPR:
	      slsr_process_mul (gs, rhs1, rhs2, speed);
	      break;

	    case PLUS_EXPR:
	    case POINTER_PLUS_EXPR:
	    case MINUS_EXPR:
	      slsr_process_add (gs, rhs1, rhs2, speed);
	      break;

	    case NEGATE_EXPR:
	      slsr_process_neg (gs, rhs1, speed);
	      break;

	    case NOP_EXPR:
	      slsr_process_cast (gs, rhs1, speed);
	      break;

	    case MODIFY_EXPR:
	      slsr_process_copy (gs, rhs1, speed);
	      break;

	    default:
	      ;
	    }
	}
    }
}
/* Dump a candidate for debug: its number, containing block, defining
   statement, interpretation (kind, base, index, stride, type), and
   its links to related candidates.  */

static void
dump_candidate (slsr_cand_t c)
{
  fprintf (dump_file, "%3d [%d] ", c->cand_num,
	   gimple_bb (c->cand_stmt)->index);
  print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
  switch (c->kind)
    {
    case CAND_MULT:
      /* Interpretation X = (B + i) * S.  */
      fputs (" MULT : (", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + ", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (") * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (" : ", dump_file);
      break;
    case CAND_ADD:
      /* Interpretation X = B + (i * S).  */
      fputs (" ADD : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (" * ", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") : ", dump_file);
      break;
    case CAND_REF:
      /* Interpretation *(B + S) + i for a memory reference.  */
      fputs (" REF : ", dump_file);
      print_generic_expr (dump_file, c->base_expr, 0);
      fputs (" + (", dump_file);
      print_generic_expr (dump_file, c->stride, 0);
      fputs (") + ", dump_file);
      dump_double_int (dump_file, c->index, false);
      fputs (" : ", dump_file);
      break;
    default:
      gcc_unreachable ();
    }
  print_generic_expr (dump_file, c->cand_type, 0);
  fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
	   c->basis, c->dependent, c->sibling);
  fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
	   c->next_interp, c->dead_savings);
  if (c->def_phi)
    {
      fputs (" phi: ", dump_file);
      print_gimple_stmt (dump_file, c->def_phi, 0, 0);
    }
  fputs ("\n", dump_file);
}
1424 /* Dump the candidate vector for debug. */
1426 static void
1427 dump_cand_vec (void)
1429 unsigned i;
1430 slsr_cand_t c;
1432 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1434 FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
1435 dump_candidate (c);
1438 /* Callback used to dump the candidate chains hash table. */
1440 static int
1441 base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
1443 const_cand_chain_t chain = *((const_cand_chain_t *) slot);
1444 cand_chain_t p;
1446 print_generic_expr (dump_file, chain->base_expr, 0);
1447 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1449 for (p = chain->next; p; p = p->next)
1450 fprintf (dump_file, " -> %d", p->cand->cand_num);
1452 fputs ("\n", dump_file);
1453 return 1;
1456 /* Dump the candidate chains. */
1458 static void
1459 dump_cand_chains (void)
1461 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1462 htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
1463 fputs ("\n", dump_file);
1466 /* Dump the increment vector for debug. */
1468 static void
1469 dump_incr_vec (void)
1471 if (dump_file && (dump_flags & TDF_DETAILS))
1473 unsigned i;
1475 fprintf (dump_file, "\nIncrement vector:\n\n");
1477 for (i = 0; i < incr_vec_len; i++)
1479 fprintf (dump_file, "%3d increment: ", i);
1480 dump_double_int (dump_file, incr_vec[i].incr, false);
1481 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1482 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1483 fputs ("\n initializer: ", dump_file);
1484 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1485 fputs ("\n\n", dump_file);
1490 /* Recursive helper for unconditional_cands_with_known_stride_p.
1491 Returns TRUE iff C, its siblings, and its dependents are all
1492 unconditional candidates. */
1494 static bool
1495 unconditional_cands (slsr_cand_t c)
1497 if (c->def_phi)
1498 return false;
1500 if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
1501 return false;
1503 if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
1504 return false;
1506 return true;
1509 /* Determine whether or not the tree of candidates rooted at
1510 ROOT consists entirely of unconditional increments with
1511 an INTEGER_CST stride. */
1513 static bool
1514 unconditional_cands_with_known_stride_p (slsr_cand_t root)
1516 /* The stride is identical for all related candidates, so
1517 check it once. */
1518 if (TREE_CODE (root->stride) != INTEGER_CST)
1519 return false;
1521 return unconditional_cands (lookup_cand (root->dependent));
/* Replace *EXPR in candidate C with an equivalent strength-reduced
   data reference of the form MEM_REF[base + stride, index].  */

static void
replace_ref (tree *expr, slsr_cand_t c)
{
  /* Build the new address BASE + STRIDE and wrap it in a MEM_REF of
     the original reference's type, folding the candidate's index in
     as the MEM_REF offset.  */
  tree add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
			       c->base_expr, c->stride);
  tree mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr,
			      double_int_to_tree (c->cand_type, c->index));

  /* Gimplify the base addressing expression for the new MEM_REF tree.  */
  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
  TREE_OPERAND (mem_ref, 0)
    = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
				/*simple_p=*/true, NULL,
				/*before=*/true, GSI_SAME_STMT);
  /* Preserve alias and points-to information from the old reference,
     then splice the new reference in and re-validate the statement.  */
  copy_ref_info (mem_ref, *expr);
  *expr = mem_ref;
  update_stmt (c->cand_stmt);
}
1546 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1547 dependent of candidate C with an equivalent strength-reduced data
1548 reference. */
1550 static void
1551 replace_refs (slsr_cand_t c)
1553 if (gimple_vdef (c->cand_stmt))
1555 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1556 replace_ref (lhs, c);
1558 else
1560 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1561 replace_ref (rhs, c);
1564 if (c->sibling)
1565 replace_refs (lookup_cand (c->sibling));
1567 if (c->dependent)
1568 replace_refs (lookup_cand (c->dependent));
1571 /* Calculate the increment required for candidate C relative to
1572 its basis. */
1574 static double_int
1575 cand_increment (slsr_cand_t c)
1577 slsr_cand_t basis;
1579 /* If the candidate doesn't have a basis, just return its own
1580 index. This is useful in record_increments to help us find
1581 an existing initializer. */
1582 if (!c->basis)
1583 return c->index;
1585 basis = lookup_cand (c->basis);
1586 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1587 return c->index - basis->index;
1590 /* Calculate the increment required for candidate C relative to
1591 its basis. If we aren't going to generate pointer arithmetic
1592 for this candidate, return the absolute value of that increment
1593 instead. */
1595 static inline double_int
1596 cand_abs_increment (slsr_cand_t c)
1598 double_int increment = cand_increment (c);
1600 if (!address_arithmetic_p && increment.is_negative ())
1601 increment = -increment;
1603 return increment;
1606 /* If *VAR is NULL or is not of a compatible type with TYPE, create a
1607 new temporary reg of type TYPE and store it in *VAR. */
1609 static inline void
1610 lazy_create_slsr_reg (tree *var, tree type)
1612 if (!*var || !types_compatible_p (TREE_TYPE (*var), type))
1613 *var = create_tmp_reg (type, "slsr");
1616 /* Return TRUE iff candidate C has already been replaced under
1617 another interpretation. */
1619 static inline bool
1620 cand_already_replaced (slsr_cand_t c)
1622 return (gimple_bb (c->cand_stmt) == 0);
/* Helper routine for replace_dependents, doing the work for a
   single candidate C.  CAND_CODE is the rhs code of C's defining
   statement.  Rewrites C's statement as BASIS +/- BUMP, where
   BUMP = increment * stride.  */

static void
replace_dependent (slsr_cand_t c, enum tree_code cand_code)
{
  double_int stride = tree_to_double_int (c->stride);
  double_int bump = cand_increment (c) * stride;
  gimple stmt_to_print = NULL;
  slsr_cand_t basis;
  tree basis_name, incr_type, bump_tree;
  enum tree_code code;

  /* It is highly unlikely, but possible, that the resulting
     bump doesn't fit in a HWI.  Abandon the replacement
     in this case.  Restriction to signed HWI is conservative
     for unsigned types but allows for safe negation without
     twisted logic.  */
  if (!bump.fits_shwi ())
    return;

  basis = lookup_cand (c->basis);
  basis_name = gimple_assign_lhs (basis->cand_stmt);
  incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
  code = PLUS_EXPR;

  /* Express a negative bump as a subtract of its absolute value.  */
  if (bump.is_negative ())
    {
      code = MINUS_EXPR;
      bump = -bump;
    }

  bump_tree = double_int_to_tree (incr_type, bump);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Replacing: ", dump_file);
      print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
    }

  if (bump.is_zero ())
    {
      /* A zero bump means C computes the same value as its basis;
	 replace the statement with a plain copy of the basis.  */
      tree lhs = gimple_assign_lhs (c->cand_stmt);
      gimple copy_stmt = gimple_build_assign (lhs, basis_name);
      gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
      gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
      gsi_replace (&gsi, copy_stmt, false);
      if (dump_file && (dump_flags & TDF_DETAILS))
	stmt_to_print = copy_stmt;
    }
  else
    {
      tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
      tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
      /* If the statement already has the desired form (in either
	 operand order), avoid a pointless in-place rewrite.  A
	 NEGATE_EXPR has only one operand, so it never matches.  */
      if (cand_code != NEGATE_EXPR
	  && ((operand_equal_p (rhs1, basis_name, 0)
	       && operand_equal_p (rhs2, bump_tree, 0))
	      || (operand_equal_p (rhs1, bump_tree, 0)
		  && operand_equal_p (rhs2, basis_name, 0))))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fputs ("(duplicate, not actually replacing)", dump_file);
	      stmt_to_print = c->cand_stmt;
	    }
	}
      else
	{
	  /* Rewrite the statement in place as BASIS +/- BUMP.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
	  update_stmt (gsi_stmt (gsi));
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = gsi_stmt (gsi);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("With: ", dump_file);
      print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
      fputs ("\n", dump_file);
    }
}
1709 /* Replace candidate C, each sibling of candidate C, and each
1710 dependent of candidate C with an add or subtract. Note that we
1711 only operate on CAND_MULTs with known strides, so we will never
1712 generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is
1713 replaced by X = Y + ((i - i') * S), as described in the module
1714 commentary. The folded value ((i - i') * S) is referred to here
1715 as the "bump." */
1717 static void
1718 replace_dependents (slsr_cand_t c)
1720 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1722 /* It is not useful to replace casts, copies, or adds of an SSA name
1723 and a constant. Also skip candidates that have already been
1724 replaced under another interpretation. */
1725 if (cand_code != MODIFY_EXPR
1726 && cand_code != NOP_EXPR
1727 && c->kind == CAND_MULT
1728 && !cand_already_replaced (c))
1729 replace_dependent (c, cand_code);
1731 if (c->sibling)
1732 replace_dependents (lookup_cand (c->sibling));
1734 if (c->dependent)
1735 replace_dependents (lookup_cand (c->dependent));
1738 /* Return the index in the increment vector of the given INCREMENT. */
1740 static inline unsigned
1741 incr_vec_index (double_int increment)
1743 unsigned i;
1745 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
1748 gcc_assert (i < incr_vec_len);
1749 return i;
1752 /* Count the number of candidates in the tree rooted at C that have
1753 not already been replaced under other interpretations. */
1755 static unsigned
1756 count_candidates (slsr_cand_t c)
1758 unsigned count = cand_already_replaced (c) ? 0 : 1;
1760 if (c->sibling)
1761 count += count_candidates (lookup_cand (c->sibling));
1763 if (c->dependent)
1764 count += count_candidates (lookup_cand (c->dependent));
1766 return count;
/* Increase the count of INCREMENT by one in the increment vector.
   INCREMENT is associated with candidate C.  If an initializer
   T_0 = stride * I is provided by a candidate that dominates all
   candidates with the same increment, also record T_0 for subsequent use.  */

static void
record_increment (slsr_cand_t c, double_int increment)
{
  bool found = false;
  unsigned i;

  /* Treat increments that differ only in sign as identical so as to
     share initializers, unless we are generating pointer arithmetic.  */
  if (!address_arithmetic_p && increment.is_negative ())
    increment = -increment;

  for (i = 0; i < incr_vec_len; i++)
    {
      if (incr_vec[i].incr == increment)
	{
	  incr_vec[i].count++;
	  found = true;

	  /* If we previously recorded an initializer that doesn't
	     dominate this candidate, it's not going to be useful to
	     us after all.  */
	  if (incr_vec[i].initializer
	      && !dominated_by_p (CDI_DOMINATORS,
				  gimple_bb (c->cand_stmt),
				  incr_vec[i].init_bb))
	    {
	      incr_vec[i].initializer = NULL_TREE;
	      incr_vec[i].init_bb = NULL;
	    }

	  break;
	}
    }

  if (!found)
    {
      /* The first time we see an increment, create the entry for it.
	 If this is the root candidate which doesn't have a basis, set
	 the count to zero.  We're only processing it so it can possibly
	 provide an initializer for other candidates.  */
      incr_vec[incr_vec_len].incr = increment;
      incr_vec[incr_vec_len].count = c->basis ? 1 : 0;
      incr_vec[incr_vec_len].cost = COST_INFINITE;

      /* Optimistically record the first occurrence of this increment
	 as providing an initializer (if it does); we will revise this
	 opinion later if it doesn't dominate all other occurrences.
	 Exception: increments of -1, 0, 1 never need initializers.  */
      if (c->kind == CAND_ADD
	  && c->index == increment
	  && (increment.sgt (double_int_one)
	      || increment.slt (double_int_minus_one)))
	{
	  tree t0;
	  tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
	  tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
	  /* The operand that is not the base expression is the
	     potential initializer T_0.  */
	  if (operand_equal_p (rhs1, c->base_expr, 0))
	    t0 = rhs2;
	  else
	    t0 = rhs1;
	  /* Only an SSA name with a real defining statement in a real
	     block can serve as an initializer.  */
	  if (SSA_NAME_DEF_STMT (t0) && gimple_bb (SSA_NAME_DEF_STMT (t0)))
	    {
	      incr_vec[incr_vec_len].initializer = t0;
	      incr_vec[incr_vec_len++].init_bb
		= gimple_bb (SSA_NAME_DEF_STMT (t0));
	    }
	  else
	    {
	      incr_vec[incr_vec_len].initializer = NULL_TREE;
	      incr_vec[incr_vec_len++].init_bb = NULL;
	    }
	}
      else
	{
	  incr_vec[incr_vec_len].initializer = NULL_TREE;
	  incr_vec[incr_vec_len++].init_bb = NULL;
	}
    }
}
1854 /* Determine how many times each unique increment occurs in the set
1855 of candidates rooted at C's parent, recording the data in the
1856 increment vector. For each unique increment I, if an initializer
1857 T_0 = stride * I is provided by a candidate that dominates all
1858 candidates with the same increment, also record T_0 for subsequent
1859 use. */
1861 static void
1862 record_increments (slsr_cand_t c)
1864 if (!cand_already_replaced (c))
1865 record_increment (c, cand_increment (c));
1867 if (c->sibling)
1868 record_increments (lookup_cand (c->sibling));
1870 if (c->dependent)
1871 record_increments (lookup_cand (c->dependent));
1874 /* Return the first candidate in the tree rooted at C that has not
1875 already been replaced, favoring siblings over dependents. */
1877 static slsr_cand_t
1878 unreplaced_cand_in_tree (slsr_cand_t c)
1880 if (!cand_already_replaced (c))
1881 return c;
1883 if (c->sibling)
1885 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
1886 if (sib)
1887 return sib;
1890 if (c->dependent)
1892 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
1893 if (dep)
1894 return dep;
1897 return NULL;
1900 /* Return TRUE if the candidates in the tree rooted at C should be
1901 optimized for speed, else FALSE. We estimate this based on the block
1902 containing the most dominant candidate in the tree that has not yet
1903 been replaced. */
1905 static bool
1906 optimize_cands_for_speed_p (slsr_cand_t c)
1908 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
1909 gcc_assert (c2);
1910 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
1913 /* Add COST_IN to the lowest cost of any dependent path starting at
1914 candidate C or any of its siblings, counting only candidates along
1915 such paths with increment INCR. Assume that replacing a candidate
1916 reduces cost by REPL_SAVINGS. Also account for savings from any
1917 statements that would go dead. */
1919 static int
1920 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, double_int incr)
1922 int local_cost, sib_cost;
1923 double_int cand_incr = cand_abs_increment (c);
1925 if (cand_already_replaced (c))
1926 local_cost = cost_in;
1927 else if (incr == cand_incr)
1928 local_cost = cost_in - repl_savings - c->dead_savings;
1929 else
1930 local_cost = cost_in - c->dead_savings;
1932 if (c->dependent)
1933 local_cost = lowest_cost_path (local_cost, repl_savings,
1934 lookup_cand (c->dependent), incr);
1936 if (c->sibling)
1938 sib_cost = lowest_cost_path (cost_in, repl_savings,
1939 lookup_cand (c->sibling), incr);
1940 local_cost = MIN (local_cost, sib_cost);
1943 return local_cost;
1946 /* Compute the total savings that would accrue from all replacements
1947 in the candidate tree rooted at C, counting only candidates with
1948 increment INCR. Assume that replacing a candidate reduces cost
1949 by REPL_SAVINGS. Also account for savings from statements that
1950 would go dead. */
1952 static int
1953 total_savings (int repl_savings, slsr_cand_t c, double_int incr)
1955 int savings = 0;
1956 double_int cand_incr = cand_abs_increment (c);
1958 if (incr == cand_incr && !cand_already_replaced (c))
1959 savings += repl_savings + c->dead_savings;
1961 if (c->dependent)
1962 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr);
1964 if (c->sibling)
1965 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr);
1967 return savings;
/* Use target-specific costs to determine and record which increments
   in the current candidate tree are profitable to replace, assuming
   MODE and SPEED.  FIRST_DEP is the first dependent of the root of
   the candidate tree.

   One slight limitation here is that we don't account for the possible
   introduction of casts in some cases.  See replace_one_candidate for
   the cases where these are introduced.  This should probably be cleaned
   up sometime.  */

static void
analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
{
  unsigned i;

  /* Assign a cost to every recorded increment, taking the first
     matching case in the ladder below.  */
  for (i = 0; i < incr_vec_len; i++)
    {
      HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();

      /* If somehow this increment is bigger than a HWI, we won't
	 be optimizing candidates that use it.  And if the increment
	 has a count of zero, nothing will be done with it.  */
      if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
	incr_vec[i].cost = COST_INFINITE;

      /* Increments of 0, 1, and -1 are always profitable to replace,
	 because they always replace a multiply or add with an add or
	 copy, and may cause one or more existing instructions to go
	 dead.  Exception: -1 can't be assumed to be profitable for
	 pointer addition.  */
      else if (incr == 0
	       || incr == 1
	       || (incr == -1
		   && (gimple_assign_rhs_code (first_dep->cand_stmt)
		       != POINTER_PLUS_EXPR)))
	incr_vec[i].cost = COST_NEUTRAL;

      /* FORNOW: If we need to add an initializer, give up if a cast from
	 the candidate's type to its stride's type can lose precision.
	 This could eventually be handled better by expressly retaining the
	 result of a cast to a wider type in the stride.  Example:

	   short int _1;
	   _2 = (int) _1;
	   _3 = _2 * 10;
	   _4 = x + _3;    ADD: x + (10 * _1) : int
	   _5 = _2 * 15;
	   _6 = x + _3;    ADD: x + (15 * _1) : int

	 Right now replacing _6 would cause insertion of an initializer
	 of the form "short int T = _1 * 5;" followed by a cast to
	 int, which could overflow incorrectly.  Had we recorded _2 or
	 (int)_1 as the stride, this wouldn't happen.  However, doing
	 this breaks other opportunities, so this will require some
	 care.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && !legal_cast_p_1 (first_dep->stride,
				   gimple_assign_lhs (first_dep->cand_stmt)))
	incr_vec[i].cost = COST_INFINITE;

      /* If we need to add an initializer, make sure we don't introduce
	 a multiply by a pointer type, which can happen in certain cast
	 scenarios.  FIXME: When cleaning up these cast issues, we can
	 afford to introduce the multiply provided we cast out to an
	 unsigned int of appropriate size.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))
	incr_vec[i].cost = COST_INFINITE;

      /* For any other increment, if this is a multiply candidate, we
	 must introduce a temporary T and initialize it with
	 T_0 = stride * increment.  When optimizing for speed, walk the
	 candidate tree to calculate the best cost reduction along any
	 path; if it offsets the fixed cost of inserting the initializer,
	 replacing the increment is profitable.  When optimizing for
	 size, instead calculate the total cost reduction from replacing
	 all candidates with this increment.  */
      else if (first_dep->kind == CAND_MULT)
	{
	  int cost = mult_by_coeff_cost (incr, mode, speed);
	  int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
	  if (speed)
	    cost = lowest_cost_path (cost, repl_savings, first_dep,
				     incr_vec[i].incr);
	  else
	    cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr);

	  incr_vec[i].cost = cost;
	}

      /* If this is an add candidate, the initializer may already
	 exist, so only calculate the cost of the initializer if it
	 doesn't.  We are replacing one add with another here, so the
	 known replacement savings is zero.  We will account for removal
	 of dead instructions in lowest_cost_path or total_savings.  */
      else
	{
	  int cost = 0;
	  if (!incr_vec[i].initializer)
	    cost = mult_by_coeff_cost (incr, mode, speed);

	  if (speed)
	    cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr);
	  else
	    cost -= total_savings (0, first_dep, incr_vec[i].incr);

	  incr_vec[i].cost = cost;
	}
    }
}
2085 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2086 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2087 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2088 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2089 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2091 static basic_block
2092 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2093 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2095 basic_block ncd;
2097 if (!bb1)
2099 *where = c2;
2100 return bb2;
2103 if (!bb2)
2105 *where = c1;
2106 return bb1;
2109 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2111 /* If both candidates are in the same block, the earlier
2112 candidate wins. */
2113 if (bb1 == ncd && bb2 == ncd)
2115 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2116 *where = c2;
2117 else
2118 *where = c1;
2121 /* Otherwise, if one of them produced a candidate in the
2122 dominator, that one wins. */
2123 else if (bb1 == ncd)
2124 *where = c1;
2126 else if (bb2 == ncd)
2127 *where = c2;
2129 /* If neither matches the dominator, neither wins. */
2130 else
2131 *where = NULL;
2133 return ncd;
/* Consider all candidates in the tree rooted at C for which INCR
   represents the required increment of C relative to its basis.
   Find and return the basic block that most nearly dominates all
   such candidates.  If the returned block contains one or more of
   the candidates, return the earliest candidate in the block in
   *WHERE.  */

static basic_block
nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
				    slsr_cand_t *where)
{
  basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
  slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
  double_int cand_incr;

  /* First find the NCD of all siblings and dependents.  */
  if (c->sibling)
    sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
						  incr, &sib_where);
  if (c->dependent)
    dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
						  incr, &dep_where);

  /* Combine the sibling and dependent results: neither, one, or both
     subtrees may have produced an NCD.  */
  if (!sib_ncd && !dep_ncd)
    {
      new_where = NULL;
      ncd = NULL;
    }
  else if (sib_ncd && !dep_ncd)
    {
      new_where = sib_where;
      ncd = sib_ncd;
    }
  else if (dep_ncd && !sib_ncd)
    {
      new_where = dep_where;
      ncd = dep_ncd;
    }
  else
    ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
			     dep_where, &new_where);

  /* If the candidate's increment doesn't match the one we're interested
     in, then the result depends only on siblings and dependents.  */
  cand_incr = cand_abs_increment (c);

  if (cand_incr != incr || cand_already_replaced (c))
    {
      *where = new_where;
      return ncd;
    }

  /* Otherwise, compare this candidate with the result from all siblings
     and dependents.  */
  this_where = c;
  this_ncd = gimple_bb (c->cand_stmt);
  ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);

  return ncd;
}
2196 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
2198 static inline bool
2199 profitable_increment_p (unsigned index)
2201 return (incr_vec[index].cost <= COST_NEUTRAL);
/* For each profitable increment in the increment vector not equal to
   0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
   dominator of all statements in the candidate chain rooted at C
   that require that increment, and insert an initializer
   T_0 = stride * increment at that location.  Record T_0 with the
   increment record.  */

static void
insert_initializers (slsr_cand_t c)
{
  unsigned i;
  tree new_var = NULL_TREE;

  for (i = 0; i < incr_vec_len; i++)
    {
      basic_block bb;
      slsr_cand_t where = NULL;
      gimple init_stmt;
      tree stride_type, new_name, incr_tree;
      double_int incr = incr_vec[i].incr;

      /* Skip unprofitable increments, and increments of 0 and 1
	 (and -1 for non-pointer arithmetic), which never need an
	 explicit multiply.  */
      if (!profitable_increment_p (i)
	  || incr.is_one ()
	  || (incr.is_minus_one ()
	      && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
	  || incr.is_zero ())
	continue;

      /* We may have already identified an existing initializer that
	 will suffice.  */
      if (incr_vec[i].initializer)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fputs ("Using existing initializer: ", dump_file);
	      print_gimple_stmt (dump_file,
				 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
				 0, 0);
	    }
	  continue;
	}

      /* Find the block that most closely dominates all candidates
	 with this increment.  If there is at least one candidate in
	 that block, the earliest one will be returned in WHERE.  */
      bb = nearest_common_dominator_for_cands (c, incr, &where);

      /* Create a new SSA name to hold the initializer's value.  */
      stride_type = TREE_TYPE (c->stride);
      lazy_create_slsr_reg (&new_var, stride_type);
      new_name = make_ssa_name (new_var, NULL);
      incr_vec[i].initializer = new_name;

      /* Create the initializer and insert it in the latest possible
	 dominating position.  */
      incr_tree = double_int_to_tree (stride_type, incr);
      init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
						c->stride, incr_tree);
      if (where)
	{
	  /* A candidate with this increment lives in the dominating
	     block; insert just before the earliest such candidate.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
	  gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
	  gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
	}
      else
	{
	  /* No candidate in the dominating block itself: append at the
	     end of the block, but stay ahead of any control statement
	     that must remain last.  */
	  gimple_stmt_iterator gsi = gsi_last_bb (bb);
	  gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;

	  if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
	    gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
	  else
	    gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);

	  gimple_set_location (init_stmt, gimple_location (basis_stmt));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs ("Inserting initializer: ", dump_file);
	  print_gimple_stmt (dump_file, init_stmt, 0, 0);
	}
    }
}
2289 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
2290 type TO_TYPE, and insert it in front of the statement represented
2291 by candidate C. Use *NEW_VAR to create the new SSA name. Return
2292 the new SSA name. */
2294 static tree
2295 introduce_cast_before_cand (slsr_cand_t c, tree to_type,
2296 tree from_expr, tree *new_var)
2298 tree cast_lhs;
2299 gimple cast_stmt;
2300 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2302 lazy_create_slsr_reg (new_var, to_type);
2303 cast_lhs = make_ssa_name (*new_var, NULL);
2304 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
2305 from_expr, NULL_TREE);
2306 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
2307 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
2309 if (dump_file && (dump_flags & TDF_DETAILS))
2311 fputs (" Inserting: ", dump_file);
2312 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
2315 return cast_lhs;
2318 /* Replace the RHS of the statement represented by candidate C with
2319 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
2320 leave C unchanged or just interchange its operands. The original
2321 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
2322 If the replacement was made and we are doing a details dump,
2323 return the revised statement, else NULL. */
2325 static gimple
2326 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
2327 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
2328 slsr_cand_t c)
2330 if (new_code != old_code
2331 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
2332 || !operand_equal_p (new_rhs2, old_rhs2, 0))
2333 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
2334 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
2336 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2337 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
2338 update_stmt (gsi_stmt (gsi));
2340 if (dump_file && (dump_flags & TDF_DETAILS))
2341 return gsi_stmt (gsi);
2344 else if (dump_file && (dump_flags & TDF_DETAILS))
2345 fputs (" (duplicate, not actually replacing)\n", dump_file);
2347 return NULL;
/* Strength-reduce the statement represented by candidate C by replacing
   it with an equivalent addition or subtraction.  I is the index into
   the increment vector identifying C's increment.  NEW_VAR is used to
   create a new SSA name if a cast needs to be introduced.  BASIS_NAME
   is the rhs1 to use in creating the add/subtract.  */

static void
replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var,
		       tree basis_name)
{
  gimple stmt_to_print = NULL;
  tree orig_rhs1, orig_rhs2;
  tree rhs2;
  enum tree_code orig_code, repl_code;
  double_int cand_incr;

  /* Record the original statement's operation and operands so we can
     detect a no-op replacement later.  */
  orig_code = gimple_assign_rhs_code (c->cand_stmt);
  orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
  orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
  cand_incr = cand_increment (c);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Replacing: ", dump_file);
      print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
      stmt_to_print = c->cand_stmt;
    }

  /* Pointer arithmetic requires POINTER_PLUS_EXPR; everything else
     uses PLUS_EXPR.  */
  if (address_arithmetic_p)
    repl_code = POINTER_PLUS_EXPR;
  else
    repl_code = PLUS_EXPR;

  /* If the increment has an initializer T_0, replace the candidate
     statement with an add of the basis name and the initializer.  */
  if (incr_vec[i].initializer)
    {
      tree init_type = TREE_TYPE (incr_vec[i].initializer);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, init_type))
	rhs2 = incr_vec[i].initializer;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type,
					   incr_vec[i].initializer,
					   new_var);

      /* The increment vector holds absolute values; if the candidate's
	 signed increment differs, subtract rather than add.  */
      if (incr_vec[i].incr != cand_incr)
	{
	  gcc_assert (repl_code == PLUS_EXPR);
	  repl_code = MINUS_EXPR;
	}

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
					      orig_code, orig_rhs1, orig_rhs2,
					      c);
    }

  /* Otherwise, the increment is one of -1, 0, and 1.  Replace
     with a subtract of the stride from the basis name, a copy
     from the basis name, or an add of the stride to the basis
     name, respectively.  It may be necessary to introduce a
     cast (or reuse an existing cast).  */
  else if (cand_incr.is_one ())
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, stride_type))
	rhs2 = c->stride;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
					      orig_code, orig_rhs1, orig_rhs2,
					      c);
    }

  else if (cand_incr.is_minus_one ())
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);
      /* An increment of -1 is never used for pointer arithmetic here.  */
      gcc_assert (repl_code != POINTER_PLUS_EXPR);

      if (types_compatible_p (orig_type, stride_type))
	rhs2 = c->stride;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);

      /* Replace only if this wouldn't reproduce the original
	 subtraction exactly.  */
      if (orig_code != MINUS_EXPR
	  || !operand_equal_p (basis_name, orig_rhs1, 0)
	  || !operand_equal_p (rhs2, orig_rhs2, 0))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
	  update_stmt (gsi_stmt (gsi));

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = gsi_stmt (gsi);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fputs ("  (duplicate, not actually replacing)\n", dump_file);
    }

  else if (cand_incr.is_zero ())
    {
      tree lhs = gimple_assign_lhs (c->cand_stmt);
      tree lhs_type = TREE_TYPE (lhs);
      tree basis_type = TREE_TYPE (basis_name);

      /* A zero increment means the candidate equals its basis: use a
	 plain copy when types agree, else a conversion.  */
      if (types_compatible_p (lhs_type, basis_type))
	{
	  gimple copy_stmt = gimple_build_assign (lhs, basis_name);
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
	  gsi_replace (&gsi, copy_stmt, false);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = copy_stmt;
	}
      else
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
							   basis_name,
							   NULL_TREE);
	  gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
	  gsi_replace (&gsi, cast_stmt, false);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = cast_stmt;
	}
    }
  else
    gcc_unreachable ();

  if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
    {
      fputs ("With: ", dump_file);
      print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
      fputs ("\n", dump_file);
    }
}
2494 /* For each candidate in the tree rooted at C, replace it with
2495 an increment if such has been shown to be profitable. */
2497 static void
2498 replace_profitable_candidates (slsr_cand_t c)
2500 if (!cand_already_replaced (c))
2502 double_int increment = cand_abs_increment (c);
2503 tree new_var = NULL;
2504 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
2505 unsigned i;
2507 i = incr_vec_index (increment);
2509 /* Only process profitable increments. Nothing useful can be done
2510 to a cast or copy. */
2511 if (profitable_increment_p (i)
2512 && orig_code != MODIFY_EXPR
2513 && orig_code != NOP_EXPR)
2515 slsr_cand_t basis = lookup_cand (c->basis);
2516 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
2517 replace_one_candidate (c, i, &new_var, basis_name);
2521 if (c->sibling)
2522 replace_profitable_candidates (lookup_cand (c->sibling));
2524 if (c->dependent)
2525 replace_profitable_candidates (lookup_cand (c->dependent));
/* Analyze costs of related candidates in the candidate vector,
   and make beneficial replacements.  */

static void
analyze_candidates_and_replace (void)
{
  unsigned i;
  slsr_cand_t c;

  /* Each candidate that has a null basis and a non-null
     dependent is the root of a tree of related statements.
     Analyze each tree to determine a subset of those
     statements that can be replaced with maximum benefit.  */
  FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
    {
      slsr_cand_t first_dep;

      /* Only tree roots are of interest here.  */
      if (c->basis != 0 || c->dependent == 0)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
		 c->cand_num);

      first_dep = lookup_cand (c->dependent);

      /* If this is a chain of CAND_REFs, unconditionally replace
	 each of them with a strength-reduced data reference.  */
      if (c->kind == CAND_REF)
	replace_refs (c);

      /* If the common stride of all related candidates is a
	 known constant, and none of these has a phi-dependence,
	 then all replacements are considered profitable.
	 Each replaces a multiply by a single add, with the
	 possibility that a feeding add also goes dead as a
	 result.  */
      else if (unconditional_cands_with_known_stride_p (c))
	replace_dependents (first_dep);

      /* When the stride is an SSA name, it may still be profitable
	 to replace some or all of the dependent candidates, depending
	 on whether the introduced increments can be reused, or are
	 less expensive to calculate than the replaced statements.  */
      else
	{
	  unsigned length;
	  enum machine_mode mode;
	  bool speed;

	  /* Determine whether we'll be generating pointer arithmetic
	     when replacing candidates.  */
	  address_arithmetic_p = (c->kind == CAND_ADD
				  && POINTER_TYPE_P (c->cand_type));

	  /* If all candidates have already been replaced under other
	     interpretations, nothing remains to be done.  */
	  length = count_candidates (c);
	  if (!length)
	    continue;

	  /* Construct an array of increments for this candidate chain.
	     INCR_VEC is a file-scope variable consumed by the helpers
	     called below; it is freed at the end of each iteration.  */
	  incr_vec = XNEWVEC (incr_info, length);
	  incr_vec_len = 0;
	  record_increments (c);

	  /* Determine which increments are profitable to replace.  */
	  mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
	  speed = optimize_cands_for_speed_p (c);
	  analyze_increments (first_dep, mode, speed);

	  /* Insert initializers of the form T_0 = stride * increment
	     for use in profitable replacements.  */
	  insert_initializers (first_dep);
	  dump_incr_vec ();

	  /* Perform the replacements.  */
	  replace_profitable_candidates (first_dep);
	  free (incr_vec);

	  /* TODO:  When conditional increments occur so that a
	     candidate is dependent upon a phi-basis, the cost of
	     introducing a temporary must be accounted for.  */
	}
    }
}
/* Main entry point for the pass: allocate the candidate data
   structures, walk the dominator tree collecting candidates,
   analyze and replace the profitable ones, then release all
   resources.  Always returns 0 (no additional TODO flags).  */

static unsigned
execute_strength_reduction (void)
{
  struct dom_walk_data walk_data;

  /* Create the obstack where candidates will reside.  */
  gcc_obstack_init (&cand_obstack);

  /* Allocate the candidate vector.  */
  cand_vec = VEC_alloc (slsr_cand_t, heap, 128);

  /* Allocate the mapping from statements to candidate indices.  */
  stmt_cand_map = pointer_map_create ();

  /* Create the obstack where candidate chains will reside.  */
  gcc_obstack_init (&chain_obstack);

  /* Allocate the mapping from base expressions to candidate chains.  */
  base_cand_map = htab_create (500, base_cand_hash,
			       base_cand_eq, base_cand_free);

  /* Initialize the loop optimizer.  We need to detect flow across
     back edges, and this gives us dominator information as well.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Set up callbacks for the generic dominator tree walker.  */
  walk_data.dom_direction = CDI_DOMINATORS;
  walk_data.initialize_block_local_data = NULL;
  walk_data.before_dom_children = find_candidates_in_block;
  walk_data.after_dom_children = NULL;
  walk_data.global_data = NULL;
  walk_data.block_local_data_size = 0;
  init_walk_dominator_tree (&walk_data);

  /* Walk the CFG in predominator order looking for strength reduction
     candidates.  */
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_cand_vec ();
      dump_cand_chains ();
    }

  /* Analyze costs and make appropriate replacements.  */
  analyze_candidates_and_replace ();

  /* Free resources.  */
  fini_walk_dominator_tree (&walk_data);
  loop_optimizer_finalize ();
  htab_delete (base_cand_map);
  obstack_free (&chain_obstack, NULL);
  pointer_map_destroy (stmt_cand_map);
  VEC_free (slsr_cand_t, heap, cand_vec);
  obstack_free (&cand_obstack, NULL);

  return 0;
}
2674 static bool
2675 gate_strength_reduction (void)
2677 return flag_tree_slsr;
2680 struct gimple_opt_pass pass_strength_reduction =
2683 GIMPLE_PASS,
2684 "slsr", /* name */
2685 OPTGROUP_NONE, /* optinfo_flags */
2686 gate_strength_reduction, /* gate */
2687 execute_strength_reduction, /* execute */
2688 NULL, /* sub */
2689 NULL, /* next */
2690 0, /* static_pass_number */
2691 TV_GIMPLE_SLSR, /* tv_id */
2692 PROP_cfg | PROP_ssa, /* properties_required */
2693 0, /* properties_provided */
2694 0, /* properties_destroyed */
2695 0, /* todo_flags_start */
2696 TODO_verify_ssa /* todo_flags_finish */