gcc/gimple-ssa-strength-reduction.c
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2017 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "backend.h"
40 #include "rtl.h"
41 #include "tree.h"
42 #include "gimple.h"
43 #include "cfghooks.h"
44 #include "tree-pass.h"
45 #include "ssa.h"
46 #include "expmed.h"
47 #include "gimple-pretty-print.h"
48 #include "fold-const.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "cfgloop.h"
53 #include "tree-cfg.h"
54 #include "domwalk.h"
55 #include "params.h"
56 #include "tree-ssa-address.h"
57 #include "tree-affine.h"
58 #include "builtins.h"
60 /* Information about a strength reduction candidate. Each statement
61 in the candidate table represents an expression of one of the
62 following forms (the special case of CAND_REF will be described
63 later):
65 (CAND_MULT) S1: X = (B + i) * S
66 (CAND_ADD) S1: X = B + (i * S)
68 Here X and B are SSA names, i is an integer constant, and S is
69 either an SSA name or a constant. We call B the "base," i the
70 "index", and S the "stride."
72 Any statement S0 that dominates S1 and is of the form:
74 (CAND_MULT) S0: Y = (B + i') * S
75 (CAND_ADD) S0: Y = B + (i' * S)
77 is called a "basis" for S1. In both cases, S1 may be replaced by
79 S1': X = Y + (i - i') * S,
81 where (i - i') * S is folded to the extent possible.
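   For example (an illustrative sequence, not drawn from any particular
   test case), given

     S0: y_1 = (b_2 + 2) * s_3
     S1: x_4 = (b_2 + 7) * s_3

   S0 is a basis for S1, and S1 may be replaced by

     S1': x_4 = y_1 + 5 * s_3

   since (i - i') * S = (7 - 2) * s_3.  If s_3 were instead the
   constant 4, the addend would fold to the constant 20.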
83 All gimple statements are visited in dominator order, and each
84 statement that may contribute to one of the forms of S1 above is
85 given at least one entry in the candidate table. Such statements
86 include addition, pointer addition, subtraction, multiplication,
87 negation, copies, and nontrivial type casts. If a statement may
88 represent more than one expression of the forms of S1 above,
89 multiple "interpretations" are stored in the table and chained
90 together. Examples:
92 * An add of two SSA names may treat either operand as the base.
93 * A multiply of two SSA names, likewise.
94 * A copy or cast may be thought of as either a CAND_MULT with
95 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
97 Candidate records are allocated from an obstack. They are addressed
98 both from a hash table keyed on S1, and from a vector of candidate
99 pointers arranged in predominator order.
101 Opportunity note
102 ----------------
103 Currently we don't recognize:
105 S0: Y = (S * i') - B
106 S1: X = (S * i) - B
108 as a strength reduction opportunity, even though this S1 would
109 also be replaceable by the S1' above. This can be added if it
110 comes up in practice.
112 Strength reduction in addressing
113 --------------------------------
114 There is another kind of candidate known as CAND_REF. A CAND_REF
115 describes a statement containing a memory reference having
116 complex addressing that might benefit from strength reduction.
117 Specifically, we are interested in references for which
118 get_inner_reference returns a base address, offset, and bitpos as
119 follows:
121 base: MEM_REF (T1, C1)
122 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
123 bitpos: C4 * BITS_PER_UNIT
125 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
126 arbitrary integer constants. Note that C2 may be zero, in which
127 case the offset will be MULT_EXPR (T2, C3).
129 When this pattern is recognized, the original memory reference
130 can be replaced with:
132 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
133 C1 + (C2 * C3) + C4)
135 which distributes the multiply to allow constant folding. When
136 two or more addressing expressions can be represented by MEM_REFs
137 of this form, differing only in the constants C1, C2, and C4,
138 making this substitution produces more efficient addressing during
139 the RTL phases. When there are not at least two expressions with
140 the same values of T1, T2, and C3, there is nothing to be gained
141 by the replacement.
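   As a worked instance of the substitution above (with purely
   illustrative names and constants: T1 = p_1, T2 = n_2, C1 = 4,
   C2 = 2, C3 = 8, C4 = 0), a reference described by

     base:   MEM_REF (p_1, 4)
     offset: MULT_EXPR (PLUS_EXPR (n_2, 2), 8)
     bitpos: 0

   is rewritten as

     MEM_REF (POINTER_PLUS_EXPR (p_1, MULT_EXPR (n_2, 8)), 20)

   since C1 + (C2 * C3) + C4 = 4 + 16 + 0 = 20.  A second reference
   sharing p_1, n_2, and 8 but differing only in the constants can
   then reuse the MULT_EXPR (n_2, 8) computation.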
143 Strength reduction of CAND_REFs uses the same infrastructure as
144 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
145 field, MULT_EXPR (T2, C3) in the stride (S) field, and
146 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
147 is thus another CAND_REF with the same B and S values. When at
148 least two CAND_REFs are chained together using the basis relation,
149 each of them is replaced as above, resulting in improved code
150 generation for addressing.
152 Conditional candidates
153 ======================
155 Conditional candidates are best illustrated with an example.
156 Consider the code sequence:
158 (1) x_0 = ...;
159 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
160 if (...)
161 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
162 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
163 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
164 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
166 Here strength reduction is complicated by the uncertain value of x_2.
167 A legitimate transformation is:
169 (1) x_0 = ...;
170 (2) a_0 = x_0 * 5;
171 if (...)
173 (3) [x_1 = x_0 + 1;]
174 (3a) t_1 = a_0 + 5;
176 (4) [x_2 = PHI <x_0, x_1>;]
177 (4a) t_2 = PHI <a_0, t_1>;
178 (5) [x_3 = x_2 + 1;]
179 (6r) a_1 = t_2 + 5;
181 where the bracketed instructions may go dead.
183 To recognize this opportunity, we have to observe that statement (6)
184 has a "hidden basis" (2). The hidden basis is unlike a normal basis
185 in that the statement and the hidden basis have different base SSA
186 names (x_2 and x_0, respectively). The relationship is established
187 when a statement's base name (x_2) is defined by a phi statement (4),
188 each argument of which (x_0, x_1) has an identical "derived base name."
189 If the argument is defined by a candidate (as x_1 is by (3)) that is a
190 CAND_ADD having a stride of 1, the derived base name of the argument is
191 the base name of the candidate (x_0). Otherwise, the argument itself
192 is its derived base name (as is the case with argument x_0).
194 The hidden basis for statement (6) is the nearest dominating candidate
195 whose base name is the derived base name (x_0) of the feeding phi (4),
196 and whose stride is identical to that of the statement. We can then
197 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
198 allowing the final replacement of (6) by the strength-reduced (6r).
200 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
201 A CAND_PHI is not a candidate for replacement, but is maintained in the
202 candidate table to ease discovery of hidden bases. Any phi statement
203 whose arguments share a common derived base name is entered into the
204 table with the derived base name, an (arbitrary) index of zero, and a
205 stride of 1. A statement with a hidden basis can then be detected by
206 simply looking up its feeding phi definition in the candidate table,
207 extracting the derived base name, and searching for a basis in the
208 usual manner after substituting the derived base name.
210 Note that the transformation is only valid when the original phi and
211 the statements that define the phi's arguments are all at the same
212 position in the loop hierarchy. */
215 /* Index into the candidate vector, offset by 1. VECs are zero-based,
216 while cand_idx's are one-based, with zero indicating null. */
217 typedef unsigned cand_idx;
219 /* The kind of candidate. */
220 enum cand_kind
222 CAND_MULT,
223 CAND_ADD,
224 CAND_REF,
225 CAND_PHI
228 struct slsr_cand_d
230 /* The candidate statement S1. */
231 gimple *cand_stmt;
233 /* The base expression B: often an SSA name, but not always. */
234 tree base_expr;
236 /* The stride S. */
237 tree stride;
239 /* The index constant i. */
240 widest_int index;
242 /* The type of the candidate. This is normally the type of base_expr,
243 but casts may have occurred when combining feeding instructions.
244 A candidate can only be a basis for candidates of the same final type.
245 (For CAND_REFs, this is the type to be used for operand 1 of the
246 replacement MEM_REF.) */
247 tree cand_type;
249 /* The type to be used to interpret the stride field when the stride
250 is not a constant. Normally the same as the type of the recorded
251 stride, but when the stride has been cast we need to maintain that
252 knowledge in order to make legal substitutions without losing
253 precision. When the stride is a constant, this will be sizetype. */
254 tree stride_type;
256 /* The kind of candidate (CAND_MULT, etc.). */
257 enum cand_kind kind;
259 /* Index of this candidate in the candidate vector. */
260 cand_idx cand_num;
262 /* Index of the next candidate record for the same statement.
263 A statement may be useful in more than one way (e.g., due to
264 commutativity). So we can have multiple "interpretations"
265 of a statement. */
266 cand_idx next_interp;
268 /* Index of the basis statement S0, if any, in the candidate vector. */
269 cand_idx basis;
271 /* First candidate for which this candidate is a basis, if one exists. */
272 cand_idx dependent;
274 /* Next candidate having the same basis as this one. */
275 cand_idx sibling;
277 /* If this is a conditional candidate, the CAND_PHI candidate
278 that defines the base SSA name B. */
279 cand_idx def_phi;
281 /* Savings that can be expected from eliminating dead code if this
282 candidate is replaced. */
283 int dead_savings;
286 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
287 typedef const struct slsr_cand_d *const_slsr_cand_t;
289 /* Pointers to candidates are chained together as part of a mapping
290 from base expressions to the candidates that use them. */
292 struct cand_chain_d
294 /* Base expression for the chain of candidates: often, but not
295 always, an SSA name. */
296 tree base_expr;
298 /* Pointer to a candidate. */
299 slsr_cand_t cand;
301 /* Chain pointer. */
302 struct cand_chain_d *next;
306 typedef struct cand_chain_d cand_chain, *cand_chain_t;
307 typedef const struct cand_chain_d *const_cand_chain_t;
309 /* Information about a unique "increment" associated with candidates
310 having an SSA name for a stride. An increment is the difference
311 between the index of the candidate and the index of its basis,
312 i.e., (i - i') as discussed in the module commentary.
314 When we are not going to generate address arithmetic we treat
315 increments that differ only in sign as the same, allowing sharing
316 of the cost of initializers. The absolute value of the increment
317 is stored in the incr_info. */
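/* For example (illustrative values only), when increments that differ
   only in sign are shared, candidates lying at increments +3 and -3
   from their respective bases use a single incr_info with incr = 3,
   and can share the one-time cost of an initializer computing 3 * S,
   one replacement adding and the other subtracting that value.  */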
319 struct incr_info_d
321 /* The increment that relates a candidate to its basis. */
322 widest_int incr;
324 /* How many times the increment occurs in the candidate tree. */
325 unsigned count;
327 /* Cost of replacing candidates using this increment. Negative and
328 zero costs indicate replacement should be performed. */
329 int cost;
331 /* If this increment is profitable but is not -1, 0, or 1, it requires
332 an initializer T_0 = stride * incr to be found or introduced in the
333 nearest common dominator of all candidates. This field holds T_0
334 for subsequent use. */
335 tree initializer;
337 /* If the initializer was found to already exist, this is the block
338 where it was found. */
339 basic_block init_bb;
342 typedef struct incr_info_d incr_info, *incr_info_t;
344 /* Candidates are maintained in a vector. If candidate X dominates
345 candidate Y, then X appears before Y in the vector; but the
346 converse does not necessarily hold. */
347 static vec<slsr_cand_t> cand_vec;
349 enum cost_consts
351 COST_NEUTRAL = 0,
352 COST_INFINITE = 1000
355 enum stride_status
357 UNKNOWN_STRIDE = 0,
358 KNOWN_STRIDE = 1
361 enum phi_adjust_status
363 NOT_PHI_ADJUST = 0,
364 PHI_ADJUST = 1
367 enum count_phis_status
369 DONT_COUNT_PHIS = 0,
370 COUNT_PHIS = 1
373 /* Pointer map embodying a mapping from statements to candidates. */
374 static hash_map<gimple *, slsr_cand_t> *stmt_cand_map;
376 /* Obstack for candidates. */
377 static struct obstack cand_obstack;
379 /* Obstack for candidate chains. */
380 static struct obstack chain_obstack;
382 /* An array INCR_VEC of incr_infos is used during analysis of related
383 candidates having an SSA name for a stride. INCR_VEC_LEN describes
384 its current length. MAX_INCR_VEC_LEN is used to avoid costly
385 pathological cases. */
386 static incr_info_t incr_vec;
387 static unsigned incr_vec_len;
388 const int MAX_INCR_VEC_LEN = 16;
390 /* For a chain of candidates with unknown stride, indicates whether or not
391 we must generate pointer arithmetic when replacing statements. */
392 static bool address_arithmetic_p;
394 /* Forward function declarations. */
395 static slsr_cand_t base_cand_from_table (tree);
396 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
397 static bool legal_cast_p_1 (tree, tree);
399 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
401 static slsr_cand_t
402 lookup_cand (cand_idx idx)
404 return cand_vec[idx - 1];
407 /* Helper for hashing a candidate chain header. */
409 struct cand_chain_hasher : nofree_ptr_hash <cand_chain>
411 static inline hashval_t hash (const cand_chain *);
412 static inline bool equal (const cand_chain *, const cand_chain *);
415 inline hashval_t
416 cand_chain_hasher::hash (const cand_chain *p)
418 tree base_expr = p->base_expr;
419 return iterative_hash_expr (base_expr, 0);
422 inline bool
423 cand_chain_hasher::equal (const cand_chain *chain1, const cand_chain *chain2)
425 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
428 /* Hash table embodying a mapping from base exprs to chains of candidates. */
429 static hash_table<cand_chain_hasher> *base_cand_map;
431 /* Pointer map used by tree_to_aff_combination_expand. */
432 static hash_map<tree, name_expansion *> *name_expansions;
433 /* Pointer map embodying a mapping from bases to alternative bases. */
434 static hash_map<tree, tree> *alt_base_map;
 436 /* Given BASE, use the tree affine combination facilities to
437 find the underlying tree expression for BASE, with any
438 immediate offset excluded.
440 N.B. we should eliminate this backtracking with better forward
441 analysis in a future release. */
443 static tree
444 get_alternative_base (tree base)
446 tree *result = alt_base_map->get (base);
448 if (result == NULL)
450 tree expr;
451 aff_tree aff;
453 tree_to_aff_combination_expand (base, TREE_TYPE (base),
454 &aff, &name_expansions);
455 aff.offset = 0;
456 expr = aff_combination_to_tree (&aff);
458 gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));
460 return expr == base ? NULL : expr;
463 return *result;
466 /* Look in the candidate table for a CAND_PHI that defines BASE and
467 return it if found; otherwise return NULL. */
469 static cand_idx
470 find_phi_def (tree base)
472 slsr_cand_t c;
474 if (TREE_CODE (base) != SSA_NAME)
475 return 0;
477 c = base_cand_from_table (base);
479 if (!c || c->kind != CAND_PHI)
480 return 0;
482 return c->cand_num;
 485 /* Determine whether all uses of NAME are directly or indirectly
 486 used by STMT.  That is, we want to know whether, if STMT goes
 487 dead, the definition of NAME also goes dead.  */
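/* For instance (a hypothetical illustration of the recursion below),
   if NAME's only uses are STMT itself and a single assignment whose
   result is in turn used only by STMT, then all of NAME's uses are
   consumed by STMT.  */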
488 static bool
489 uses_consumed_by_stmt (tree name, gimple *stmt, unsigned recurse = 0)
491 gimple *use_stmt;
492 imm_use_iterator iter;
493 bool retval = true;
495 FOR_EACH_IMM_USE_STMT (use_stmt, iter, name)
497 if (use_stmt == stmt || is_gimple_debug (use_stmt))
498 continue;
500 if (!is_gimple_assign (use_stmt)
501 || !gimple_get_lhs (use_stmt)
502 || !is_gimple_reg (gimple_get_lhs (use_stmt))
503 || recurse >= 10
504 || !uses_consumed_by_stmt (gimple_get_lhs (use_stmt), stmt,
505 recurse + 1))
507 retval = false;
508 BREAK_FROM_IMM_USE_STMT (iter);
512 return retval;
515 /* Helper routine for find_basis_for_candidate. May be called twice:
516 once for the candidate's base expr, and optionally again either for
517 the candidate's phi definition or for a CAND_REF's alternative base
518 expression. */
520 static slsr_cand_t
521 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
523 cand_chain mapping_key;
524 cand_chain_t chain;
525 slsr_cand_t basis = NULL;
527 // Limit potential of N^2 behavior for long candidate chains.
528 int iters = 0;
529 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
531 mapping_key.base_expr = base_expr;
532 chain = base_cand_map->find (&mapping_key);
534 for (; chain && iters < max_iters; chain = chain->next, ++iters)
536 slsr_cand_t one_basis = chain->cand;
538 if (one_basis->kind != c->kind
539 || one_basis->cand_stmt == c->cand_stmt
540 || !operand_equal_p (one_basis->stride, c->stride, 0)
541 || !types_compatible_p (one_basis->cand_type, c->cand_type)
542 || !types_compatible_p (one_basis->stride_type, c->stride_type)
543 || !dominated_by_p (CDI_DOMINATORS,
544 gimple_bb (c->cand_stmt),
545 gimple_bb (one_basis->cand_stmt)))
546 continue;
548 if (!basis || basis->cand_num < one_basis->cand_num)
549 basis = one_basis;
552 return basis;
555 /* Use the base expr from candidate C to look for possible candidates
556 that can serve as a basis for C. Each potential basis must also
557 appear in a block that dominates the candidate statement and have
558 the same stride and type. If more than one possible basis exists,
559 the one with highest index in the vector is chosen; this will be
560 the most immediately dominating basis. */
562 static int
563 find_basis_for_candidate (slsr_cand_t c)
565 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
567 /* If a candidate doesn't have a basis using its base expression,
568 it may have a basis hidden by one or more intervening phis. */
569 if (!basis && c->def_phi)
571 basic_block basis_bb, phi_bb;
572 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
573 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
575 if (basis)
577 /* A hidden basis must dominate the phi-definition of the
578 candidate's base name. */
579 phi_bb = gimple_bb (phi_cand->cand_stmt);
580 basis_bb = gimple_bb (basis->cand_stmt);
582 if (phi_bb == basis_bb
583 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
585 basis = NULL;
586 c->basis = 0;
589 /* If we found a hidden basis, estimate additional dead-code
590 savings if the phi and its feeding statements can be removed. */
591 tree feeding_var = gimple_phi_result (phi_cand->cand_stmt);
592 if (basis && uses_consumed_by_stmt (feeding_var, c->cand_stmt))
593 c->dead_savings += phi_cand->dead_savings;
597 if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
599 tree alt_base_expr = get_alternative_base (c->base_expr);
600 if (alt_base_expr)
601 basis = find_basis_for_base_expr (c, alt_base_expr);
604 if (basis)
606 c->sibling = basis->dependent;
607 basis->dependent = c->cand_num;
608 return basis->cand_num;
611 return 0;
614 /* Record a mapping from BASE to C, indicating that C may potentially serve
615 as a basis using that base expression. BASE may be the same as
 616 C->BASE_EXPR; alternatively BASE can be a different tree that shares the
 617 underlying expression of C->BASE_EXPR.  */
619 static void
620 record_potential_basis (slsr_cand_t c, tree base)
622 cand_chain_t node;
623 cand_chain **slot;
625 gcc_assert (base);
627 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
628 node->base_expr = base;
629 node->cand = c;
630 node->next = NULL;
631 slot = base_cand_map->find_slot (node, INSERT);
633 if (*slot)
635 cand_chain_t head = (cand_chain_t) (*slot);
636 node->next = head->next;
637 head->next = node;
639 else
640 *slot = node;
643 /* Allocate storage for a new candidate and initialize its fields.
644 Attempt to find a basis for the candidate.
646 For CAND_REF, an alternative base may also be recorded and used
647 to find a basis. This helps cases where the expression hidden
 648 behind BASE (which is usually an SSA_NAME) has an immediate offset,
649 e.g.
651 a2[i][j] = 1;
652 a2[i + 20][j] = 2; */
654 static slsr_cand_t
655 alloc_cand_and_find_basis (enum cand_kind kind, gimple *gs, tree base,
656 const widest_int &index, tree stride, tree ctype,
657 tree stype, unsigned savings)
659 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
660 sizeof (slsr_cand));
661 c->cand_stmt = gs;
662 c->base_expr = base;
663 c->stride = stride;
664 c->index = index;
665 c->cand_type = ctype;
666 c->stride_type = stype;
667 c->kind = kind;
668 c->cand_num = cand_vec.length () + 1;
669 c->next_interp = 0;
670 c->dependent = 0;
671 c->sibling = 0;
672 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
673 c->dead_savings = savings;
675 cand_vec.safe_push (c);
677 if (kind == CAND_PHI)
678 c->basis = 0;
679 else
680 c->basis = find_basis_for_candidate (c);
682 record_potential_basis (c, base);
683 if (flag_expensive_optimizations && kind == CAND_REF)
685 tree alt_base = get_alternative_base (base);
686 if (alt_base)
687 record_potential_basis (c, alt_base);
690 return c;
693 /* Determine the target cost of statement GS when compiling according
694 to SPEED. */
696 static int
697 stmt_cost (gimple *gs, bool speed)
699 tree lhs, rhs1, rhs2;
700 machine_mode lhs_mode;
702 gcc_assert (is_gimple_assign (gs));
703 lhs = gimple_assign_lhs (gs);
704 rhs1 = gimple_assign_rhs1 (gs);
705 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
707 switch (gimple_assign_rhs_code (gs))
709 case MULT_EXPR:
710 rhs2 = gimple_assign_rhs2 (gs);
712 if (tree_fits_shwi_p (rhs2))
713 return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
715 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
716 return mul_cost (speed, lhs_mode);
718 case PLUS_EXPR:
719 case POINTER_PLUS_EXPR:
720 case MINUS_EXPR:
721 return add_cost (speed, lhs_mode);
723 case NEGATE_EXPR:
724 return neg_cost (speed, lhs_mode);
726 CASE_CONVERT:
727 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
729 /* Note that we don't assign costs to copies that in most cases
730 will go away. */
731 case SSA_NAME:
732 return 0;
734 default:
738 gcc_unreachable ();
739 return 0;
742 /* Look up the defining statement for BASE_IN and return a pointer
743 to its candidate in the candidate table, if any; otherwise NULL.
744 Only CAND_ADD and CAND_MULT candidates are returned. */
746 static slsr_cand_t
747 base_cand_from_table (tree base_in)
749 slsr_cand_t *result;
751 gimple *def = SSA_NAME_DEF_STMT (base_in);
752 if (!def)
753 return (slsr_cand_t) NULL;
755 result = stmt_cand_map->get (def);
757 if (result && (*result)->kind != CAND_REF)
758 return *result;
760 return (slsr_cand_t) NULL;
763 /* Add an entry to the statement-to-candidate mapping. */
765 static void
766 add_cand_for_stmt (gimple *gs, slsr_cand_t c)
768 gcc_assert (!stmt_cand_map->put (gs, c));
771 /* Given PHI which contains a phi statement, determine whether it
772 satisfies all the requirements of a phi candidate. If so, create
773 a candidate. Note that a CAND_PHI never has a basis itself, but
774 is used to help find a basis for subsequent candidates. */
776 static void
777 slsr_process_phi (gphi *phi, bool speed)
779 unsigned i;
780 tree arg0_base = NULL_TREE, base_type;
781 slsr_cand_t c;
782 struct loop *cand_loop = gimple_bb (phi)->loop_father;
783 unsigned savings = 0;
785 /* A CAND_PHI requires each of its arguments to have the same
786 derived base name. (See the module header commentary for a
787 definition of derived base names.) Furthermore, all feeding
788 definitions must be in the same position in the loop hierarchy
789 as PHI. */
791 for (i = 0; i < gimple_phi_num_args (phi); i++)
793 slsr_cand_t arg_cand;
794 tree arg = gimple_phi_arg_def (phi, i);
795 tree derived_base_name = NULL_TREE;
796 gimple *arg_stmt = NULL;
797 basic_block arg_bb = NULL;
799 if (TREE_CODE (arg) != SSA_NAME)
800 return;
802 arg_cand = base_cand_from_table (arg);
804 if (arg_cand)
806 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
808 if (!arg_cand->next_interp)
809 return;
811 arg_cand = lookup_cand (arg_cand->next_interp);
814 if (!integer_onep (arg_cand->stride))
815 return;
817 derived_base_name = arg_cand->base_expr;
818 arg_stmt = arg_cand->cand_stmt;
819 arg_bb = gimple_bb (arg_stmt);
821 /* Gather potential dead code savings if the phi statement
822 can be removed later on. */
823 if (uses_consumed_by_stmt (arg, phi))
825 if (gimple_code (arg_stmt) == GIMPLE_PHI)
826 savings += arg_cand->dead_savings;
827 else
828 savings += stmt_cost (arg_stmt, speed);
831 else if (SSA_NAME_IS_DEFAULT_DEF (arg))
833 derived_base_name = arg;
834 arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
837 if (!arg_bb || arg_bb->loop_father != cand_loop)
838 return;
840 if (i == 0)
841 arg0_base = derived_base_name;
842 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
843 return;
846 /* Create the candidate. "alloc_cand_and_find_basis" is named
847 misleadingly for this case, as no basis will be sought for a
848 CAND_PHI. */
849 base_type = TREE_TYPE (arg0_base);
851 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
852 0, integer_one_node, base_type,
853 sizetype, savings);
855 /* Add the candidate to the statement-candidate mapping. */
856 add_cand_for_stmt (phi, c);
859 /* Given PBASE which is a pointer to tree, look up the defining
860 statement for it and check whether the candidate is in the
861 form of:
863 X = B + (1 * S), S is integer constant
864 X = B + (i * S), S is integer one
 866 If so, set PBASE to the candidate's base_expr and return the
 867 widest_int value (i * S).
 868 Otherwise, just return zero.  */
870 static widest_int
871 backtrace_base_for_ref (tree *pbase)
873 tree base_in = *pbase;
874 slsr_cand_t base_cand;
876 STRIP_NOPS (base_in);
878 /* Strip off widening conversion(s) to handle cases where
879 e.g. 'B' is widened from an 'int' in order to calculate
880 a 64-bit address. */
881 if (CONVERT_EXPR_P (base_in)
882 && legal_cast_p_1 (TREE_TYPE (base_in),
883 TREE_TYPE (TREE_OPERAND (base_in, 0))))
884 base_in = get_unwidened (base_in, NULL_TREE);
886 if (TREE_CODE (base_in) != SSA_NAME)
887 return 0;
889 base_cand = base_cand_from_table (base_in);
891 while (base_cand && base_cand->kind != CAND_PHI)
893 if (base_cand->kind == CAND_ADD
894 && base_cand->index == 1
895 && TREE_CODE (base_cand->stride) == INTEGER_CST)
897 /* X = B + (1 * S), S is integer constant. */
898 *pbase = base_cand->base_expr;
899 return wi::to_widest (base_cand->stride);
901 else if (base_cand->kind == CAND_ADD
902 && TREE_CODE (base_cand->stride) == INTEGER_CST
903 && integer_onep (base_cand->stride))
905 /* X = B + (i * S), S is integer one. */
906 *pbase = base_cand->base_expr;
907 return base_cand->index;
910 if (base_cand->next_interp)
911 base_cand = lookup_cand (base_cand->next_interp);
912 else
913 base_cand = NULL;
916 return 0;
919 /* Look for the following pattern:
921 *PBASE: MEM_REF (T1, C1)
923 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
925 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
927 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
929 *PINDEX: C4 * BITS_PER_UNIT
931 If not present, leave the input values unchanged and return FALSE.
932 Otherwise, modify the input values as follows and return TRUE:
934 *PBASE: T1
935 *POFFSET: MULT_EXPR (T2, C3)
936 *PINDEX: C1 + (C2 * C3) + C4
938 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
939 will be further restructured to:
941 *PBASE: T1
942 *POFFSET: MULT_EXPR (T2', C3)
943 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
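/* As an illustrative instance (hypothetical names and constants):
   suppose T1 = p_1, C1 = 0, C4 = 0, C3 = 4, and the incoming offset is
   MULT_EXPR (t_5, 4), so C2 = 0.  If t_5 was recorded as a CAND_ADD
   with base n_2, index 12, and stride 1, backtrace_base_for_ref
   replaces T2 = t_5 with n_2 and returns C5 = 12, giving

     *PBASE:   p_1
     *POFFSET: MULT_EXPR (n_2, 4)
     *PINDEX:  0 + (0 * 4) + 0 + (12 * 4) = 48.  */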
945 static bool
946 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
947 tree *ptype)
949 tree base = *pbase, offset = *poffset;
950 widest_int index = *pindex;
951 tree mult_op0, t1, t2, type;
952 widest_int c1, c2, c3, c4, c5;
954 if (!base
955 || !offset
956 || TREE_CODE (base) != MEM_REF
957 || TREE_CODE (offset) != MULT_EXPR
958 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
959 || wi::umod_floor (index, BITS_PER_UNIT) != 0)
960 return false;
962 t1 = TREE_OPERAND (base, 0);
963 c1 = widest_int::from (mem_ref_offset (base), SIGNED);
964 type = TREE_TYPE (TREE_OPERAND (base, 1));
966 mult_op0 = TREE_OPERAND (offset, 0);
967 c3 = wi::to_widest (TREE_OPERAND (offset, 1));
969 if (TREE_CODE (mult_op0) == PLUS_EXPR)
971 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
973 t2 = TREE_OPERAND (mult_op0, 0);
974 c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
976 else
977 return false;
979 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
981 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
983 t2 = TREE_OPERAND (mult_op0, 0);
984 c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
986 else
987 return false;
989 else
991 t2 = mult_op0;
992 c2 = 0;
995 c4 = index >> LOG2_BITS_PER_UNIT;
996 c5 = backtrace_base_for_ref (&t2);
998 *pbase = t1;
999 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
1000 wide_int_to_tree (sizetype, c3));
1001 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
1002 *ptype = type;
1004 return true;
1007 /* Given GS which contains a data reference, create a CAND_REF entry in
1008 the candidate table and attempt to find a basis. */
1010 static void
1011 slsr_process_ref (gimple *gs)
1013 tree ref_expr, base, offset, type;
1014 HOST_WIDE_INT bitsize, bitpos;
1015 machine_mode mode;
1016 int unsignedp, reversep, volatilep;
1017 slsr_cand_t c;
1019 if (gimple_vdef (gs))
1020 ref_expr = gimple_assign_lhs (gs);
1021 else
1022 ref_expr = gimple_assign_rhs1 (gs);
1024 if (!handled_component_p (ref_expr)
1025 || TREE_CODE (ref_expr) == BIT_FIELD_REF
1026 || (TREE_CODE (ref_expr) == COMPONENT_REF
1027 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
1028 return;
1030 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
1031 &unsignedp, &reversep, &volatilep);
1032 if (reversep)
1033 return;
1034 widest_int index = bitpos;
1036 if (!restructure_reference (&base, &offset, &index, &type))
1037 return;
1039 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
1040 type, sizetype, 0);
1042 /* Add the candidate to the statement-candidate mapping. */
1043 add_cand_for_stmt (gs, c);
1046 /* Create a candidate entry for a statement GS, where GS multiplies
1047 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
1048 about the two SSA names into the new candidate. Return the new
1049 candidate. */
1051 static slsr_cand_t
1052 create_mul_ssa_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1054 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1055 tree stype = NULL_TREE;
1056 widest_int index;
1057 unsigned savings = 0;
1058 slsr_cand_t c;
1059 slsr_cand_t base_cand = base_cand_from_table (base_in);
1061 /* Look at all interpretations of the base candidate, if necessary,
1062 to find information to propagate into this candidate. */
1063 while (base_cand && !base && base_cand->kind != CAND_PHI)
1066 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1068 /* Y = (B + i') * 1
1069 X = Y * Z
1070 ================
1071 X = (B + i') * Z */
1072 base = base_cand->base_expr;
1073 index = base_cand->index;
1074 stride = stride_in;
1075 ctype = base_cand->cand_type;
1076 stype = TREE_TYPE (stride_in);
1077 if (has_single_use (base_in))
1078 savings = (base_cand->dead_savings
1079 + stmt_cost (base_cand->cand_stmt, speed));
1081 else if (base_cand->kind == CAND_ADD
1082 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1084 /* Y = B + (i' * S), S constant
1085 X = Y * Z
1086 ============================
1087 X = B + ((i' * S) * Z) */
1088 base = base_cand->base_expr;
1089 index = base_cand->index * wi::to_widest (base_cand->stride);
1090 stride = stride_in;
1091 ctype = base_cand->cand_type;
1092 stype = TREE_TYPE (stride_in);
1093 if (has_single_use (base_in))
1094 savings = (base_cand->dead_savings
1095 + stmt_cost (base_cand->cand_stmt, speed));
1098 if (base_cand->next_interp)
1099 base_cand = lookup_cand (base_cand->next_interp);
1100 else
1101 base_cand = NULL;
1104 if (!base)
1106 /* No interpretations had anything useful to propagate, so
1107 produce X = (Y + 0) * Z. */
1108 base = base_in;
1109 index = 0;
1110 stride = stride_in;
1111 ctype = TREE_TYPE (base_in);
1112 stype = TREE_TYPE (stride_in);
1115 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1116 ctype, stype, savings);
1117 return c;
1120 /* Create a candidate entry for a statement GS, where GS multiplies
1121 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1122 information about BASE_IN into the new candidate. Return the new
1123 candidate. */
1125 static slsr_cand_t
1126 create_mul_imm_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1128 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1129 widest_int index, temp;
1130 unsigned savings = 0;
1131 slsr_cand_t c;
1132 slsr_cand_t base_cand = base_cand_from_table (base_in);
1134 /* Look at all interpretations of the base candidate, if necessary,
1135 to find information to propagate into this candidate. */
1136 while (base_cand && !base && base_cand->kind != CAND_PHI)
1138 if (base_cand->kind == CAND_MULT
1139 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1141 /* Y = (B + i') * S, S constant
1142 X = Y * c
1143 ============================
1144 X = (B + i') * (S * c) */
1145 temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
1146 if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
1148 base = base_cand->base_expr;
1149 index = base_cand->index;
1150 stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
1151 ctype = base_cand->cand_type;
1152 if (has_single_use (base_in))
1153 savings = (base_cand->dead_savings
1154 + stmt_cost (base_cand->cand_stmt, speed));
1157 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1159 /* Y = B + (i' * 1)
1160 X = Y * c
1161 ===========================
1162 X = (B + i') * c */
1163 base = base_cand->base_expr;
1164 index = base_cand->index;
1165 stride = stride_in;
1166 ctype = base_cand->cand_type;
1167 if (has_single_use (base_in))
1168 savings = (base_cand->dead_savings
1169 + stmt_cost (base_cand->cand_stmt, speed));
1171 else if (base_cand->kind == CAND_ADD
1172 && base_cand->index == 1
1173 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1175 /* Y = B + (1 * S), S constant
1176 X = Y * c
1177 ===========================
1178 X = (B + S) * c */
1179 base = base_cand->base_expr;
1180 index = wi::to_widest (base_cand->stride);
1181 stride = stride_in;
1182 ctype = base_cand->cand_type;
1183 if (has_single_use (base_in))
1184 savings = (base_cand->dead_savings
1185 + stmt_cost (base_cand->cand_stmt, speed));
1188 if (base_cand->next_interp)
1189 base_cand = lookup_cand (base_cand->next_interp);
1190 else
1191 base_cand = NULL;
1194 if (!base)
1196 /* No interpretations had anything useful to propagate, so
1197 produce X = (Y + 0) * c. */
1198 base = base_in;
1199 index = 0;
1200 stride = stride_in;
1201 ctype = TREE_TYPE (base_in);
1204 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1205 ctype, sizetype, savings);
1206 return c;
1209 /* Given GS which is a multiply of scalar integers, make an appropriate
1210 entry in the candidate table. If this is a multiply of two SSA names,
1211 create two CAND_MULT interpretations and attempt to find a basis for
1212 each of them. Otherwise, create a single CAND_MULT and attempt to
1213 find a basis. */
1215 static void
1216 slsr_process_mul (gimple *gs, tree rhs1, tree rhs2, bool speed)
1218 slsr_cand_t c, c2;
1220 /* If this is a multiply of an SSA name with itself, it is highly
1221 unlikely that we will get a strength reduction opportunity, so
1222 don't record it as a candidate. This simplifies the logic for
1223 finding a basis, so if this is removed that must be considered. */
1224 if (rhs1 == rhs2)
1225 return;
1227 if (TREE_CODE (rhs2) == SSA_NAME)
1229 /* Record an interpretation of this statement in the candidate table
1230 assuming RHS1 is the base expression and RHS2 is the stride. */
1231 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1233 /* Add the first interpretation to the statement-candidate mapping. */
1234 add_cand_for_stmt (gs, c);
1236 /* Record another interpretation of this statement assuming RHS1
1237 is the stride and RHS2 is the base expression. */
1238 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1239 c->next_interp = c2->cand_num;
1241 else
1243 /* Record an interpretation for the multiply-immediate. */
1244 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1246 /* Add the interpretation to the statement-candidate mapping. */
1247 add_cand_for_stmt (gs, c);
1251 /* Create a candidate entry for a statement GS, where GS adds two
1252 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1253 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1254 information about the two SSA names into the new candidate.
1255 Return the new candidate. */
1257 static slsr_cand_t
1258 create_add_ssa_cand (gimple *gs, tree base_in, tree addend_in,
1259 bool subtract_p, bool speed)
1261 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1262 tree stype = NULL_TREE;
1263 widest_int index;
1264 unsigned savings = 0;
1265 slsr_cand_t c;
1266 slsr_cand_t base_cand = base_cand_from_table (base_in);
1267 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1269 /* The most useful transformation is a multiply-immediate feeding
1270 an add or subtract. Look for that first. */
1271 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1273 if (addend_cand->kind == CAND_MULT
1274 && addend_cand->index == 0
1275 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1277 /* Z = (B + 0) * S, S constant
1278 X = Y +/- Z
1279 ===========================
1280 X = Y + ((+/-1 * S) * B) */
1281 base = base_in;
1282 index = wi::to_widest (addend_cand->stride);
1283 if (subtract_p)
1284 index = -index;
1285 stride = addend_cand->base_expr;
1286 ctype = TREE_TYPE (base_in);
1287 stype = addend_cand->cand_type;
1288 if (has_single_use (addend_in))
1289 savings = (addend_cand->dead_savings
1290 + stmt_cost (addend_cand->cand_stmt, speed));
1293 if (addend_cand->next_interp)
1294 addend_cand = lookup_cand (addend_cand->next_interp);
1295 else
1296 addend_cand = NULL;
1299 while (base_cand && !base && base_cand->kind != CAND_PHI)
1301 if (base_cand->kind == CAND_ADD
1302 && (base_cand->index == 0
1303 || operand_equal_p (base_cand->stride,
1304 integer_zero_node, 0)))
1306 /* Y = B + (i' * S), i' * S = 0
1307 X = Y +/- Z
1308 ============================
1309 X = B + (+/-1 * Z) */
1310 base = base_cand->base_expr;
1311 index = subtract_p ? -1 : 1;
1312 stride = addend_in;
1313 ctype = base_cand->cand_type;
1314 stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1315 : TREE_TYPE (addend_in));
1316 if (has_single_use (base_in))
1317 savings = (base_cand->dead_savings
1318 + stmt_cost (base_cand->cand_stmt, speed));
1320 else if (subtract_p)
1322 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1324 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1326 if (subtrahend_cand->kind == CAND_MULT
1327 && subtrahend_cand->index == 0
1328 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1330 /* Z = (B + 0) * S, S constant
1331 X = Y - Z
1332 ===========================
1333 Value: X = Y + ((-1 * S) * B) */
1334 base = base_in;
1335 index = wi::to_widest (subtrahend_cand->stride);
1336 index = -index;
1337 stride = subtrahend_cand->base_expr;
1338 ctype = TREE_TYPE (base_in);
1339 stype = subtrahend_cand->cand_type;
1340 if (has_single_use (addend_in))
1341 savings = (subtrahend_cand->dead_savings
1342 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1345 if (subtrahend_cand->next_interp)
1346 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1347 else
1348 subtrahend_cand = NULL;
1352 if (base_cand->next_interp)
1353 base_cand = lookup_cand (base_cand->next_interp);
1354 else
1355 base_cand = NULL;
1358 if (!base)
1360 /* No interpretations had anything useful to propagate, so
1361 produce X = Y + (1 * Z). */
1362 base = base_in;
1363 index = subtract_p ? -1 : 1;
1364 stride = addend_in;
1365 ctype = TREE_TYPE (base_in);
1366 stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1367 : TREE_TYPE (addend_in));
1370 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1371 ctype, stype, savings);
1372 return c;
1375 /* Create a candidate entry for a statement GS, where GS adds SSA
1376 name BASE_IN to constant INDEX_IN. Propagate any known information
1377 about BASE_IN into the new candidate. Return the new candidate. */
1379 static slsr_cand_t
1380 create_add_imm_cand (gimple *gs, tree base_in, const widest_int &index_in,
1381 bool speed)
1383 enum cand_kind kind = CAND_ADD;
1384 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1385 tree stype = NULL_TREE;
1386 widest_int index, multiple;
1387 unsigned savings = 0;
1388 slsr_cand_t c;
1389 slsr_cand_t base_cand = base_cand_from_table (base_in);
1391 while (base_cand && !base && base_cand->kind != CAND_PHI)
1393 signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
1395 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1396 && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
1397 sign, &multiple))
1399 /* Y = (B + i') * S, S constant, c = kS for some integer k
1400 X = Y + c
1401 ============================
1402 X = (B + (i'+ k)) * S
1404 Y = B + (i' * S), S constant, c = kS for some integer k
1405 X = Y + c
1406 ============================
1407 X = (B + (i'+ k)) * S */
1408 kind = base_cand->kind;
1409 base = base_cand->base_expr;
1410 index = base_cand->index + multiple;
1411 stride = base_cand->stride;
1412 ctype = base_cand->cand_type;
1413 stype = base_cand->stride_type;
1414 if (has_single_use (base_in))
1415 savings = (base_cand->dead_savings
1416 + stmt_cost (base_cand->cand_stmt, speed));
1419 if (base_cand->next_interp)
1420 base_cand = lookup_cand (base_cand->next_interp);
1421 else
1422 base_cand = NULL;
1425 if (!base)
1427 /* No interpretations had anything useful to propagate, so
1428 produce X = Y + (c * 1). */
1429 kind = CAND_ADD;
1430 base = base_in;
1431 index = index_in;
1432 stride = integer_one_node;
1433 ctype = TREE_TYPE (base_in);
1434 stype = sizetype;
1437 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1438 ctype, stype, savings);
1439 return c;
1442 /* Given GS which is an add or subtract of scalar integers or pointers,
1443 make at least one appropriate entry in the candidate table. */
1445 static void
1446 slsr_process_add (gimple *gs, tree rhs1, tree rhs2, bool speed)
1448 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1449 slsr_cand_t c = NULL, c2;
1451 if (TREE_CODE (rhs2) == SSA_NAME)
1453 /* First record an interpretation assuming RHS1 is the base expression
1454 and RHS2 is the stride. But it doesn't make sense for the
1455 stride to be a pointer, so don't record a candidate in that case. */
1456 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1458 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1460 /* Add the first interpretation to the statement-candidate
1461 mapping. */
1462 add_cand_for_stmt (gs, c);
1465 /* If the two RHS operands are identical, or this is a subtract,
1466 we're done. */
1467 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1468 return;
1470 /* Otherwise, record another interpretation assuming RHS2 is the
1471 base expression and RHS1 is the stride, again provided that the
1472 stride is not a pointer. */
1473 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1475 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1476 if (c)
1477 c->next_interp = c2->cand_num;
1478 else
1479 add_cand_for_stmt (gs, c2);
1482 else
1484 /* Record an interpretation for the add-immediate. */
1485 widest_int index = wi::to_widest (rhs2);
1486 if (subtract_p)
1487 index = -index;
1489 c = create_add_imm_cand (gs, rhs1, index, speed);
1491 /* Add the interpretation to the statement-candidate mapping. */
1492 add_cand_for_stmt (gs, c);
1496 /* Given GS which is a negate of a scalar integer, make an appropriate
1497 entry in the candidate table. A negate is equivalent to a multiply
1498 by -1. */
1500 static void
1501 slsr_process_neg (gimple *gs, tree rhs1, bool speed)
1503 /* Record a CAND_MULT interpretation for the multiply by -1. */
1504 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1506 /* Add the interpretation to the statement-candidate mapping. */
1507 add_cand_for_stmt (gs, c);
 1510 /* Helper function for legal_cast_p, operating on two trees.  Checks
1511 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1512 for more details. */
1514 static bool
1515 legal_cast_p_1 (tree lhs_type, tree rhs_type)
1517 unsigned lhs_size, rhs_size;
1518 bool lhs_wraps, rhs_wraps;
1520 lhs_size = TYPE_PRECISION (lhs_type);
1521 rhs_size = TYPE_PRECISION (rhs_type);
1522 lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
1523 rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);
1525 if (lhs_size < rhs_size
1526 || (rhs_wraps && !lhs_wraps)
1527 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1528 return false;
1530 return true;
1533 /* Return TRUE if GS is a statement that defines an SSA name from
1534 a conversion and is legal for us to combine with an add and multiply
1535 in the candidate table. For example, suppose we have:
1537 A = B + i;
1538 C = (type) A;
1539 D = C * S;
1541 Without the type-cast, we would create a CAND_MULT for D with base B,
1542 index i, and stride S. We want to record this candidate only if it
1543 is equivalent to apply the type cast following the multiply:
1545 A = B + i;
1546 E = A * S;
1547 D = (type) E;
1549 We will record the type with the candidate for D. This allows us
1550 to use a similar previous candidate as a basis. If we have earlier seen
1552 A' = B + i';
1553 C' = (type) A';
1554 D' = C' * S;
1556 we can replace D with
1558 D = D' + (i - i') * S;
1560 But if moving the type-cast would change semantics, we mustn't do this.
1562 This is legitimate for casts from a non-wrapping integral type to
1563 any integral type of the same or larger size. It is not legitimate
1564 to convert a wrapping type to a non-wrapping type, or to a wrapping
1565 type of a different size. I.e., with a wrapping type, we must
1566 assume that the addition B + i could wrap, in which case performing
1567 the multiply before or after one of the "illegal" type casts will
1568 have different semantics. */
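/* For example (assuming the usual case of no -fwrapv, so signed types
   are treated as non-wrapping while unsigned types wrap): a cast from
   int to long is legal, since int does not wrap and long is at least
   as wide; a cast from unsigned int to long is not, since the source
   type wraps and the destination does not; and a cast from a 32-bit
   unsigned int to a 64-bit unsigned long is not, since both wrap but
   their sizes differ.  */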
1570 static bool
1571 legal_cast_p (gimple *gs, tree rhs)
1573 if (!is_gimple_assign (gs)
1574 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1575 return false;
1577 return legal_cast_p_1 (TREE_TYPE (gimple_assign_lhs (gs)), TREE_TYPE (rhs));
1580 /* Given GS which is a cast to a scalar integer type, determine whether
1581 the cast is legal for strength reduction. If so, make at least one
1582 appropriate entry in the candidate table. */
1584 static void
1585 slsr_process_cast (gimple *gs, tree rhs1, bool speed)
1587 tree lhs, ctype;
1588 slsr_cand_t base_cand, c = NULL, c2;
1589 unsigned savings = 0;
1591 if (!legal_cast_p (gs, rhs1))
1592 return;
1594 lhs = gimple_assign_lhs (gs);
1595 base_cand = base_cand_from_table (rhs1);
1596 ctype = TREE_TYPE (lhs);
1598 if (base_cand && base_cand->kind != CAND_PHI)
1600 while (base_cand)
1602 /* Propagate all data from the base candidate except the type,
1603 which comes from the cast, and the base candidate's cast,
1604 which is no longer applicable. */
1605 if (has_single_use (rhs1))
1606 savings = (base_cand->dead_savings
1607 + stmt_cost (base_cand->cand_stmt, speed));
1609 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1610 base_cand->base_expr,
1611 base_cand->index, base_cand->stride,
1612 ctype, base_cand->stride_type,
1613 savings);
1614 if (base_cand->next_interp)
1615 base_cand = lookup_cand (base_cand->next_interp);
1616 else
1617 base_cand = NULL;
1620 else
1622 /* If nothing is known about the RHS, create fresh CAND_ADD and
1623 CAND_MULT interpretations:
1625 X = Y + (0 * 1)
1626 X = (Y + 0) * 1
1628 The first of these is somewhat arbitrary, but the choice of
1629 1 for the stride simplifies the logic for propagating casts
1630 into their uses. */
1631 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1632 integer_one_node, ctype, sizetype, 0);
1633 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1634 integer_one_node, ctype, sizetype, 0);
1635 c->next_interp = c2->cand_num;
1638 /* Add the first (or only) interpretation to the statement-candidate
1639 mapping. */
1640 add_cand_for_stmt (gs, c);
1643 /* Given GS which is a copy of a scalar integer type, make at least one
1644 appropriate entry in the candidate table.
1646 This interface is included for completeness, but is unnecessary
1647 if this pass immediately follows a pass that performs copy
1648 propagation, such as DOM. */
1650 static void
1651 slsr_process_copy (gimple *gs, tree rhs1, bool speed)
1653 slsr_cand_t base_cand, c = NULL, c2;
1654 unsigned savings = 0;
1656 base_cand = base_cand_from_table (rhs1);
1658 if (base_cand && base_cand->kind != CAND_PHI)
1660 while (base_cand)
1662 /* Propagate all data from the base candidate. */
1663 if (has_single_use (rhs1))
1664 savings = (base_cand->dead_savings
1665 + stmt_cost (base_cand->cand_stmt, speed));
1667 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1668 base_cand->base_expr,
1669 base_cand->index, base_cand->stride,
1670 base_cand->cand_type,
1671 base_cand->stride_type, savings);
1672 if (base_cand->next_interp)
1673 base_cand = lookup_cand (base_cand->next_interp);
1674 else
1675 base_cand = NULL;
1678 else
1680 /* If nothing is known about the RHS, create fresh CAND_ADD and
1681 CAND_MULT interpretations:
1683 X = Y + (0 * 1)
1684 X = (Y + 0) * 1
1686 The first of these is somewhat arbitrary, but the choice of
1687 1 for the stride simplifies the logic for propagating casts
1688 into their uses. */
1689 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1690 integer_one_node, TREE_TYPE (rhs1),
1691 sizetype, 0);
1692 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1693 integer_one_node, TREE_TYPE (rhs1),
1694 sizetype, 0);
1695 c->next_interp = c2->cand_num;
1698 /* Add the first (or only) interpretation to the statement-candidate
1699 mapping. */
1700 add_cand_for_stmt (gs, c);
1703 class find_candidates_dom_walker : public dom_walker
1705 public:
1706 find_candidates_dom_walker (cdi_direction direction)
1707 : dom_walker (direction) {}
1708 virtual edge before_dom_children (basic_block);
1711 /* Find strength-reduction candidates in block BB. */
1713 edge
1714 find_candidates_dom_walker::before_dom_children (basic_block bb)
1716 bool speed = optimize_bb_for_speed_p (bb);
1718 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1719 gsi_next (&gsi))
1720 slsr_process_phi (gsi.phi (), speed);
1722 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1723 gsi_next (&gsi))
1725 gimple *gs = gsi_stmt (gsi);
1727 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1728 slsr_process_ref (gs);
1730 else if (is_gimple_assign (gs)
1731 && SCALAR_INT_MODE_P
1732 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1734 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1736 switch (gimple_assign_rhs_code (gs))
1738 case MULT_EXPR:
1739 case PLUS_EXPR:
1740 rhs1 = gimple_assign_rhs1 (gs);
1741 rhs2 = gimple_assign_rhs2 (gs);
1742 /* Should never happen, but currently some buggy situations
1743 in earlier phases put constants in rhs1. */
1744 if (TREE_CODE (rhs1) != SSA_NAME)
1745 continue;
1746 break;
1748 /* Possible future opportunity: rhs1 of a ptr+ can be
1749 an ADDR_EXPR. */
1750 case POINTER_PLUS_EXPR:
1751 case MINUS_EXPR:
1752 rhs2 = gimple_assign_rhs2 (gs);
1753 gcc_fallthrough ();
1755 CASE_CONVERT:
1756 case SSA_NAME:
1757 case NEGATE_EXPR:
1758 rhs1 = gimple_assign_rhs1 (gs);
1759 if (TREE_CODE (rhs1) != SSA_NAME)
1760 continue;
1761 break;
1763 default:
1767 switch (gimple_assign_rhs_code (gs))
1769 case MULT_EXPR:
1770 slsr_process_mul (gs, rhs1, rhs2, speed);
1771 break;
1773 case PLUS_EXPR:
1774 case POINTER_PLUS_EXPR:
1775 case MINUS_EXPR:
1776 slsr_process_add (gs, rhs1, rhs2, speed);
1777 break;
1779 case NEGATE_EXPR:
1780 slsr_process_neg (gs, rhs1, speed);
1781 break;
1783 CASE_CONVERT:
1784 slsr_process_cast (gs, rhs1, speed);
1785 break;
1787 case SSA_NAME:
1788 slsr_process_copy (gs, rhs1, speed);
1789 break;
1791 default:
1796 return NULL;
1799 /* Dump a candidate for debug. */
1801 static void
1802 dump_candidate (slsr_cand_t c)
1804 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1805 gimple_bb (c->cand_stmt)->index);
1806 print_gimple_stmt (dump_file, c->cand_stmt, 0);
1807 switch (c->kind)
1809 case CAND_MULT:
1810 fputs (" MULT : (", dump_file);
1811 print_generic_expr (dump_file, c->base_expr);
1812 fputs (" + ", dump_file);
1813 print_decs (c->index, dump_file);
1814 fputs (") * ", dump_file);
1815 if (TREE_CODE (c->stride) != INTEGER_CST
1816 && c->stride_type != TREE_TYPE (c->stride))
1818 fputs ("(", dump_file);
1819 print_generic_expr (dump_file, c->stride_type);
1820 fputs (")", dump_file);
1822 print_generic_expr (dump_file, c->stride);
1823 fputs (" : ", dump_file);
1824 break;
1825 case CAND_ADD:
1826 fputs (" ADD : ", dump_file);
1827 print_generic_expr (dump_file, c->base_expr);
1828 fputs (" + (", dump_file);
1829 print_decs (c->index, dump_file);
1830 fputs (" * ", dump_file);
1831 if (TREE_CODE (c->stride) != INTEGER_CST
1832 && c->stride_type != TREE_TYPE (c->stride))
1834 fputs ("(", dump_file);
1835 print_generic_expr (dump_file, c->stride_type);
1836 fputs (")", dump_file);
1838 print_generic_expr (dump_file, c->stride);
1839 fputs (") : ", dump_file);
1840 break;
1841 case CAND_REF:
1842 fputs (" REF : ", dump_file);
1843 print_generic_expr (dump_file, c->base_expr);
1844 fputs (" + (", dump_file);
1845 print_generic_expr (dump_file, c->stride);
1846 fputs (") + ", dump_file);
1847 print_decs (c->index, dump_file);
1848 fputs (" : ", dump_file);
1849 break;
1850 case CAND_PHI:
1851 fputs (" PHI : ", dump_file);
1852 print_generic_expr (dump_file, c->base_expr);
1853 fputs (" + (unknown * ", dump_file);
1854 print_generic_expr (dump_file, c->stride);
1855 fputs (") : ", dump_file);
1856 break;
1857 default:
1858 gcc_unreachable ();
1860 print_generic_expr (dump_file, c->cand_type);
1861 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1862 c->basis, c->dependent, c->sibling);
1863 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1864 c->next_interp, c->dead_savings);
1865 if (c->def_phi)
1866 fprintf (dump_file, " phi: %d\n", c->def_phi);
1867 fputs ("\n", dump_file);
1870 /* Dump the candidate vector for debug. */
1872 static void
1873 dump_cand_vec (void)
1875 unsigned i;
1876 slsr_cand_t c;
1878 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1880 FOR_EACH_VEC_ELT (cand_vec, i, c)
1881 dump_candidate (c);
1884 /* Callback used to dump the candidate chains hash table. */
1887 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1889 const_cand_chain_t chain = *slot;
1890 cand_chain_t p;
1892 print_generic_expr (dump_file, chain->base_expr);
1893 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1895 for (p = chain->next; p; p = p->next)
1896 fprintf (dump_file, " -> %d", p->cand->cand_num);
1898 fputs ("\n", dump_file);
1899 return 1;
1902 /* Dump the candidate chains. */
1904 static void
1905 dump_cand_chains (void)
1907 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1908 base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1909 (NULL);
1910 fputs ("\n", dump_file);
1913 /* Dump the increment vector for debug. */
1915 static void
1916 dump_incr_vec (void)
1918 if (dump_file && (dump_flags & TDF_DETAILS))
1920 unsigned i;
1922 fprintf (dump_file, "\nIncrement vector:\n\n");
1924 for (i = 0; i < incr_vec_len; i++)
1926 fprintf (dump_file, "%3d increment: ", i);
1927 print_decs (incr_vec[i].incr, dump_file);
1928 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1929 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1930 fputs ("\n initializer: ", dump_file);
1931 print_generic_expr (dump_file, incr_vec[i].initializer);
1932 fputs ("\n\n", dump_file);
1937 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1938 data reference. */
1940 static void
1941 replace_ref (tree *expr, slsr_cand_t c)
1943 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1944 unsigned HOST_WIDE_INT misalign;
1945 unsigned align;
1947 /* Ensure the memory reference carries the minimum alignment
1948 requirement for the data type. See PR58041. */
1949 get_object_alignment_1 (*expr, &align, &misalign);
1950 if (misalign != 0)
1951 align = least_bit_hwi (misalign);
1952 if (align < TYPE_ALIGN (acc_type))
1953 acc_type = build_aligned_type (acc_type, align);
1955 add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type,
1956 c->base_expr, c->stride);
1957 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1958 wide_int_to_tree (c->cand_type, c->index));
1960 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1961 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1962 TREE_OPERAND (mem_ref, 0)
1963 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1964 /*simple_p=*/true, NULL,
1965 /*before=*/true, GSI_SAME_STMT);
1966 copy_ref_info (mem_ref, *expr);
1967 *expr = mem_ref;
1968 update_stmt (c->cand_stmt);
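/* As an illustrative sketch (hypothetical names and values): for a
   CAND_REF with base_expr p_1, stride t_2 and index 16, the reference
   in *EXPR is rewritten as

     MEM[(p_1 + t_2) + 16]

   i.e. a MEM_REF whose address is the POINTER_PLUS_EXPR built above and
   whose constant offset operand is the candidate's index.  */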
1971 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1972 dependent of candidate C with an equivalent strength-reduced data
1973 reference. */
1975 static void
1976 replace_refs (slsr_cand_t c)
1978 if (dump_file && (dump_flags & TDF_DETAILS))
1980 fputs ("Replacing reference: ", dump_file);
1981 print_gimple_stmt (dump_file, c->cand_stmt, 0);
1984 if (gimple_vdef (c->cand_stmt))
1986 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1987 replace_ref (lhs, c);
1989 else
1991 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1992 replace_ref (rhs, c);
1995 if (dump_file && (dump_flags & TDF_DETAILS))
1997 fputs ("With: ", dump_file);
1998 print_gimple_stmt (dump_file, c->cand_stmt, 0);
1999 fputs ("\n", dump_file);
2002 if (c->sibling)
2003 replace_refs (lookup_cand (c->sibling));
2005 if (c->dependent)
2006 replace_refs (lookup_cand (c->dependent));
2009 /* Return TRUE if candidate C is dependent upon a PHI. */
2011 static bool
2012 phi_dependent_cand_p (slsr_cand_t c)
2014 /* A candidate is not necessarily dependent upon a PHI just because
2015 it has a phi definition for its base name. It may have a basis
2016 that relies upon the same phi definition, in which case the PHI
2017 is irrelevant to this candidate. */
2018 return (c->def_phi
2019 && c->basis
2020 && lookup_cand (c->basis)->def_phi != c->def_phi);
2023 /* Calculate the increment required for candidate C relative to
2024 its basis. */
2026 static widest_int
2027 cand_increment (slsr_cand_t c)
2029 slsr_cand_t basis;
2031 /* If the candidate doesn't have a basis, just return its own
2032 index. This is useful in record_increments to help us find
2033 an existing initializer. Also, if the candidate's basis is
2034 hidden by a phi, then its own index will be the increment
2035 from the newly introduced phi basis. */
2036 if (!c->basis || phi_dependent_cand_p (c))
2037 return c->index;
2039 basis = lookup_cand (c->basis);
2040 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
2041 return c->index - basis->index;
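/* Worked example (hypothetical candidates, using the (B + i) * S form
   that candidate interpretations use elsewhere in this file): given basis
     S0:  y_1 = (b_2 + 2) * s_3		index i' = 2
   and candidate
     S1:  x_4 = (b_2 + 7) * s_3		index i  = 7
   cand_increment for S1 returns i - i' = 5.  */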
2044 /* Calculate the increment required for candidate C relative to
2045 its basis. If we aren't going to generate pointer arithmetic
2046 for this candidate, return the absolute value of that increment
2047 instead. */
2049 static inline widest_int
2050 cand_abs_increment (slsr_cand_t c)
2052 widest_int increment = cand_increment (c);
2054 if (!address_arithmetic_p && wi::neg_p (increment))
2055 increment = -increment;
2057 return increment;
2060 /* Return TRUE iff candidate C has already been replaced under
2061 another interpretation. */
2063 static inline bool
2064 cand_already_replaced (slsr_cand_t c)
2066 return (gimple_bb (c->cand_stmt) == 0);
2069 /* Common logic used by replace_unconditional_candidate and
2070 replace_conditional_candidate. */
2072 static void
2073 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
2075 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2076 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2078 /* It is highly unlikely, but possible, that the resulting
2079 bump doesn't fit in a HWI. Abandon the replacement
2080 in this case. This does not affect siblings or dependents
2081 of C. Restriction to signed HWI is conservative for unsigned
2082 types but allows for safe negation without twisted logic. */
2083 if (wi::fits_shwi_p (bump)
2084 && bump.to_shwi () != HOST_WIDE_INT_MIN
2085 /* It is not useful to replace casts, copies, negates, or adds of
2086 an SSA name and a constant. */
2087 && cand_code != SSA_NAME
2088 && !CONVERT_EXPR_CODE_P (cand_code)
2089 && cand_code != PLUS_EXPR
2090 && cand_code != POINTER_PLUS_EXPR
2091 && cand_code != MINUS_EXPR
2092 && cand_code != NEGATE_EXPR)
2094 enum tree_code code = PLUS_EXPR;
2095 tree bump_tree;
2096 gimple *stmt_to_print = NULL;
2098 /* If the basis name and the candidate's LHS have incompatible
2099 types, introduce a cast. */
2100 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2101 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2102 if (wi::neg_p (bump))
2104 code = MINUS_EXPR;
2105 bump = -bump;
2108 bump_tree = wide_int_to_tree (target_type, bump);
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2112 fputs ("Replacing: ", dump_file);
2113 print_gimple_stmt (dump_file, c->cand_stmt, 0);
2116 if (bump == 0)
2118 tree lhs = gimple_assign_lhs (c->cand_stmt);
2119 gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
2120 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2121 slsr_cand_t cc = c;
2122 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2123 gsi_replace (&gsi, copy_stmt, false);
2124 c->cand_stmt = copy_stmt;
2125 while (cc->next_interp)
2127 cc = lookup_cand (cc->next_interp);
2128 cc->cand_stmt = copy_stmt;
2130 if (dump_file && (dump_flags & TDF_DETAILS))
2131 stmt_to_print = copy_stmt;
2133 else
2135 tree rhs1, rhs2;
2136 if (cand_code != NEGATE_EXPR) {
2137 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2138 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2140 if (cand_code != NEGATE_EXPR
2141 && ((operand_equal_p (rhs1, basis_name, 0)
2142 && operand_equal_p (rhs2, bump_tree, 0))
2143 || (operand_equal_p (rhs1, bump_tree, 0)
2144 && operand_equal_p (rhs2, basis_name, 0))))
2146 if (dump_file && (dump_flags & TDF_DETAILS))
2148 fputs ("(duplicate, not actually replacing)", dump_file);
2149 stmt_to_print = c->cand_stmt;
2152 else
2154 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2155 slsr_cand_t cc = c;
2156 gimple_assign_set_rhs_with_ops (&gsi, code,
2157 basis_name, bump_tree);
2158 update_stmt (gsi_stmt (gsi));
2159 c->cand_stmt = gsi_stmt (gsi);
2160 while (cc->next_interp)
2162 cc = lookup_cand (cc->next_interp);
2163 cc->cand_stmt = gsi_stmt (gsi);
2165 if (dump_file && (dump_flags & TDF_DETAILS))
2166 stmt_to_print = gsi_stmt (gsi);
2170 if (dump_file && (dump_flags & TDF_DETAILS))
2172 fputs ("With: ", dump_file);
2173 print_gimple_stmt (dump_file, stmt_to_print, 0);
2174 fputs ("\n", dump_file);
2179 /* Replace candidate C with an add or subtract. Note that we only
2180 operate on CAND_MULTs with known strides, so we will never generate
2181 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2182 X = Y + ((i - i') * S), as described in the module commentary. The
2183 folded value ((i - i') * S) is referred to here as the "bump." */
2185 static void
2186 replace_unconditional_candidate (slsr_cand_t c)
2188 slsr_cand_t basis;
2190 if (cand_already_replaced (c))
2191 return;
2193 basis = lookup_cand (c->basis);
2194 widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
2196 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
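/* Worked example (hypothetical): with a constant stride of 4, basis
     y_1 = (b_2 + 2) * 4
   and candidate
     x_3 = (b_2 + 7) * 4,
   the increment is 7 - 2 = 5 and the bump is 5 * 4 = 20, so the
   candidate's statement is rewritten as  x_3 = y_1 + 20.  */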
2199 /* Return the index in the increment vector of the given INCREMENT,
2200 or -1 if not found. The latter can occur if more than
2201 MAX_INCR_VEC_LEN increments have been found. */
2203 static inline int
2204 incr_vec_index (const widest_int &increment)
2206 unsigned i;
2208 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2211 if (i < incr_vec_len)
2212 return i;
2213 else
2214 return -1;
2217 /* Create a new statement along edge E to add BASIS_NAME to the product
2218 of INCREMENT and the stride of candidate C.  Create and return a new
2219 SSA name to be used as the LHS of the new statement.
2220 KNOWN_STRIDE is true iff C's stride is a constant. */
2222 static tree
2223 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2224 widest_int increment, edge e, location_t loc,
2225 bool known_stride)
2227 basic_block insert_bb;
2228 gimple_stmt_iterator gsi;
2229 tree lhs, basis_type;
2230 gassign *new_stmt, *cast_stmt = NULL;
2232 /* If the add candidate along this incoming edge has the same
2233 index as C's hidden basis, the hidden basis represents this
2234 edge correctly. */
2235 if (increment == 0)
2236 return basis_name;
2238 basis_type = TREE_TYPE (basis_name);
2239 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2241 /* Occasionally people convert integers to pointers without a
2242 cast, leading us into trouble if we aren't careful. */
2243 enum tree_code plus_code
2244 = POINTER_TYPE_P (basis_type) ? POINTER_PLUS_EXPR : PLUS_EXPR;
2246 if (known_stride)
2248 tree bump_tree;
2249 enum tree_code code = plus_code;
2250 widest_int bump = increment * wi::to_widest (c->stride);
2251 if (wi::neg_p (bump) && !POINTER_TYPE_P (basis_type))
2253 code = MINUS_EXPR;
2254 bump = -bump;
2257 tree stride_type = POINTER_TYPE_P (basis_type) ? sizetype : basis_type;
2258 bump_tree = wide_int_to_tree (stride_type, bump);
2259 new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree);
2261 else
2263 int i;
2264 bool negate_incr = !POINTER_TYPE_P (basis_type) && wi::neg_p (increment);
2265 i = incr_vec_index (negate_incr ? -increment : increment);
2266 gcc_assert (i >= 0);
2268 if (incr_vec[i].initializer)
2270 enum tree_code code = negate_incr ? MINUS_EXPR : plus_code;
2271 new_stmt = gimple_build_assign (lhs, code, basis_name,
2272 incr_vec[i].initializer);
2274 else {
2275 tree stride;
2277 if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
2279 tree cast_stride = make_temp_ssa_name (c->stride_type, NULL,
2280 "slsr");
2281 cast_stmt = gimple_build_assign (cast_stride, NOP_EXPR,
2282 c->stride);
2283 stride = cast_stride;
2285 else
2286 stride = c->stride;
2288 if (increment == 1)
2289 new_stmt = gimple_build_assign (lhs, plus_code, basis_name, stride);
2290 else if (increment == -1)
2291 new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name, stride);
2292 else
2293 gcc_unreachable ();
2297 insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
2298 gsi = gsi_last_bb (insert_bb);
2300 if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
2302 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
2303 if (cast_stmt)
2305 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
2306 gimple_set_location (cast_stmt, loc);
2309 else
2311 if (cast_stmt)
2313 gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
2314 gimple_set_location (cast_stmt, loc);
2316 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2319 gimple_set_location (new_stmt, loc);
2321 if (dump_file && (dump_flags & TDF_DETAILS))
2323 if (cast_stmt)
2325 fprintf (dump_file, "Inserting cast in block %d: ",
2326 insert_bb->index);
2327 print_gimple_stmt (dump_file, cast_stmt, 0);
2329 fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
2330 print_gimple_stmt (dump_file, new_stmt, 0);
2333 return lhs;
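/* For instance (hypothetical values): with basis_name y_1, increment 3
   and a known constant stride of 4, the statement  slsr_2 = y_1 + 12
   is added at the end of E->src (or of a block split from E).  With an
   unknown stride, an increment of 1 and no recorded initializer, the
   statement is  slsr_2 = y_1 + stride  instead.  */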
2336 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2337 is hidden by the phi node FROM_PHI, create a new phi node in the same
2338 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2339 with its phi arguments representing conditional adjustments to the
2340 hidden basis along conditional incoming paths. Those adjustments are
2341 made by creating add statements (and sometimes recursively creating
2342 phis) along those incoming paths. LOC is the location to attach to
2343 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2344 constant. */
2346 static tree
2347 create_phi_basis (slsr_cand_t c, gimple *from_phi, tree basis_name,
2348 location_t loc, bool known_stride)
2350 int i;
2351 tree name, phi_arg;
2352 gphi *phi;
2353 slsr_cand_t basis = lookup_cand (c->basis);
2354 int nargs = gimple_phi_num_args (from_phi);
2355 basic_block phi_bb = gimple_bb (from_phi);
2356 slsr_cand_t phi_cand = *stmt_cand_map->get (from_phi);
2357 auto_vec<tree> phi_args (nargs);
2359 /* Process each argument of the existing phi that represents
2360 conditionally-executed add candidates. */
2361 for (i = 0; i < nargs; i++)
2363 edge e = (*phi_bb->preds)[i];
2364 tree arg = gimple_phi_arg_def (from_phi, i);
2365 tree feeding_def;
2367 /* If the phi argument is the base name of the CAND_PHI, then
2368 this incoming arc should use the hidden basis. */
2369 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2370 if (basis->index == 0)
2371 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2372 else
2374 widest_int incr = -basis->index;
2375 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2376 e, loc, known_stride);
2378 else
2380 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2382 /* If there is another phi along this incoming edge, we must
2383 process it in the same fashion to ensure that all basis
2384 adjustments are made along its incoming edges. */
2385 if (gimple_code (arg_def) == GIMPLE_PHI)
2386 feeding_def = create_phi_basis (c, arg_def, basis_name,
2387 loc, known_stride);
2388 else
2390 slsr_cand_t arg_cand = base_cand_from_table (arg);
2391 widest_int diff = arg_cand->index - basis->index;
2392 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2393 e, loc, known_stride);
2397 /* Because of recursion, we need to save the arguments in a vector
2398 so we can create the PHI statement all at once. Otherwise the
2399 storage for the half-created PHI can be reclaimed. */
2400 phi_args.safe_push (feeding_def);
2403 /* Create the new phi basis. */
2404 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2405 phi = create_phi_node (name, phi_bb);
2406 SSA_NAME_DEF_STMT (name) = phi;
2408 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2410 edge e = (*phi_bb->preds)[i];
2411 add_phi_arg (phi, phi_arg, e, loc);
2414 update_stmt (phi);
2416 if (dump_file && (dump_flags & TDF_DETAILS))
2418 fputs ("Introducing new phi basis: ", dump_file);
2419 print_gimple_stmt (dump_file, phi, 0);
2422 return name;
2425 /* Given a candidate C whose basis is hidden by at least one intervening
2426 phi, introduce a matching number of new phis to represent its basis
2427 adjusted by conditional increments along possible incoming paths. Then
2428 replace C as though it were an unconditional candidate, using the new
2429 basis. */
2431 static void
2432 replace_conditional_candidate (slsr_cand_t c)
2434 tree basis_name, name;
2435 slsr_cand_t basis;
2436 location_t loc;
2438 /* Look up the LHS SSA name from C's basis. This will be the
2439 RHS1 of the adds we will introduce to create new phi arguments. */
2440 basis = lookup_cand (c->basis);
2441 basis_name = gimple_assign_lhs (basis->cand_stmt);
2443 /* Create a new phi statement which will represent C's true basis
2444 after the transformation is complete. */
2445 loc = gimple_location (c->cand_stmt);
2446 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2447 basis_name, loc, KNOWN_STRIDE);
2448 /* Replace C with an add of the new basis phi and a constant. */
2449 widest_int bump = c->index * wi::to_widest (c->stride);
2451 replace_mult_candidate (c, name, bump);
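/* End-to-end sketch of a conditional replacement (hypothetical GIMPLE):

     bb2:  y_1 = b_2 * 4;			 MULT : (b_2 + 0) * 4
	   if (...) goto bb3; else goto bb4;
     bb3:  b_5 = b_2 + 5;
     bb4:  b_6 = PHI <b_2(bb2), b_5(bb3)>
	   x_7 = b_6 * 4;			 MULT : (b_6 + 0) * 4

   The candidate x_7 has hidden basis y_1.  create_phi_basis inserts
   t_8 = y_1 + 20 on the bb3 arc (diff 5, stride 4) and builds

     slsr_9 = PHI <y_1(bb2), t_8(bb3)>

   after which replace_mult_candidate turns x_7 into a copy of slsr_9,
   since the bump is 0 * 4 = 0.  */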
2454 /* Compute the expected costs of inserting basis adjustments for
2455 candidate C with phi-definition PHI. The cost of inserting
2456 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2457 which are themselves phi results, recursively calculate costs
2458 for those phis as well. */
2460 static int
2461 phi_add_costs (gimple *phi, slsr_cand_t c, int one_add_cost)
2463 unsigned i;
2464 int cost = 0;
2465 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2467 /* If we work our way back to a phi that isn't dominated by the hidden
2468 basis, this isn't a candidate for replacement. Indicate this by
2469 returning an unreasonably high cost. It's not easy to detect
2470 these situations when determining the basis, so we defer the
2471 decision until now. */
2472 basic_block phi_bb = gimple_bb (phi);
2473 slsr_cand_t basis = lookup_cand (c->basis);
2474 basic_block basis_bb = gimple_bb (basis->cand_stmt);
2476 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2477 return COST_INFINITE;
2479 for (i = 0; i < gimple_phi_num_args (phi); i++)
2481 tree arg = gimple_phi_arg_def (phi, i);
2483 if (arg != phi_cand->base_expr)
2485 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2487 if (gimple_code (arg_def) == GIMPLE_PHI)
2488 cost += phi_add_costs (arg_def, c, one_add_cost);
2489 else
2491 slsr_cand_t arg_cand = base_cand_from_table (arg);
2493 if (arg_cand->index != c->index)
2494 cost += one_add_cost;
2499 return cost;
2502 /* For candidate C, each sibling of candidate C, and each dependent of
2503 candidate C, determine whether the candidate is dependent upon a
2504 phi that hides its basis. If not, replace the candidate unconditionally.
2505 Otherwise, determine whether the cost of introducing compensation code
2506 for the candidate is offset by the gains from strength reduction. If
2507 so, replace the candidate and introduce the compensation code. */
2509 static void
2510 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2512 if (phi_dependent_cand_p (c))
2514 /* A multiply candidate with a stride of 1 is just an artifice
2515 of a copy or cast; there is no value in replacing it. */
2516 if (c->kind == CAND_MULT && wi::to_widest (c->stride) != 1)
2518 /* A candidate dependent upon a phi will replace a multiply by
2519 a constant with an add, and will insert at most one add for
2520 each phi argument. Add these costs with the potential dead-code
2521 savings to determine profitability. */
2522 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2523 int mult_savings = stmt_cost (c->cand_stmt, speed);
2524 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2525 tree phi_result = gimple_phi_result (phi);
2526 int one_add_cost = add_cost (speed,
2527 TYPE_MODE (TREE_TYPE (phi_result)));
2528 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2529 int cost = add_costs - mult_savings - c->dead_savings;
2531 if (dump_file && (dump_flags & TDF_DETAILS))
2533 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2534 fprintf (dump_file, " add_costs = %d\n", add_costs);
2535 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2536 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2537 fprintf (dump_file, " cost = %d\n", cost);
2538 if (cost <= COST_NEUTRAL)
2539 fputs (" Replacing...\n", dump_file);
2540 else
2541 fputs (" Not replaced.\n", dump_file);
2544 if (cost <= COST_NEUTRAL)
2545 replace_conditional_candidate (c);
2548 else
2549 replace_unconditional_candidate (c);
2551 if (c->sibling)
2552 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2554 if (c->dependent)
2555 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
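/* Profitability sketch for the conditional case (hypothetical costs):
   if one add costs 4, the multiply being replaced costs 16
   (mult_savings = 16), the phi requires two compensating adds
   (add_costs = 4 + 8 = 12) and dead_savings is 0, then
     cost = 12 - 16 - 0 = -4 <= COST_NEUTRAL,
   so the candidate is replaced.  */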
2558 /* Count the number of candidates in the tree rooted at C that have
2559 not already been replaced under other interpretations. */
2561 static int
2562 count_candidates (slsr_cand_t c)
2564 unsigned count = cand_already_replaced (c) ? 0 : 1;
2566 if (c->sibling)
2567 count += count_candidates (lookup_cand (c->sibling));
2569 if (c->dependent)
2570 count += count_candidates (lookup_cand (c->dependent));
2572 return count;
2575 /* Increase the count of INCREMENT by one in the increment vector.
2576 INCREMENT is associated with candidate C. If INCREMENT is to be
2577 conditionally executed as part of a conditional candidate replacement,
2578 IS_PHI_ADJUST is true, otherwise false. If an initializer
2579 T_0 = stride * I is provided by a candidate that dominates all
2580 candidates with the same increment, also record T_0 for subsequent use. */
2582 static void
2583 record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
2585 bool found = false;
2586 unsigned i;
2588 /* Treat increments that differ only in sign as identical so as to
2589 share initializers, unless we are generating pointer arithmetic. */
2590 if (!address_arithmetic_p && wi::neg_p (increment))
2591 increment = -increment;
2593 for (i = 0; i < incr_vec_len; i++)
2595 if (incr_vec[i].incr == increment)
2597 incr_vec[i].count++;
2598 found = true;
2600 /* If we previously recorded an initializer that doesn't
2601 dominate this candidate, it's not going to be useful to
2602 us after all. */
2603 if (incr_vec[i].initializer
2604 && !dominated_by_p (CDI_DOMINATORS,
2605 gimple_bb (c->cand_stmt),
2606 incr_vec[i].init_bb))
2608 incr_vec[i].initializer = NULL_TREE;
2609 incr_vec[i].init_bb = NULL;
2612 break;
2616 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2618 /* The first time we see an increment, create the entry for it.
2619 If this is the root candidate which doesn't have a basis, set
2620 the count to zero. We're only processing it so it can possibly
2621 provide an initializer for other candidates. */
2622 incr_vec[incr_vec_len].incr = increment;
2623 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2624 incr_vec[incr_vec_len].cost = COST_INFINITE;
2626 /* Optimistically record the first occurrence of this increment
2627 as providing an initializer (if it does); we will revise this
2628 opinion later if it doesn't dominate all other occurrences.
2629 Exception: increments of 0 and 1 never need initializers,
2630 and phi adjustments don't ever provide initializers. */
2631 if (c->kind == CAND_ADD
2632 && !is_phi_adjust
2633 && c->index == increment
2634 && (increment > 1 || increment < 0)
2635 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2636 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2638 tree t0 = NULL_TREE;
2639 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2640 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2641 if (operand_equal_p (rhs1, c->base_expr, 0))
2642 t0 = rhs2;
2643 else if (operand_equal_p (rhs2, c->base_expr, 0))
2644 t0 = rhs1;
2645 if (t0
2646 && SSA_NAME_DEF_STMT (t0)
2647 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2649 incr_vec[incr_vec_len].initializer = t0;
2650 incr_vec[incr_vec_len++].init_bb
2651 = gimple_bb (SSA_NAME_DEF_STMT (t0));
2653 else
2655 incr_vec[incr_vec_len].initializer = NULL_TREE;
2656 incr_vec[incr_vec_len++].init_bb = NULL;
2659 else
2661 incr_vec[incr_vec_len].initializer = NULL_TREE;
2662 incr_vec[incr_vec_len++].init_bb = NULL;
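/* For example (hypothetical): a CAND_ADD  x_1 = b_2 + t_4  with
   t_4 = s_3 * 5, interpreted as  b_2 + (5 * s_3), records increment 5
   and optimistically notes t_4 as an initializer; the initializer is
   dropped later if t_4's definition fails to dominate some other
   candidate with the same increment.  */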
2667 /* Given phi statement PHI that hides a candidate from its BASIS, find
2668 the increments along each incoming arc (recursively handling additional
2669 phis that may be present) and record them. These increments are the
2670 difference in index between the index-adjusting statements and the
2671 index of the basis. */
2673 static void
2674 record_phi_increments (slsr_cand_t basis, gimple *phi)
2676 unsigned i;
2677 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2679 for (i = 0; i < gimple_phi_num_args (phi); i++)
2681 tree arg = gimple_phi_arg_def (phi, i);
2683 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2685 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2687 if (gimple_code (arg_def) == GIMPLE_PHI)
2688 record_phi_increments (basis, arg_def);
2689 else
2691 slsr_cand_t arg_cand = base_cand_from_table (arg);
2692 widest_int diff = arg_cand->index - basis->index;
2693 record_increment (arg_cand, diff, PHI_ADJUST);
2699 /* Determine how many times each unique increment occurs in the set
2700 of candidates rooted at C's parent, recording the data in the
2701 increment vector. For each unique increment I, if an initializer
2702 T_0 = stride * I is provided by a candidate that dominates all
2703 candidates with the same increment, also record T_0 for subsequent
2704 use. */
2706 static void
2707 record_increments (slsr_cand_t c)
2709 if (!cand_already_replaced (c))
2711 if (!phi_dependent_cand_p (c))
2712 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2713 else
2715 /* A candidate with a basis hidden by a phi will have one
2716 increment for its relationship to the index represented by
2717 the phi, and potentially additional increments along each
2718 incoming edge. For the root of the dependency tree (which
2719 has no basis), process just the initial index in case it has
2720 an initializer that can be used by subsequent candidates. */
2721 record_increment (c, c->index, NOT_PHI_ADJUST);
2723 if (c->basis)
2724 record_phi_increments (lookup_cand (c->basis),
2725 lookup_cand (c->def_phi)->cand_stmt);
2729 if (c->sibling)
2730 record_increments (lookup_cand (c->sibling));
2732 if (c->dependent)
2733 record_increments (lookup_cand (c->dependent));
2736 /* Add up and return the costs of introducing add statements that
2737 require the increment INCR on behalf of candidate C and phi
2738 statement PHI. Accumulate into *SAVINGS the potential savings
2739 from removing existing statements that feed PHI and have no other
2740 uses. */
2742 static int
2743 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple *phi,
2744 int *savings)
2746 unsigned i;
2747 int cost = 0;
2748 slsr_cand_t basis = lookup_cand (c->basis);
2749 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2751 for (i = 0; i < gimple_phi_num_args (phi); i++)
2753 tree arg = gimple_phi_arg_def (phi, i);
2755 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2757 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2759 if (gimple_code (arg_def) == GIMPLE_PHI)
2761 int feeding_savings = 0;
2762 tree feeding_var = gimple_phi_result (arg_def);
2763 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2764 if (uses_consumed_by_stmt (feeding_var, phi))
2765 *savings += feeding_savings;
2767 else
2769 slsr_cand_t arg_cand = base_cand_from_table (arg);
2770 widest_int diff = arg_cand->index - basis->index;
2772 if (incr == diff)
2774 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2775 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2776 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2777 if (uses_consumed_by_stmt (lhs, phi))
2778 *savings += stmt_cost (arg_cand->cand_stmt, true);
2784 return cost;
2787 /* Return the first candidate in the tree rooted at C that has not
2788 already been replaced, favoring siblings over dependents. */
2790 static slsr_cand_t
2791 unreplaced_cand_in_tree (slsr_cand_t c)
2793 if (!cand_already_replaced (c))
2794 return c;
2796 if (c->sibling)
2798 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2799 if (sib)
2800 return sib;
2803 if (c->dependent)
2805 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2806 if (dep)
2807 return dep;
2810 return NULL;
2813 /* Return TRUE if the candidates in the tree rooted at C should be
2814 optimized for speed, else FALSE. We estimate this based on the block
2815 containing the most dominant candidate in the tree that has not yet
2816 been replaced. */
2818 static bool
2819 optimize_cands_for_speed_p (slsr_cand_t c)
2821 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2822 gcc_assert (c2);
2823 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2826 /* Add COST_IN to the lowest cost of any dependent path starting at
2827 candidate C or any of its siblings, counting only candidates along
2828 such paths with increment INCR. Assume that replacing a candidate
2829 reduces cost by REPL_SAVINGS. Also account for savings from any
2830 statements that would go dead. If COUNT_PHIS is true, include
2831 costs of introducing feeding statements for conditional candidates. */
2833 static int
2834 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2835 const widest_int &incr, bool count_phis)
2837 int local_cost, sib_cost, savings = 0;
2838 widest_int cand_incr = cand_abs_increment (c);
2840 if (cand_already_replaced (c))
2841 local_cost = cost_in;
2842 else if (incr == cand_incr)
2843 local_cost = cost_in - repl_savings - c->dead_savings;
2844 else
2845 local_cost = cost_in - c->dead_savings;
2847 if (count_phis
2848 && phi_dependent_cand_p (c)
2849 && !cand_already_replaced (c))
2851 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2852 local_cost += phi_incr_cost (c, incr, phi, &savings);
2854 if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
2855 local_cost -= savings;
2858 if (c->dependent)
2859 local_cost = lowest_cost_path (local_cost, repl_savings,
2860 lookup_cand (c->dependent), incr,
2861 count_phis);
2863 if (c->sibling)
2865 sib_cost = lowest_cost_path (cost_in, repl_savings,
2866 lookup_cand (c->sibling), incr,
2867 count_phis);
2868 local_cost = MIN (local_cost, sib_cost);
2871 return local_cost;
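/* Example (hypothetical costs): called with COST_IN = 4 (the price of
   inserting an initializer) and REPL_SAVINGS = 3, on a chain of two
   dependent candidates that both use increment INCR and have no dead
   savings, the cheapest path costs 4 - 3 - 3 = -2, i.e. the increment
   more than pays for its initializer.  */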
2874 /* Compute the total savings that would accrue from all replacements
2875 in the candidate tree rooted at C, counting only candidates with
2876 increment INCR. Assume that replacing a candidate reduces cost
2877 by REPL_SAVINGS. Also account for savings from statements that
2878 would go dead. */
2880 static int
2881 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
2882 bool count_phis)
2884 int savings = 0;
2885 widest_int cand_incr = cand_abs_increment (c);
2887 if (incr == cand_incr && !cand_already_replaced (c))
2888 savings += repl_savings + c->dead_savings;
2890 if (count_phis
2891 && phi_dependent_cand_p (c)
2892 && !cand_already_replaced (c))
2894 int phi_savings = 0;
2895 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2896 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2898 if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
2899 savings += phi_savings;
2902 if (c->dependent)
2903 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2904 count_phis);
2906 if (c->sibling)
2907 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2908 count_phis);
2910 return savings;
2913 /* Use target-specific costs to determine and record which increments
2914 in the current candidate tree are profitable to replace, assuming
2915 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2916 the candidate tree.
2918 One slight limitation here is that we don't account for the possible
2919 introduction of casts in some cases. See replace_one_candidate for
2920 the cases where these are introduced. This should probably be cleaned
2921 up sometime. */
2923 static void
2924 analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
2926 unsigned i;
2928 for (i = 0; i < incr_vec_len; i++)
2930 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2932 /* If somehow this increment is bigger than a HWI, we won't
2933 be optimizing candidates that use it. And if the increment
2934 has a count of zero, nothing will be done with it. */
2935 if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
2936 incr_vec[i].cost = COST_INFINITE;
2938 /* Increments of 0, 1, and -1 are always profitable to replace,
2939 because they always replace a multiply or add with an add or
2940 copy, and may cause one or more existing instructions to go
2941 dead. Exception: -1 can't be assumed to be profitable for
2942 pointer addition. */
2943 else if (incr == 0
2944 || incr == 1
2945 || (incr == -1
2946 && !POINTER_TYPE_P (first_dep->cand_type)))
2947 incr_vec[i].cost = COST_NEUTRAL;
2949 /* If we need to add an initializer, give up if a cast from the
2950 candidate's type to its stride's type can lose precision.
2951 Note that this already takes into account that the stride may
2952 have been cast to a wider type, in which case this test won't
2953 fire. Example:
2955 short int _1;
2956 _2 = (int) _1;
2957 _3 = _2 * 10;
2958 _4 = x + _3; ADD: x + (10 * (int)_1) : int
2959 _5 = _2 * 15;
2960 _6 = x + _5; ADD: x + (15 * (int)_1) : int
2962 Although the stride was a short int initially, the stride
2963 used in the analysis has been widened to an int, and such
2964 widening will be done in the initializer as well. */
2965 else if (!incr_vec[i].initializer
2966 && TREE_CODE (first_dep->stride) != INTEGER_CST
2967 && !legal_cast_p_1 (first_dep->stride_type,
2968 TREE_TYPE (gimple_assign_lhs
2969 (first_dep->cand_stmt))))
2970 incr_vec[i].cost = COST_INFINITE;
2972 /* If we need to add an initializer, make sure we don't introduce
2973 a multiply by a pointer type, which can happen in certain cast
2974 scenarios. */
2975 else if (!incr_vec[i].initializer
2976 && TREE_CODE (first_dep->stride) != INTEGER_CST
2977 && POINTER_TYPE_P (first_dep->stride_type))
2978 incr_vec[i].cost = COST_INFINITE;
2980 /* For any other increment, if this is a multiply candidate, we
2981 must introduce a temporary T and initialize it with
2982 T_0 = stride * increment. When optimizing for speed, walk the
2983 candidate tree to calculate the best cost reduction along any
2984 path; if it offsets the fixed cost of inserting the initializer,
2985 replacing the increment is profitable. When optimizing for
2986 size, instead calculate the total cost reduction from replacing
2987 all candidates with this increment. */
2988 else if (first_dep->kind == CAND_MULT)
2990 int cost = mult_by_coeff_cost (incr, mode, speed);
2991 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2992 if (speed)
2993 cost = lowest_cost_path (cost, repl_savings, first_dep,
2994 incr_vec[i].incr, COUNT_PHIS);
2995 else
2996 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
2997 COUNT_PHIS);
2999 incr_vec[i].cost = cost;
3002 /* If this is an add candidate, the initializer may already
3003 exist, so only calculate the cost of the initializer if it
3004 doesn't. We are replacing one add with another here, so the
3005 known replacement savings is zero. We will account for removal
3006 of dead instructions in lowest_cost_path or total_savings. */
3007 else
3009 int cost = 0;
3010 if (!incr_vec[i].initializer)
3011 cost = mult_by_coeff_cost (incr, mode, speed);
3013 if (speed)
3014 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
3015 DONT_COUNT_PHIS);
3016 else
3017 cost -= total_savings (0, first_dep, incr_vec[i].incr,
3018 DONT_COUNT_PHIS);
3020 incr_vec[i].cost = cost;
3025 /* Return the nearest common dominator of BB1 and BB2. If the blocks
3026 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
3027 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
3028 return C2 in *WHERE; and if the NCD matches neither, return NULL in
3029 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
3031 static basic_block
3032 ncd_for_two_cands (basic_block bb1, basic_block bb2,
3033 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
3035 basic_block ncd;
3037 if (!bb1)
3039 *where = c2;
3040 return bb2;
3043 if (!bb2)
3045 *where = c1;
3046 return bb1;
3049 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
3051 /* If both candidates are in the same block, the earlier
3052 candidate wins. */
3053 if (bb1 == ncd && bb2 == ncd)
3055 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
3056 *where = c2;
3057 else
3058 *where = c1;
3061 /* Otherwise, if one of them produced a candidate in the
3062 dominator, that one wins. */
3063 else if (bb1 == ncd)
3064 *where = c1;
3066 else if (bb2 == ncd)
3067 *where = c2;
3069 /* If neither matches the dominator, neither wins. */
3070 else
3071 *where = NULL;
3073 return ncd;
3076 /* Consider all candidates that feed PHI. Find the nearest common
3077 dominator of those candidates requiring the given increment INCR.
3078 Further find and return the nearest common dominator of this result
3079 with block NCD. If the returned block contains one or more of the
3080 candidates, return the earliest candidate in the block in *WHERE. */
3082 static basic_block
3083 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gphi *phi,
3084 basic_block ncd, slsr_cand_t *where)
3086 unsigned i;
3087 slsr_cand_t basis = lookup_cand (c->basis);
3088 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3090 for (i = 0; i < gimple_phi_num_args (phi); i++)
3092 tree arg = gimple_phi_arg_def (phi, i);
3094 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3096 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3098 if (gimple_code (arg_def) == GIMPLE_PHI)
3099 ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd,
3100 where);
3101 else
3103 slsr_cand_t arg_cand = base_cand_from_table (arg);
3104 widest_int diff = arg_cand->index - basis->index;
3105 basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3107 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3108 ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3113 return ncd;
3116 /* Consider the candidate C together with any candidates that feed
3117 C's phi dependence (if any). Find and return the nearest common
3118 dominator of those candidates requiring the given increment INCR.
3119 If the returned block contains one or more of the candidates,
3120 return the earliest candidate in the block in *WHERE. */
3122 static basic_block
3123 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
3125 basic_block ncd = NULL;
3127 if (cand_abs_increment (c) == incr)
3129 ncd = gimple_bb (c->cand_stmt);
3130 *where = c;
3133 if (phi_dependent_cand_p (c))
3134 ncd = ncd_with_phi (c, incr,
3135 as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt),
3136 ncd, where);
3138 return ncd;
3141 /* Consider all candidates in the tree rooted at C for which INCR
3142 represents the required increment of C relative to its basis.
3143 Find and return the basic block that most nearly dominates all
3144 such candidates. If the returned block contains one or more of
3145 the candidates, return the earliest candidate in the block in
3146 *WHERE. */
3148 static basic_block
3149 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
3150 slsr_cand_t *where)
3152 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3153 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3155 /* First find the NCD of all siblings and dependents. */
3156 if (c->sibling)
3157 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3158 incr, &sib_where);
3159 if (c->dependent)
3160 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3161 incr, &dep_where);
3162 if (!sib_ncd && !dep_ncd)
3164 new_where = NULL;
3165 ncd = NULL;
3167 else if (sib_ncd && !dep_ncd)
3169 new_where = sib_where;
3170 ncd = sib_ncd;
3172 else if (dep_ncd && !sib_ncd)
3174 new_where = dep_where;
3175 ncd = dep_ncd;
3177 else
3178 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3179 dep_where, &new_where);
3181 /* If the candidate's increment doesn't match the one we're interested
3182 in (nor do any increments for feeding defs of a phi-dependence),
3183 then the result depends only on siblings and dependents. */
3184 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3186 if (!this_ncd || cand_already_replaced (c))
3188 *where = new_where;
3189 return ncd;
3192 /* Otherwise, compare this candidate with the result from all siblings
3193 and dependents. */
3194 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3196 return ncd;
3199 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3201 static inline bool
3202 profitable_increment_p (unsigned index)
3204 return (incr_vec[index].cost <= COST_NEUTRAL);
3207 /* For each profitable increment in the increment vector not equal to
3208 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3209 dominator of all statements in the candidate chain rooted at C
3210 that require that increment, and insert an initializer
3211 T_0 = stride * increment at that location. Record T_0 with the
3212 increment record. */
3214 static void
3215 insert_initializers (slsr_cand_t c)
3217 unsigned i;
3219 for (i = 0; i < incr_vec_len; i++)
3221 basic_block bb;
3222 slsr_cand_t where = NULL;
3223 gassign *init_stmt;
3224 gassign *cast_stmt = NULL;
3225 tree new_name, incr_tree, init_stride;
3226 widest_int incr = incr_vec[i].incr;
3228 if (!profitable_increment_p (i)
3229 || incr == 1
3230 || (incr == -1
3231 && (!POINTER_TYPE_P (lookup_cand (c->basis)->cand_type)))
3232 || incr == 0)
3233 continue;
3235 /* We may have already identified an existing initializer that
3236 will suffice. */
3237 if (incr_vec[i].initializer)
3239 if (dump_file && (dump_flags & TDF_DETAILS))
3241 fputs ("Using existing initializer: ", dump_file);
3242 print_gimple_stmt (dump_file,
3243 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3244 0, 0);
3246 continue;
3249 /* Find the block that most closely dominates all candidates
3250 with this increment. If there is at least one candidate in
3251 that block, the earliest one will be returned in WHERE. */
3252 bb = nearest_common_dominator_for_cands (c, incr, &where);
3254 /* If the nominal stride has a different type than the recorded
3255 stride type, build a cast from the nominal stride to that type. */
3256 if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
3258 init_stride = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3259 cast_stmt = gimple_build_assign (init_stride, NOP_EXPR, c->stride);
3261 else
3262 init_stride = c->stride;
3264 /* Create a new SSA name to hold the initializer's value. */
3265 new_name = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3266 incr_vec[i].initializer = new_name;
3268 /* Create the initializer and insert it in the latest possible
3269 dominating position. */
3270 incr_tree = wide_int_to_tree (c->stride_type, incr);
3271 init_stmt = gimple_build_assign (new_name, MULT_EXPR,
3272 init_stride, incr_tree);
3273 if (where)
3275 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3276 location_t loc = gimple_location (where->cand_stmt);
3278 if (cast_stmt)
3280 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3281 gimple_set_location (cast_stmt, loc);
3284 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3285 gimple_set_location (init_stmt, loc);
3287 else
3289 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3290 gimple *basis_stmt = lookup_cand (c->basis)->cand_stmt;
3291 location_t loc = gimple_location (basis_stmt);
3293 if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
3295 if (cast_stmt)
3297 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3298 gimple_set_location (cast_stmt, loc);
3300 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3302 else
3304 if (cast_stmt)
3306 gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
3307 gimple_set_location (cast_stmt, loc);
3309 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
3312 gimple_set_location (init_stmt, gimple_location (basis_stmt));
3315 if (dump_file && (dump_flags & TDF_DETAILS))
3317 if (cast_stmt)
3319 fputs ("Inserting stride cast: ", dump_file);
3320 print_gimple_stmt (dump_file, cast_stmt, 0);
3322 fputs ("Inserting initializer: ", dump_file);
3323 print_gimple_stmt (dump_file, init_stmt, 0);
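/* E.g. (hypothetical): for an SSA stride s_3 and a profitable increment
   of 5 with no existing initializer, a statement such as

     slsr_9 = s_3 * 5;

   is inserted in the block most nearly dominating every candidate that
   needs increment 5, and slsr_9 is recorded as that increment's
   initializer.  */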
3328 /* Return TRUE iff all required increments for candidates feeding PHI
3329 are profitable (and legal!) to replace on behalf of candidate C. */
3331 static bool
3332 all_phi_incrs_profitable (slsr_cand_t c, gphi *phi)
3334 unsigned i;
3335 slsr_cand_t basis = lookup_cand (c->basis);
3336 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3338 /* If the basis doesn't dominate the PHI (including when the PHI is
3339 in the same block as the basis), we won't be able to create a PHI
3340 using the basis here. */
3341 basic_block basis_bb = gimple_bb (basis->cand_stmt);
3342 basic_block phi_bb = gimple_bb (phi);
3344 if (phi_bb == basis_bb
3345 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
3346 return false;
3348 for (i = 0; i < gimple_phi_num_args (phi); i++)
3350 /* If the PHI arg resides in a block not dominated by the basis,
3351 we won't be able to create a PHI using the basis here. */
3352 basic_block pred_bb = gimple_phi_arg_edge (phi, i)->src;
3354 if (!dominated_by_p (CDI_DOMINATORS, pred_bb, basis_bb))
3355 return false;
3357 tree arg = gimple_phi_arg_def (phi, i);
3359 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3361 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3363 if (gimple_code (arg_def) == GIMPLE_PHI)
3365 if (!all_phi_incrs_profitable (c, as_a <gphi *> (arg_def)))
3366 return false;
3368 else
3370 int j;
3371 slsr_cand_t arg_cand = base_cand_from_table (arg);
3372 widest_int increment = arg_cand->index - basis->index;
3374 if (!address_arithmetic_p && wi::neg_p (increment))
3375 increment = -increment;
3377 j = incr_vec_index (increment);
3379 if (dump_file && (dump_flags & TDF_DETAILS))
3381 fprintf (dump_file, " Conditional candidate %d, phi: ",
3382 c->cand_num);
3383 print_gimple_stmt (dump_file, phi, 0);
3384 fputs (" increment: ", dump_file);
3385 print_decs (increment, dump_file);
3386 if (j < 0)
3387 fprintf (dump_file,
3388 "\n Not replaced; incr_vec overflow.\n");
3389 else {
3390 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3391 if (profitable_increment_p (j))
3392 fputs (" Replacing...\n", dump_file);
3393 else
3394 fputs (" Not replaced.\n", dump_file);
3398 if (j < 0 || !profitable_increment_p (j))
3399 return false;
3404 return true;
3407 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3408 type TO_TYPE, and insert it in front of the statement represented
3409 by candidate C. Return the new SSA name holding the result of
3410 the cast. */
3412 static tree
3413 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3415 tree cast_lhs;
3416 gassign *cast_stmt;
3417 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3419 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3420 cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr);
3421 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3422 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3424 if (dump_file && (dump_flags & TDF_DETAILS))
3426 fputs (" Inserting: ", dump_file);
3427 print_gimple_stmt (dump_file, cast_stmt, 0);
3430 return cast_lhs;
3433 /* Replace the RHS of the statement represented by candidate C with
3434 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3435 leave C unchanged or just interchange its operands. The original
3436 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3437 If the replacement was made and we are doing a details dump,
3438 return the revised statement, else NULL. */
3440 static gimple *
3441 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3442 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3443 slsr_cand_t c)
3445 if (new_code != old_code
3446 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3447 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3448 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3449 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3451 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3452 slsr_cand_t cc = c;
3453 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3454 update_stmt (gsi_stmt (gsi));
3455 c->cand_stmt = gsi_stmt (gsi);
3456 while (cc->next_interp)
3458 cc = lookup_cand (cc->next_interp);
3459 cc->cand_stmt = gsi_stmt (gsi);
3462 if (dump_file && (dump_flags & TDF_DETAILS))
3463 return gsi_stmt (gsi);
3466 else if (dump_file && (dump_flags & TDF_DETAILS))
3467 fputs (" (duplicate, not actually replacing)\n", dump_file);
3469 return NULL;
3472 /* Strength-reduce the statement represented by candidate C by replacing
3473 it with an equivalent addition or subtraction. I is the index into
3474 the increment vector identifying C's increment. A new SSA name is
3475 created if a cast needs to be introduced. BASIS_NAME is the rhs1
3476 to use in creating the add/subtract. */
3478 static void
3479 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3481 gimple *stmt_to_print = NULL;
3482 tree orig_rhs1, orig_rhs2;
3483 tree rhs2;
3484 enum tree_code orig_code, repl_code;
3485 widest_int cand_incr;
3487 orig_code = gimple_assign_rhs_code (c->cand_stmt);
3488 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3489 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3490 cand_incr = cand_increment (c);
3492 if (dump_file && (dump_flags & TDF_DETAILS))
3494 fputs ("Replacing: ", dump_file);
3495 print_gimple_stmt (dump_file, c->cand_stmt, 0);
3496 stmt_to_print = c->cand_stmt;
3499 if (address_arithmetic_p)
3500 repl_code = POINTER_PLUS_EXPR;
3501 else
3502 repl_code = PLUS_EXPR;
3504 /* If the increment has an initializer T_0, replace the candidate
3505 statement with an add of the basis name and the initializer. */
3506 if (incr_vec[i].initializer)
3508 tree init_type = TREE_TYPE (incr_vec[i].initializer);
3509 tree orig_type = TREE_TYPE (orig_rhs2);
3511 if (types_compatible_p (orig_type, init_type))
3512 rhs2 = incr_vec[i].initializer;
3513 else
3514 rhs2 = introduce_cast_before_cand (c, orig_type,
3515 incr_vec[i].initializer);
3517 if (incr_vec[i].incr != cand_incr)
3519 gcc_assert (repl_code == PLUS_EXPR);
3520 repl_code = MINUS_EXPR;
3523 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3524 orig_code, orig_rhs1, orig_rhs2,
3525 c);
3528 /* Otherwise, the increment is one of -1, 0, and 1. Replace
3529 with a subtract of the stride from the basis name, a copy
3530 from the basis name, or an add of the stride to the basis
3531 name, respectively. It may be necessary to introduce a
3532 cast (or reuse an existing cast). */
3533 else if (cand_incr == 1)
3535 tree stride_type = TREE_TYPE (c->stride);
3536 tree orig_type = TREE_TYPE (orig_rhs2);
3538 if (types_compatible_p (orig_type, stride_type))
3539 rhs2 = c->stride;
3540 else
3541 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3543 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3544 orig_code, orig_rhs1, orig_rhs2,
3545 c);
3548 else if (cand_incr == -1)
3550 tree stride_type = TREE_TYPE (c->stride);
3551 tree orig_type = TREE_TYPE (orig_rhs2);
3552 gcc_assert (repl_code != POINTER_PLUS_EXPR);
3554 if (types_compatible_p (orig_type, stride_type))
3555 rhs2 = c->stride;
3556 else
3557 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3559 if (orig_code != MINUS_EXPR
3560 || !operand_equal_p (basis_name, orig_rhs1, 0)
3561 || !operand_equal_p (rhs2, orig_rhs2, 0))
3563 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3564 slsr_cand_t cc = c;
3565 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3566 update_stmt (gsi_stmt (gsi));
3567 c->cand_stmt = gsi_stmt (gsi);
3568 while (cc->next_interp)
3570 cc = lookup_cand (cc->next_interp);
3571 cc->cand_stmt = gsi_stmt (gsi);
3574 if (dump_file && (dump_flags & TDF_DETAILS))
3575 stmt_to_print = gsi_stmt (gsi);
3577 else if (dump_file && (dump_flags & TDF_DETAILS))
3578 fputs (" (duplicate, not actually replacing)\n", dump_file);
3581 else if (cand_incr == 0)
3583 tree lhs = gimple_assign_lhs (c->cand_stmt);
3584 tree lhs_type = TREE_TYPE (lhs);
3585 tree basis_type = TREE_TYPE (basis_name);
3587 if (types_compatible_p (lhs_type, basis_type))
3589 gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
3590 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3591 slsr_cand_t cc = c;
3592 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3593 gsi_replace (&gsi, copy_stmt, false);
3594 c->cand_stmt = copy_stmt;
3595 while (cc->next_interp)
3597 cc = lookup_cand (cc->next_interp);
3598 cc->cand_stmt = copy_stmt;
3601 if (dump_file && (dump_flags & TDF_DETAILS))
3602 stmt_to_print = copy_stmt;
3604 else
3606 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3607 gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name);
3608 slsr_cand_t cc = c;
3609 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3610 gsi_replace (&gsi, cast_stmt, false);
3611 c->cand_stmt = cast_stmt;
3612 while (cc->next_interp)
3614 cc = lookup_cand (cc->next_interp);
3615 cc->cand_stmt = cast_stmt;
3618 if (dump_file && (dump_flags & TDF_DETAILS))
3619 stmt_to_print = cast_stmt;
3622 else
3623 gcc_unreachable ();
3625 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3627 fputs ("With: ", dump_file);
3628 print_gimple_stmt (dump_file, stmt_to_print, 0);
3629 fputs ("\n", dump_file);
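/* Putting it together (hypothetical): if increment 5 was given the
   initializer  slsr_9 = s_3 * 5  and candidate  x_1 = b_2 + t_4  has a
   basis whose LHS is y_7, the statement becomes  x_1 = y_7 + slsr_9
   (or  x_1 = y_7 - slsr_9  when the recorded increment is the negation
   of the candidate's own increment).  */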
3633 /* For each candidate in the tree rooted at C, replace it with
3634 an increment if such has been shown to be profitable. */
3636 static void
3637 replace_profitable_candidates (slsr_cand_t c)
3639 if (!cand_already_replaced (c))
3641 widest_int increment = cand_abs_increment (c);
3642 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3643 int i;
3645 i = incr_vec_index (increment);
3647 /* Only process profitable increments. Nothing useful can be done
3648 to a cast or copy. */
3649 if (i >= 0
3650 && profitable_increment_p (i)
3651 && orig_code != SSA_NAME
3652 && !CONVERT_EXPR_CODE_P (orig_code))
3654 if (phi_dependent_cand_p (c))
3656 gphi *phi = as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt);
3658 if (all_phi_incrs_profitable (c, phi))
3660 /* Look up the LHS SSA name from C's basis. This will be
3661 the RHS1 of the adds we will introduce to create new
3662 phi arguments. */
3663 slsr_cand_t basis = lookup_cand (c->basis);
3664 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3666 /* Create a new phi statement that will represent C's true
3667 basis after the transformation is complete. */
3668 location_t loc = gimple_location (c->cand_stmt);
3669 tree name = create_phi_basis (c, phi, basis_name,
3670 loc, UNKNOWN_STRIDE);
3672 /* Replace C with an add of the new basis phi and the
3673 increment. */
3674 replace_one_candidate (c, i, name);
3677 else
3679 slsr_cand_t basis = lookup_cand (c->basis);
3680 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3681 replace_one_candidate (c, i, basis_name);
3686 if (c->sibling)
3687 replace_profitable_candidates (lookup_cand (c->sibling));
3689 if (c->dependent)
3690 replace_profitable_candidates (lookup_cand (c->dependent));
3693 /* Analyze costs of related candidates in the candidate vector,
3694 and make beneficial replacements. */
3696 static void
3697 analyze_candidates_and_replace (void)
3699 unsigned i;
3700 slsr_cand_t c;
3702 /* Each candidate that has a null basis and a non-null
3703 dependent is the root of a tree of related statements.
3704 Analyze each tree to determine a subset of those
3705 statements that can be replaced with maximum benefit. */
3706 FOR_EACH_VEC_ELT (cand_vec, i, c)
3708 slsr_cand_t first_dep;
3710 if (c->basis != 0 || c->dependent == 0)
3711 continue;
3713 if (dump_file && (dump_flags & TDF_DETAILS))
3714 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3715 c->cand_num);
3717 first_dep = lookup_cand (c->dependent);
3719 /* If this is a chain of CAND_REFs, unconditionally replace
3720 each of them with a strength-reduced data reference. */
3721 if (c->kind == CAND_REF)
3722 replace_refs (c);
3724 /* If the common stride of all related candidates is a known
3725 constant, each candidate without a phi-dependence can be
3726 profitably replaced. Each replaces a multiply by a single
3727 add, with the possibility that a feeding add also goes dead.
3728 A candidate with a phi-dependence is replaced only if the
3729 compensation code it requires is offset by the strength
3730 reduction savings. */
3731 else if (TREE_CODE (c->stride) == INTEGER_CST)
3732 replace_uncond_cands_and_profitable_phis (first_dep);
3734 /* When the stride is an SSA name, it may still be profitable
3735 to replace some or all of the dependent candidates, depending
3736 on whether the introduced increments can be reused, or are
3737 less expensive to calculate than the replaced statements. */
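/* Sketch with invented names: with an unknown stride s_1, rewriting
   x2_6 = (b_2 + 3) * s_1  in terms of a basis  x1_4 = (b_2 + 1) * s_1
   requires an initializer  T_0 = s_1 * 2  (see insert_initializers
   below), after which the candidate becomes  x2_6 = x1_4 + T_0.  This
   pays off only when T_0 is shared by several candidates or is cheaper
   than the multiplies it removes.  */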
3738 else
3739 {
3740 machine_mode mode;
3741 bool speed;
3743 /* Determine whether we'll be generating pointer arithmetic
3744 when replacing candidates. */
3745 address_arithmetic_p = (c->kind == CAND_ADD
3746 && POINTER_TYPE_P (c->cand_type));
3748 /* If all candidates have already been replaced under other
3749 interpretations, nothing remains to be done. */
3750 if (!count_candidates (c))
3751 continue;
3753 /* Construct an array of increments for this candidate chain. */
3754 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3755 incr_vec_len = 0;
3756 record_increments (c);
3758 /* Determine which increments are profitable to replace. */
3759 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3760 speed = optimize_cands_for_speed_p (c);
3761 analyze_increments (first_dep, mode, speed);
3763 /* Insert initializers of the form T_0 = stride * increment
3764 for use in profitable replacements. */
3765 insert_initializers (first_dep);
3766 dump_incr_vec ();
3768 /* Perform the replacements. */
3769 replace_profitable_candidates (first_dep);
3770 free (incr_vec);
3771 }
3772 }
3773 }
3774
3775 namespace {
3777 const pass_data pass_data_strength_reduction =
3778 {
3779 GIMPLE_PASS, /* type */
3780 "slsr", /* name */
3781 OPTGROUP_NONE, /* optinfo_flags */
3782 TV_GIMPLE_SLSR, /* tv_id */
3783 ( PROP_cfg | PROP_ssa ), /* properties_required */
3784 0, /* properties_provided */
3785 0, /* properties_destroyed */
3786 0, /* todo_flags_start */
3787 0, /* todo_flags_finish */
3788 };
3789
3790 class pass_strength_reduction : public gimple_opt_pass
3791 {
3792 public:
3793 pass_strength_reduction (gcc::context *ctxt)
3794 : gimple_opt_pass (pass_data_strength_reduction, ctxt)
3795 {}
3796
3797 /* opt_pass methods: */
3798 virtual bool gate (function *) { return flag_tree_slsr; }
3799 virtual unsigned int execute (function *);
3801 }; // class pass_strength_reduction
3803 unsigned
3804 pass_strength_reduction::execute (function *fun)
3805 {
3806 /* Create the obstack where candidates will reside. */
3807 gcc_obstack_init (&cand_obstack);
3809 /* Allocate the candidate vector. */
3810 cand_vec.create (128);
3812 /* Allocate the mapping from statements to candidate indices. */
3813 stmt_cand_map = new hash_map<gimple *, slsr_cand_t>;
3815 /* Create the obstack where candidate chains will reside. */
3816 gcc_obstack_init (&chain_obstack);
3818 /* Allocate the mapping from base expressions to candidate chains. */
3819 base_cand_map = new hash_table<cand_chain_hasher> (500);
3821 /* Allocate the mapping from bases to alternative bases. */
3822 alt_base_map = new hash_map<tree, tree>;
3824 /* Initialize the loop optimizer. We need to detect flow across
3825 back edges, and this gives us dominator information as well. */
3826 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3828 /* Walk the CFG in predominator order looking for strength reduction
3829 candidates. */
3830 find_candidates_dom_walker (CDI_DOMINATORS)
3831 .walk (fun->cfg->x_entry_block_ptr);
3833 if (dump_file && (dump_flags & TDF_DETAILS))
3834 {
3835 dump_cand_vec ();
3836 dump_cand_chains ();
3837 }
3838
3839 delete alt_base_map;
3840 free_affine_expand_cache (&name_expansions);
3842 /* Analyze costs and make appropriate replacements. */
3843 analyze_candidates_and_replace ();
3845 loop_optimizer_finalize ();
3846 delete base_cand_map;
3847 base_cand_map = NULL;
3848 obstack_free (&chain_obstack, NULL);
3849 delete stmt_cand_map;
3850 cand_vec.release ();
3851 obstack_free (&cand_obstack, NULL);
3853 return 0;
3854 }
3855
3856 } // anon namespace
3858 gimple_opt_pass *
3859 make_pass_strength_reduction (gcc::context *ctxt)
3860 {
3861 return new pass_strength_reduction (ctxt);
3862 }