1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2017 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
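/* A purely illustrative sketch (not code used by the pass): given
   straight-line code such as

     a1 = x * 8;
     a2 = (x + 1) * 8;
     a3 = (x + 2) * 8;

   the first multiply dominates the others and shares the base x and
   stride 8, so the later statements can be rewritten as

     a2 = a1 + 8;
     a3 = a2 + 8;

   This is the kind of dominator-path opportunity, missed by IVOPTS,
   that the candidate machinery documented below is designed to find.  */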
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "backend.h"
40 #include "rtl.h"
41 #include "tree.h"
42 #include "gimple.h"
43 #include "cfghooks.h"
44 #include "tree-pass.h"
45 #include "ssa.h"
46 #include "expmed.h"
47 #include "gimple-pretty-print.h"
48 #include "fold-const.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "stor-layout.h"
52 #include "cfgloop.h"
53 #include "tree-cfg.h"
54 #include "domwalk.h"
55 #include "params.h"
56 #include "tree-ssa-address.h"
57 #include "tree-affine.h"
58 #include "builtins.h"
60 /* Information about a strength reduction candidate. Each statement
61 in the candidate table represents an expression of one of the
62 following forms (the special case of CAND_REF will be described
63 later):
65 (CAND_MULT) S1: X = (B + i) * S
66 (CAND_ADD) S1: X = B + (i * S)
68 Here X and B are SSA names, i is an integer constant, and S is
69 either an SSA name or a constant. We call B the "base," i the
70 "index", and S the "stride."
72 Any statement S0 that dominates S1 and is of the form:
74 (CAND_MULT) S0: Y = (B + i') * S
75 (CAND_ADD) S0: Y = B + (i' * S)
77 is called a "basis" for S1. In both cases, S1 may be replaced by
79 S1': X = Y + (i - i') * S,
81 where (i - i') * S is folded to the extent possible.
83 All gimple statements are visited in dominator order, and each
84 statement that may contribute to one of the forms of S1 above is
85 given at least one entry in the candidate table. Such statements
86 include addition, pointer addition, subtraction, multiplication,
87 negation, copies, and nontrivial type casts. If a statement may
88 represent more than one expression of the forms of S1 above,
89 multiple "interpretations" are stored in the table and chained
90 together. Examples:
92 * An add of two SSA names may treat either operand as the base.
93 * A multiply of two SSA names, likewise.
94 * A copy or cast may be thought of as either a CAND_MULT with
95 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
97 Candidate records are allocated from an obstack. They are addressed
98 both from a hash table keyed on S1, and from a vector of candidate
99 pointers arranged in predominator order.
101 Opportunity note
102 ----------------
103 Currently we don't recognize:
105 S0: Y = (S * i') - B
106 S1: X = (S * i) - B
108 as a strength reduction opportunity, even though this S1 would
109 also be replaceable by the S1' above. This can be added if it
110 comes up in practice.
112 Strength reduction in addressing
113 --------------------------------
114 There is another kind of candidate known as CAND_REF. A CAND_REF
115 describes a statement containing a memory reference having
116 complex addressing that might benefit from strength reduction.
117 Specifically, we are interested in references for which
118 get_inner_reference returns a base address, offset, and bitpos as
119 follows:
121 base: MEM_REF (T1, C1)
122 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
123 bitpos: C4 * BITS_PER_UNIT
125 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
126 arbitrary integer constants. Note that C2 may be zero, in which
127 case the offset will be MULT_EXPR (T2, C3).
129 When this pattern is recognized, the original memory reference
130 can be replaced with:
132 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
133 C1 + (C2 * C3) + C4)
135 which distributes the multiply to allow constant folding. When
136 two or more addressing expressions can be represented by MEM_REFs
137 of this form, differing only in the constants C1, C2, and C4,
138 making this substitution produces more efficient addressing during
139 the RTL phases. When there are not at least two expressions with
140 the same values of T1, T2, and C3, there is nothing to be gained
141 by the replacement.
143 Strength reduction of CAND_REFs uses the same infrastructure as
144 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
145 field, MULT_EXPR (T2, C3) in the stride (S) field, and
146 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
147 is thus another CAND_REF with the same B and S values. When at
148 least two CAND_REFs are chained together using the basis relation,
149 each of them is replaced as above, resulting in improved code
150 generation for addressing.
152 Conditional candidates
153 ======================
155 Conditional candidates are best illustrated with an example.
156 Consider the code sequence:
158 (1) x_0 = ...;
159 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
160 if (...)
161 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
162 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
163 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
164 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
166 Here strength reduction is complicated by the uncertain value of x_2.
167 A legitimate transformation is:
169 (1) x_0 = ...;
170 (2) a_0 = x_0 * 5;
171 if (...)
173 (3) [x_1 = x_0 + 1;]
174 (3a) t_1 = a_0 + 5;
176 (4) [x_2 = PHI <x_0, x_1>;]
177 (4a) t_2 = PHI <a_0, t_1>;
178 (5) [x_3 = x_2 + 1;]
179 (6r) a_1 = t_2 + 5;
181 where the bracketed instructions may go dead.
183 To recognize this opportunity, we have to observe that statement (6)
184 has a "hidden basis" (2). The hidden basis is unlike a normal basis
185 in that the statement and the hidden basis have different base SSA
186 names (x_2 and x_0, respectively). The relationship is established
187 when a statement's base name (x_2) is defined by a phi statement (4),
188 each argument of which (x_0, x_1) has an identical "derived base name."
189 If the argument is defined by a candidate (as x_1 is by (3)) that is a
190 CAND_ADD having a stride of 1, the derived base name of the argument is
191 the base name of the candidate (x_0). Otherwise, the argument itself
192 is its derived base name (as is the case with argument x_0).
194 The hidden basis for statement (6) is the nearest dominating candidate
195 whose base name is the derived base name (x_0) of the feeding phi (4),
196 and whose stride is identical to that of the statement. We can then
197 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
198 allowing the final replacement of (6) by the strength-reduced (6r).
200 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
201 A CAND_PHI is not a candidate for replacement, but is maintained in the
202 candidate table to ease discovery of hidden bases. Any phi statement
203 whose arguments share a common derived base name is entered into the
204 table with the derived base name, an (arbitrary) index of zero, and a
205 stride of 1. A statement with a hidden basis can then be detected by
206 simply looking up its feeding phi definition in the candidate table,
207 extracting the derived base name, and searching for a basis in the
208 usual manner after substituting the derived base name.
210 Note that the transformation is only valid when the original phi and
211 the statements that define the phi's arguments are all at the same
212 position in the loop hierarchy. */
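/* As a small illustrative instance of the basis relation above (again,
   not code belonging to the pass), consider

     S0: y = (b + 2) * s;
     S1: x = (b + 7) * s;

   S0 shares the base b and stride s with S1 and dominates it, so S0 is
   a basis for S1, and S1 may be replaced by

     S1': x = y + 5 * s;

   i.e., Y + (i - i') * S with i = 7 and i' = 2.  */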
215 /* Index into the candidate vector, offset by 1. VECs are zero-based,
216 while cand_idx's are one-based, with zero indicating null. */
217 typedef unsigned cand_idx;
219 /* The kind of candidate. */
220 enum cand_kind
222 CAND_MULT,
223 CAND_ADD,
224 CAND_REF,
225 CAND_PHI
228 struct slsr_cand_d
230 /* The candidate statement S1. */
231 gimple *cand_stmt;
233 /* The base expression B: often an SSA name, but not always. */
234 tree base_expr;
236 /* The stride S. */
237 tree stride;
239 /* The index constant i. */
240 widest_int index;
242 /* The type of the candidate. This is normally the type of base_expr,
243 but casts may have occurred when combining feeding instructions.
244 A candidate can only be a basis for candidates of the same final type.
245 (For CAND_REFs, this is the type to be used for operand 1 of the
246 replacement MEM_REF.) */
247 tree cand_type;
249 /* The type to be used to interpret the stride field when the stride
250 is not a constant. Normally the same as the type of the recorded
251 stride, but when the stride has been cast we need to maintain that
252 knowledge in order to make legal substitutions without losing
253 precision. When the stride is a constant, this will be sizetype. */
254 tree stride_type;
256 /* The kind of candidate (CAND_MULT, etc.). */
257 enum cand_kind kind;
259 /* Index of this candidate in the candidate vector. */
260 cand_idx cand_num;
262 /* Index of the next candidate record for the same statement.
263 A statement may be useful in more than one way (e.g., due to
264 commutativity). So we can have multiple "interpretations"
265 of a statement. */
266 cand_idx next_interp;
268 /* Index of the basis statement S0, if any, in the candidate vector. */
269 cand_idx basis;
271 /* First candidate for which this candidate is a basis, if one exists. */
272 cand_idx dependent;
274 /* Next candidate having the same basis as this one. */
275 cand_idx sibling;
277 /* If this is a conditional candidate, the CAND_PHI candidate
278 that defines the base SSA name B. */
279 cand_idx def_phi;
281 /* Savings that can be expected from eliminating dead code if this
282 candidate is replaced. */
283 int dead_savings;
286 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
287 typedef const struct slsr_cand_d *const_slsr_cand_t;
289 /* Pointers to candidates are chained together as part of a mapping
290 from base expressions to the candidates that use them. */
292 struct cand_chain_d
294 /* Base expression for the chain of candidates: often, but not
295 always, an SSA name. */
296 tree base_expr;
298 /* Pointer to a candidate. */
299 slsr_cand_t cand;
301 /* Chain pointer. */
302 struct cand_chain_d *next;
306 typedef struct cand_chain_d cand_chain, *cand_chain_t;
307 typedef const struct cand_chain_d *const_cand_chain_t;
309 /* Information about a unique "increment" associated with candidates
310 having an SSA name for a stride. An increment is the difference
311 between the index of the candidate and the index of its basis,
312 i.e., (i - i') as discussed in the module commentary.
314 When we are not going to generate address arithmetic we treat
315 increments that differ only in sign as the same, allowing sharing
316 of the cost of initializers. The absolute value of the increment
317 is stored in the incr_info. */
319 struct incr_info_d
321 /* The increment that relates a candidate to its basis. */
322 widest_int incr;
324 /* How many times the increment occurs in the candidate tree. */
325 unsigned count;
327 /* Cost of replacing candidates using this increment. Negative and
328 zero costs indicate replacement should be performed. */
329 int cost;
331 /* If this increment is profitable but is not -1, 0, or 1, it requires
332 an initializer T_0 = stride * incr to be found or introduced in the
333 nearest common dominator of all candidates. This field holds T_0
334 for subsequent use. */
335 tree initializer;
337 /* If the initializer was found to already exist, this is the block
338 where it was found. */
339 basic_block init_bb;
342 typedef struct incr_info_d incr_info, *incr_info_t;
344 /* Candidates are maintained in a vector. If candidate X dominates
345 candidate Y, then X appears before Y in the vector; but the
346 converse does not necessarily hold. */
347 static vec<slsr_cand_t> cand_vec;
349 enum cost_consts
351 COST_NEUTRAL = 0,
352 COST_INFINITE = 1000
355 enum stride_status
357 UNKNOWN_STRIDE = 0,
358 KNOWN_STRIDE = 1
361 enum phi_adjust_status
363 NOT_PHI_ADJUST = 0,
364 PHI_ADJUST = 1
367 enum count_phis_status
369 DONT_COUNT_PHIS = 0,
370 COUNT_PHIS = 1
373 /* Pointer map embodying a mapping from statements to candidates. */
374 static hash_map<gimple *, slsr_cand_t> *stmt_cand_map;
376 /* Obstack for candidates. */
377 static struct obstack cand_obstack;
379 /* Obstack for candidate chains. */
380 static struct obstack chain_obstack;
382 /* An array INCR_VEC of incr_infos is used during analysis of related
383 candidates having an SSA name for a stride. INCR_VEC_LEN describes
384 its current length. MAX_INCR_VEC_LEN is used to avoid costly
385 pathological cases. */
386 static incr_info_t incr_vec;
387 static unsigned incr_vec_len;
388 const int MAX_INCR_VEC_LEN = 16;
390 /* For a chain of candidates with unknown stride, indicates whether or not
391 we must generate pointer arithmetic when replacing statements. */
392 static bool address_arithmetic_p;
394 /* Forward function declarations. */
395 static slsr_cand_t base_cand_from_table (tree);
396 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
397 static bool legal_cast_p_1 (tree, tree);
399 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
401 static slsr_cand_t
402 lookup_cand (cand_idx idx)
404 return cand_vec[idx - 1];
407 /* Helper for hashing a candidate chain header. */
409 struct cand_chain_hasher : nofree_ptr_hash <cand_chain>
411 static inline hashval_t hash (const cand_chain *);
412 static inline bool equal (const cand_chain *, const cand_chain *);
415 inline hashval_t
416 cand_chain_hasher::hash (const cand_chain *p)
418 tree base_expr = p->base_expr;
419 return iterative_hash_expr (base_expr, 0);
422 inline bool
423 cand_chain_hasher::equal (const cand_chain *chain1, const cand_chain *chain2)
425 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
428 /* Hash table embodying a mapping from base exprs to chains of candidates. */
429 static hash_table<cand_chain_hasher> *base_cand_map;
431 /* Pointer map used by tree_to_aff_combination_expand. */
432 static hash_map<tree, name_expansion *> *name_expansions;
433 /* Pointer map embodying a mapping from bases to alternative bases. */
434 static hash_map<tree, tree> *alt_base_map;
436 /* Given BASE, use the tree affine combination facilities to
437 find the underlying tree expression for BASE, with any
438 immediate offset excluded.
440 N.B. we should eliminate this backtracking with better forward
441 analysis in a future release. */
443 static tree
444 get_alternative_base (tree base)
446 tree *result = alt_base_map->get (base);
448 if (result == NULL)
450 tree expr;
451 aff_tree aff;
453 tree_to_aff_combination_expand (base, TREE_TYPE (base),
454 &aff, &name_expansions);
455 aff.offset = 0;
456 expr = aff_combination_to_tree (&aff);
458 gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));
460 return expr == base ? NULL : expr;
463 return *result;
466 /* Look in the candidate table for a CAND_PHI that defines BASE and
467 return it if found; otherwise return NULL. */
469 static cand_idx
470 find_phi_def (tree base)
472 slsr_cand_t c;
474 if (TREE_CODE (base) != SSA_NAME)
475 return 0;
477 c = base_cand_from_table (base);
479 if (!c || c->kind != CAND_PHI)
480 return 0;
482 return c->cand_num;
485 /* Determine whether all uses of NAME are directly or indirectly
486    used by STMT.  That is, we want to know whether, if STMT goes
487    dead, the definition of NAME also goes dead.  */
488 static bool
489 uses_consumed_by_stmt (tree name, gimple *stmt, unsigned recurse = 0)
491 gimple *use_stmt;
492 imm_use_iterator iter;
493 bool retval = true;
495 FOR_EACH_IMM_USE_STMT (use_stmt, iter, name)
497 if (use_stmt == stmt || is_gimple_debug (use_stmt))
498 continue;
500 if (!is_gimple_assign (use_stmt)
501 || !gimple_get_lhs (use_stmt)
502 || !is_gimple_reg (gimple_get_lhs (use_stmt))
503 || recurse >= 10
504 || !uses_consumed_by_stmt (gimple_get_lhs (use_stmt), stmt,
505 recurse + 1))
507 retval = false;
508 BREAK_FROM_IMM_USE_STMT (iter);
512 return retval;
515 /* Helper routine for find_basis_for_candidate. May be called twice:
516 once for the candidate's base expr, and optionally again either for
517 the candidate's phi definition or for a CAND_REF's alternative base
518 expression. */
520 static slsr_cand_t
521 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
523 cand_chain mapping_key;
524 cand_chain_t chain;
525 slsr_cand_t basis = NULL;
527 // Limit potential of N^2 behavior for long candidate chains.
528 int iters = 0;
529 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
531 mapping_key.base_expr = base_expr;
532 chain = base_cand_map->find (&mapping_key);
534 for (; chain && iters < max_iters; chain = chain->next, ++iters)
536 slsr_cand_t one_basis = chain->cand;
538 if (one_basis->kind != c->kind
539 || one_basis->cand_stmt == c->cand_stmt
540 || !operand_equal_p (one_basis->stride, c->stride, 0)
541 || !types_compatible_p (one_basis->cand_type, c->cand_type)
542 || !types_compatible_p (one_basis->stride_type, c->stride_type)
543 || !dominated_by_p (CDI_DOMINATORS,
544 gimple_bb (c->cand_stmt),
545 gimple_bb (one_basis->cand_stmt)))
546 continue;
548 if (!basis || basis->cand_num < one_basis->cand_num)
549 basis = one_basis;
552 return basis;
555 /* Use the base expr from candidate C to look for possible candidates
556 that can serve as a basis for C. Each potential basis must also
557 appear in a block that dominates the candidate statement and have
558 the same stride and type. If more than one possible basis exists,
559 the one with highest index in the vector is chosen; this will be
560 the most immediately dominating basis. */
562 static int
563 find_basis_for_candidate (slsr_cand_t c)
565 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
567 /* If a candidate doesn't have a basis using its base expression,
568 it may have a basis hidden by one or more intervening phis. */
569 if (!basis && c->def_phi)
571 basic_block basis_bb, phi_bb;
572 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
573 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
575 if (basis)
577 /* A hidden basis must dominate the phi-definition of the
578 candidate's base name. */
579 phi_bb = gimple_bb (phi_cand->cand_stmt);
580 basis_bb = gimple_bb (basis->cand_stmt);
582 if (phi_bb == basis_bb
583 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
585 basis = NULL;
586 c->basis = 0;
589 /* If we found a hidden basis, estimate additional dead-code
590 savings if the phi and its feeding statements can be removed. */
591 tree feeding_var = gimple_phi_result (phi_cand->cand_stmt);
592 if (basis && uses_consumed_by_stmt (feeding_var, c->cand_stmt))
593 c->dead_savings += phi_cand->dead_savings;
597 if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
599 tree alt_base_expr = get_alternative_base (c->base_expr);
600 if (alt_base_expr)
601 basis = find_basis_for_base_expr (c, alt_base_expr);
604 if (basis)
606 c->sibling = basis->dependent;
607 basis->dependent = c->cand_num;
608 return basis->cand_num;
611 return 0;
614 /* Record a mapping from BASE to C, indicating that C may potentially serve
615 as a basis using that base expression. BASE may be the same as
616    C->BASE_EXPR; alternatively BASE can be a different tree that shares the
617    underlying expression of C->BASE_EXPR.  */
619 static void
620 record_potential_basis (slsr_cand_t c, tree base)
622 cand_chain_t node;
623 cand_chain **slot;
625 gcc_assert (base);
627 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
628 node->base_expr = base;
629 node->cand = c;
630 node->next = NULL;
631 slot = base_cand_map->find_slot (node, INSERT);
633 if (*slot)
635 cand_chain_t head = (cand_chain_t) (*slot);
636 node->next = head->next;
637 head->next = node;
639 else
640 *slot = node;
643 /* Allocate storage for a new candidate and initialize its fields.
644 Attempt to find a basis for the candidate.
646 For CAND_REF, an alternative base may also be recorded and used
647 to find a basis. This helps cases where the expression hidden
648 behind BASE (which is usually an SSA_NAME) has immediate offset,
649 e.g.
651 a2[i][j] = 1;
652 a2[i + 20][j] = 2; */
654 static slsr_cand_t
655 alloc_cand_and_find_basis (enum cand_kind kind, gimple *gs, tree base,
656 const widest_int &index, tree stride, tree ctype,
657 tree stype, unsigned savings)
659 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
660 sizeof (slsr_cand));
661 c->cand_stmt = gs;
662 c->base_expr = base;
663 c->stride = stride;
664 c->index = index;
665 c->cand_type = ctype;
666 c->stride_type = stype;
667 c->kind = kind;
668 c->cand_num = cand_vec.length () + 1;
669 c->next_interp = 0;
670 c->dependent = 0;
671 c->sibling = 0;
672 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
673 c->dead_savings = savings;
675 cand_vec.safe_push (c);
677 if (kind == CAND_PHI)
678 c->basis = 0;
679 else
680 c->basis = find_basis_for_candidate (c);
682 record_potential_basis (c, base);
683 if (flag_expensive_optimizations && kind == CAND_REF)
685 tree alt_base = get_alternative_base (base);
686 if (alt_base)
687 record_potential_basis (c, alt_base);
690 return c;
693 /* Determine the target cost of statement GS when compiling according
694 to SPEED. */
696 static int
697 stmt_cost (gimple *gs, bool speed)
699 tree lhs, rhs1, rhs2;
700 machine_mode lhs_mode;
702 gcc_assert (is_gimple_assign (gs));
703 lhs = gimple_assign_lhs (gs);
704 rhs1 = gimple_assign_rhs1 (gs);
705 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
707 switch (gimple_assign_rhs_code (gs))
709 case MULT_EXPR:
710 rhs2 = gimple_assign_rhs2 (gs);
712 if (tree_fits_shwi_p (rhs2))
713 return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
715 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
716 return mul_cost (speed, lhs_mode);
718 case PLUS_EXPR:
719 case POINTER_PLUS_EXPR:
720 case MINUS_EXPR:
721 return add_cost (speed, lhs_mode);
723 case NEGATE_EXPR:
724 return neg_cost (speed, lhs_mode);
726 CASE_CONVERT:
727 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
729 /* Note that we don't assign costs to copies that in most cases
730 will go away. */
731 case SSA_NAME:
732 return 0;
734 default:
738 gcc_unreachable ();
739 return 0;
742 /* Look up the defining statement for BASE_IN and return a pointer
743 to its candidate in the candidate table, if any; otherwise NULL.
744 Only CAND_ADD and CAND_MULT candidates are returned. */
746 static slsr_cand_t
747 base_cand_from_table (tree base_in)
749 slsr_cand_t *result;
751 gimple *def = SSA_NAME_DEF_STMT (base_in);
752 if (!def)
753 return (slsr_cand_t) NULL;
755 result = stmt_cand_map->get (def);
757 if (result && (*result)->kind != CAND_REF)
758 return *result;
760 return (slsr_cand_t) NULL;
763 /* Add an entry to the statement-to-candidate mapping. */
765 static void
766 add_cand_for_stmt (gimple *gs, slsr_cand_t c)
768 gcc_assert (!stmt_cand_map->put (gs, c));
771 /* Given PHI which contains a phi statement, determine whether it
772 satisfies all the requirements of a phi candidate. If so, create
773 a candidate. Note that a CAND_PHI never has a basis itself, but
774 is used to help find a basis for subsequent candidates. */
776 static void
777 slsr_process_phi (gphi *phi, bool speed)
779 unsigned i;
780 tree arg0_base = NULL_TREE, base_type;
781 slsr_cand_t c;
782 struct loop *cand_loop = gimple_bb (phi)->loop_father;
783 unsigned savings = 0;
785 /* A CAND_PHI requires each of its arguments to have the same
786 derived base name. (See the module header commentary for a
787 definition of derived base names.) Furthermore, all feeding
788 definitions must be in the same position in the loop hierarchy
789 as PHI. */
791 for (i = 0; i < gimple_phi_num_args (phi); i++)
793 slsr_cand_t arg_cand;
794 tree arg = gimple_phi_arg_def (phi, i);
795 tree derived_base_name = NULL_TREE;
796 gimple *arg_stmt = NULL;
797 basic_block arg_bb = NULL;
799 if (TREE_CODE (arg) != SSA_NAME)
800 return;
802 arg_cand = base_cand_from_table (arg);
804 if (arg_cand)
806 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
808 if (!arg_cand->next_interp)
809 return;
811 arg_cand = lookup_cand (arg_cand->next_interp);
814 if (!integer_onep (arg_cand->stride))
815 return;
817 derived_base_name = arg_cand->base_expr;
818 arg_stmt = arg_cand->cand_stmt;
819 arg_bb = gimple_bb (arg_stmt);
821 /* Gather potential dead code savings if the phi statement
822 can be removed later on. */
823 if (uses_consumed_by_stmt (arg, phi))
825 if (gimple_code (arg_stmt) == GIMPLE_PHI)
826 savings += arg_cand->dead_savings;
827 else
828 savings += stmt_cost (arg_stmt, speed);
831 else if (SSA_NAME_IS_DEFAULT_DEF (arg))
833 derived_base_name = arg;
834 arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
837 if (!arg_bb || arg_bb->loop_father != cand_loop)
838 return;
840 if (i == 0)
841 arg0_base = derived_base_name;
842 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
843 return;
846 /* Create the candidate. "alloc_cand_and_find_basis" is named
847 misleadingly for this case, as no basis will be sought for a
848 CAND_PHI. */
849 base_type = TREE_TYPE (arg0_base);
851 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
852 0, integer_one_node, base_type,
853 sizetype, savings);
855 /* Add the candidate to the statement-candidate mapping. */
856 add_cand_for_stmt (phi, c);
859 /* Given PBASE which is a pointer to tree, look up the defining
860 statement for it and check whether the candidate is in the
861 form of:
863 X = B + (1 * S), S is integer constant
864 X = B + (i * S), S is integer one
866    If so, set PBASE to the candidate's base_expr and return the
867    value (i * S) as a widest_int.
868    Otherwise, just return zero.  */
870 static widest_int
871 backtrace_base_for_ref (tree *pbase)
873 tree base_in = *pbase;
874 slsr_cand_t base_cand;
876 STRIP_NOPS (base_in);
878 /* Strip off widening conversion(s) to handle cases where
879 e.g. 'B' is widened from an 'int' in order to calculate
880 a 64-bit address. */
881 if (CONVERT_EXPR_P (base_in)
882 && legal_cast_p_1 (TREE_TYPE (base_in),
883 TREE_TYPE (TREE_OPERAND (base_in, 0))))
884 base_in = get_unwidened (base_in, NULL_TREE);
886 if (TREE_CODE (base_in) != SSA_NAME)
887 return 0;
889 base_cand = base_cand_from_table (base_in);
891 while (base_cand && base_cand->kind != CAND_PHI)
893 if (base_cand->kind == CAND_ADD
894 && base_cand->index == 1
895 && TREE_CODE (base_cand->stride) == INTEGER_CST)
897 /* X = B + (1 * S), S is integer constant. */
898 *pbase = base_cand->base_expr;
899 return wi::to_widest (base_cand->stride);
901 else if (base_cand->kind == CAND_ADD
902 && TREE_CODE (base_cand->stride) == INTEGER_CST
903 && integer_onep (base_cand->stride))
905 /* X = B + (i * S), S is integer one. */
906 *pbase = base_cand->base_expr;
907 return base_cand->index;
910 if (base_cand->next_interp)
911 base_cand = lookup_cand (base_cand->next_interp);
912 else
913 base_cand = NULL;
916 return 0;
919 /* Look for the following pattern:
921 *PBASE: MEM_REF (T1, C1)
923 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
925 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
927 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
929 *PINDEX: C4 * BITS_PER_UNIT
931 If not present, leave the input values unchanged and return FALSE.
932 Otherwise, modify the input values as follows and return TRUE:
934 *PBASE: T1
935 *POFFSET: MULT_EXPR (T2, C3)
936 *PINDEX: C1 + (C2 * C3) + C4
938 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
939 will be further restructured to:
941 *PBASE: T1
942 *POFFSET: MULT_EXPR (T2', C3)
943 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
945 static bool
946 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
947 tree *ptype)
949 tree base = *pbase, offset = *poffset;
950 widest_int index = *pindex;
951 tree mult_op0, t1, t2, type;
952 widest_int c1, c2, c3, c4, c5;
954 if (!base
955 || !offset
956 || TREE_CODE (base) != MEM_REF
957 || TREE_CODE (offset) != MULT_EXPR
958 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
959 || wi::umod_floor (index, BITS_PER_UNIT) != 0)
960 return false;
962 t1 = TREE_OPERAND (base, 0);
963 c1 = widest_int::from (mem_ref_offset (base), SIGNED);
964 type = TREE_TYPE (TREE_OPERAND (base, 1));
966 mult_op0 = TREE_OPERAND (offset, 0);
967 c3 = wi::to_widest (TREE_OPERAND (offset, 1));
969 if (TREE_CODE (mult_op0) == PLUS_EXPR)
971 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
973 t2 = TREE_OPERAND (mult_op0, 0);
974 c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
976 else
977 return false;
979 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
981 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
983 t2 = TREE_OPERAND (mult_op0, 0);
984 c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
986 else
987 return false;
989 else
991 t2 = mult_op0;
992 c2 = 0;
995 c4 = index >> LOG2_BITS_PER_UNIT;
996 c5 = backtrace_base_for_ref (&t2);
998 *pbase = t1;
999 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
1000 wide_int_to_tree (sizetype, c3));
1001 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
1002 *ptype = type;
1004 return true;
1007 /* Given GS which contains a data reference, create a CAND_REF entry in
1008 the candidate table and attempt to find a basis. */
1010 static void
1011 slsr_process_ref (gimple *gs)
1013 tree ref_expr, base, offset, type;
1014 HOST_WIDE_INT bitsize, bitpos;
1015 machine_mode mode;
1016 int unsignedp, reversep, volatilep;
1017 slsr_cand_t c;
1019 if (gimple_vdef (gs))
1020 ref_expr = gimple_assign_lhs (gs);
1021 else
1022 ref_expr = gimple_assign_rhs1 (gs);
1024 if (!handled_component_p (ref_expr)
1025 || TREE_CODE (ref_expr) == BIT_FIELD_REF
1026 || (TREE_CODE (ref_expr) == COMPONENT_REF
1027 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
1028 return;
1030 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
1031 &unsignedp, &reversep, &volatilep);
1032 if (reversep)
1033 return;
1034 widest_int index = bitpos;
1036 if (!restructure_reference (&base, &offset, &index, &type))
1037 return;
1039 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
1040 type, sizetype, 0);
1042 /* Add the candidate to the statement-candidate mapping. */
1043 add_cand_for_stmt (gs, c);
1046 /* Create a candidate entry for a statement GS, where GS multiplies
1047 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
1048 about the two SSA names into the new candidate. Return the new
1049 candidate. */
1051 static slsr_cand_t
1052 create_mul_ssa_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1054 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1055 tree stype = NULL_TREE;
1056 widest_int index;
1057 unsigned savings = 0;
1058 slsr_cand_t c;
1059 slsr_cand_t base_cand = base_cand_from_table (base_in);
1061 /* Look at all interpretations of the base candidate, if necessary,
1062 to find information to propagate into this candidate. */
1063 while (base_cand && !base && base_cand->kind != CAND_PHI)
1066 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1068 /* Y = (B + i') * 1
1069 X = Y * Z
1070 ================
1071 X = (B + i') * Z */
1072 base = base_cand->base_expr;
1073 index = base_cand->index;
1074 stride = stride_in;
1075 ctype = base_cand->cand_type;
1076 stype = TREE_TYPE (stride_in);
1077 if (has_single_use (base_in))
1078 savings = (base_cand->dead_savings
1079 + stmt_cost (base_cand->cand_stmt, speed));
1081 else if (base_cand->kind == CAND_ADD
1082 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1084 /* Y = B + (i' * S), S constant
1085 X = Y * Z
1086 ============================
1087 X = B + ((i' * S) * Z) */
1088 base = base_cand->base_expr;
1089 index = base_cand->index * wi::to_widest (base_cand->stride);
1090 stride = stride_in;
1091 ctype = base_cand->cand_type;
1092 stype = TREE_TYPE (stride_in);
1093 if (has_single_use (base_in))
1094 savings = (base_cand->dead_savings
1095 + stmt_cost (base_cand->cand_stmt, speed));
1098 if (base_cand->next_interp)
1099 base_cand = lookup_cand (base_cand->next_interp);
1100 else
1101 base_cand = NULL;
1104 if (!base)
1106 /* No interpretations had anything useful to propagate, so
1107 produce X = (Y + 0) * Z. */
1108 base = base_in;
1109 index = 0;
1110 stride = stride_in;
1111 ctype = TREE_TYPE (base_in);
1112 stype = TREE_TYPE (stride_in);
1115 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1116 ctype, stype, savings);
1117 return c;
1120 /* Create a candidate entry for a statement GS, where GS multiplies
1121 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1122 information about BASE_IN into the new candidate. Return the new
1123 candidate. */
1125 static slsr_cand_t
1126 create_mul_imm_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
1128 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1129 widest_int index, temp;
1130 unsigned savings = 0;
1131 slsr_cand_t c;
1132 slsr_cand_t base_cand = base_cand_from_table (base_in);
1134 /* Look at all interpretations of the base candidate, if necessary,
1135 to find information to propagate into this candidate. */
1136 while (base_cand && !base && base_cand->kind != CAND_PHI)
1138 if (base_cand->kind == CAND_MULT
1139 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1141 /* Y = (B + i') * S, S constant
1142 X = Y * c
1143 ============================
1144 X = (B + i') * (S * c) */
1145 temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
1146 if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
1148 base = base_cand->base_expr;
1149 index = base_cand->index;
1150 stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
1151 ctype = base_cand->cand_type;
1152 if (has_single_use (base_in))
1153 savings = (base_cand->dead_savings
1154 + stmt_cost (base_cand->cand_stmt, speed));
1157 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1159 /* Y = B + (i' * 1)
1160 X = Y * c
1161 ===========================
1162 X = (B + i') * c */
1163 base = base_cand->base_expr;
1164 index = base_cand->index;
1165 stride = stride_in;
1166 ctype = base_cand->cand_type;
1167 if (has_single_use (base_in))
1168 savings = (base_cand->dead_savings
1169 + stmt_cost (base_cand->cand_stmt, speed));
1171 else if (base_cand->kind == CAND_ADD
1172 && base_cand->index == 1
1173 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1175 /* Y = B + (1 * S), S constant
1176 X = Y * c
1177 ===========================
1178 X = (B + S) * c */
1179 base = base_cand->base_expr;
1180 index = wi::to_widest (base_cand->stride);
1181 stride = stride_in;
1182 ctype = base_cand->cand_type;
1183 if (has_single_use (base_in))
1184 savings = (base_cand->dead_savings
1185 + stmt_cost (base_cand->cand_stmt, speed));
1188 if (base_cand->next_interp)
1189 base_cand = lookup_cand (base_cand->next_interp);
1190 else
1191 base_cand = NULL;
1194 if (!base)
1196 /* No interpretations had anything useful to propagate, so
1197 produce X = (Y + 0) * c. */
1198 base = base_in;
1199 index = 0;
1200 stride = stride_in;
1201 ctype = TREE_TYPE (base_in);
1204 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1205 ctype, sizetype, savings);
1206 return c;
1209 /* Given GS which is a multiply of scalar integers, make an appropriate
1210 entry in the candidate table. If this is a multiply of two SSA names,
1211 create two CAND_MULT interpretations and attempt to find a basis for
1212 each of them. Otherwise, create a single CAND_MULT and attempt to
1213 find a basis. */
1215 static void
1216 slsr_process_mul (gimple *gs, tree rhs1, tree rhs2, bool speed)
1218 slsr_cand_t c, c2;
1220 /* If this is a multiply of an SSA name with itself, it is highly
1221 unlikely that we will get a strength reduction opportunity, so
1222 don't record it as a candidate. This simplifies the logic for
1223 finding a basis, so if this is removed that must be considered. */
1224 if (rhs1 == rhs2)
1225 return;
1227 if (TREE_CODE (rhs2) == SSA_NAME)
1229 /* Record an interpretation of this statement in the candidate table
1230 assuming RHS1 is the base expression and RHS2 is the stride. */
1231 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1233 /* Add the first interpretation to the statement-candidate mapping. */
1234 add_cand_for_stmt (gs, c);
1236 /* Record another interpretation of this statement assuming RHS1
1237 is the stride and RHS2 is the base expression. */
1238 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1239 c->next_interp = c2->cand_num;
1241 else
1243 /* Record an interpretation for the multiply-immediate. */
1244 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1246 /* Add the interpretation to the statement-candidate mapping. */
1247 add_cand_for_stmt (gs, c);
1251 /* Create a candidate entry for a statement GS, where GS adds two
1252 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1253 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1254 information about the two SSA names into the new candidate.
1255 Return the new candidate. */
1257 static slsr_cand_t
1258 create_add_ssa_cand (gimple *gs, tree base_in, tree addend_in,
1259 bool subtract_p, bool speed)
1261 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1262 tree stype = NULL_TREE;
1263 widest_int index;
1264 unsigned savings = 0;
1265 slsr_cand_t c;
1266 slsr_cand_t base_cand = base_cand_from_table (base_in);
1267 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1269 /* The most useful transformation is a multiply-immediate feeding
1270 an add or subtract. Look for that first. */
1271 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1273 if (addend_cand->kind == CAND_MULT
1274 && addend_cand->index == 0
1275 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1277 /* Z = (B + 0) * S, S constant
1278 X = Y +/- Z
1279 ===========================
1280 X = Y + ((+/-1 * S) * B) */
1281 base = base_in;
1282 index = wi::to_widest (addend_cand->stride);
1283 if (subtract_p)
1284 index = -index;
1285 stride = addend_cand->base_expr;
1286 ctype = TREE_TYPE (base_in);
1287 stype = addend_cand->cand_type;
1288 if (has_single_use (addend_in))
1289 savings = (addend_cand->dead_savings
1290 + stmt_cost (addend_cand->cand_stmt, speed));
1293 if (addend_cand->next_interp)
1294 addend_cand = lookup_cand (addend_cand->next_interp);
1295 else
1296 addend_cand = NULL;
1299 while (base_cand && !base && base_cand->kind != CAND_PHI)
1301 if (base_cand->kind == CAND_ADD
1302 && (base_cand->index == 0
1303 || operand_equal_p (base_cand->stride,
1304 integer_zero_node, 0)))
1306 /* Y = B + (i' * S), i' * S = 0
1307 X = Y +/- Z
1308 ============================
1309 X = B + (+/-1 * Z) */
1310 base = base_cand->base_expr;
1311 index = subtract_p ? -1 : 1;
1312 stride = addend_in;
1313 ctype = base_cand->cand_type;
1314 stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1315 : TREE_TYPE (addend_in));
1316 if (has_single_use (base_in))
1317 savings = (base_cand->dead_savings
1318 + stmt_cost (base_cand->cand_stmt, speed));
1320 else if (subtract_p)
1322 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1324 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1326 if (subtrahend_cand->kind == CAND_MULT
1327 && subtrahend_cand->index == 0
1328 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1330 /* Z = (B + 0) * S, S constant
1331 X = Y - Z
1332 ===========================
1333 Value: X = Y + ((-1 * S) * B) */
1334 base = base_in;
1335 index = wi::to_widest (subtrahend_cand->stride);
1336 index = -index;
1337 stride = subtrahend_cand->base_expr;
1338 ctype = TREE_TYPE (base_in);
1339 stype = subtrahend_cand->cand_type;
1340 if (has_single_use (addend_in))
1341 savings = (subtrahend_cand->dead_savings
1342 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1345 if (subtrahend_cand->next_interp)
1346 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1347 else
1348 subtrahend_cand = NULL;
1352 if (base_cand->next_interp)
1353 base_cand = lookup_cand (base_cand->next_interp);
1354 else
1355 base_cand = NULL;
1358 if (!base)
1360 /* No interpretations had anything useful to propagate, so
1361 produce X = Y + (1 * Z). */
1362 base = base_in;
1363 index = subtract_p ? -1 : 1;
1364 stride = addend_in;
1365 ctype = TREE_TYPE (base_in);
1366 stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
1367 : TREE_TYPE (addend_in));
1370 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1371 ctype, stype, savings);
1372 return c;
1375 /* Create a candidate entry for a statement GS, where GS adds SSA
1376 name BASE_IN to constant INDEX_IN. Propagate any known information
1377 about BASE_IN into the new candidate. Return the new candidate. */
1379 static slsr_cand_t
1380 create_add_imm_cand (gimple *gs, tree base_in, const widest_int &index_in,
1381 bool speed)
1383 enum cand_kind kind = CAND_ADD;
1384 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1385 tree stype = NULL_TREE;
1386 widest_int index, multiple;
1387 unsigned savings = 0;
1388 slsr_cand_t c;
1389 slsr_cand_t base_cand = base_cand_from_table (base_in);
1391 while (base_cand && !base && base_cand->kind != CAND_PHI)
1393 signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
1395 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1396 && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
1397 sign, &multiple))
1399 /* Y = (B + i') * S, S constant, c = kS for some integer k
1400 X = Y + c
1401 ============================
1402 X = (B + (i'+ k)) * S
1404 Y = B + (i' * S), S constant, c = kS for some integer k
1405 X = Y + c
1406 ============================
1407 X = (B + (i'+ k)) * S */
1408 kind = base_cand->kind;
1409 base = base_cand->base_expr;
1410 index = base_cand->index + multiple;
1411 stride = base_cand->stride;
1412 ctype = base_cand->cand_type;
1413 stype = base_cand->stride_type;
1414 if (has_single_use (base_in))
1415 savings = (base_cand->dead_savings
1416 + stmt_cost (base_cand->cand_stmt, speed));
1419 if (base_cand->next_interp)
1420 base_cand = lookup_cand (base_cand->next_interp);
1421 else
1422 base_cand = NULL;
1425 if (!base)
1427 /* No interpretations had anything useful to propagate, so
1428 produce X = Y + (c * 1). */
1429 kind = CAND_ADD;
1430 base = base_in;
1431 index = index_in;
1432 stride = integer_one_node;
1433 ctype = TREE_TYPE (base_in);
1434 stype = sizetype;
1437 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1438 ctype, stype, savings);
1439 return c;
1442 /* Given GS which is an add or subtract of scalar integers or pointers,
1443 make at least one appropriate entry in the candidate table. */
1445 static void
1446 slsr_process_add (gimple *gs, tree rhs1, tree rhs2, bool speed)
1448 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1449 slsr_cand_t c = NULL, c2;
1451 if (TREE_CODE (rhs2) == SSA_NAME)
1453 /* First record an interpretation assuming RHS1 is the base expression
1454 and RHS2 is the stride. But it doesn't make sense for the
1455 stride to be a pointer, so don't record a candidate in that case. */
1456 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1458 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1460 /* Add the first interpretation to the statement-candidate
1461 mapping. */
1462 add_cand_for_stmt (gs, c);
1465 /* If the two RHS operands are identical, or this is a subtract,
1466 we're done. */
1467 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1468 return;
1470 /* Otherwise, record another interpretation assuming RHS2 is the
1471 base expression and RHS1 is the stride, again provided that the
1472 stride is not a pointer. */
1473 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1475 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1476 if (c)
1477 c->next_interp = c2->cand_num;
1478 else
1479 add_cand_for_stmt (gs, c2);
1482 else
1484 /* Record an interpretation for the add-immediate. */
1485 widest_int index = wi::to_widest (rhs2);
1486 if (subtract_p)
1487 index = -index;
1489 c = create_add_imm_cand (gs, rhs1, index, speed);
1491 /* Add the interpretation to the statement-candidate mapping. */
1492 add_cand_for_stmt (gs, c);
1496 /* Given GS which is a negate of a scalar integer, make an appropriate
1497 entry in the candidate table. A negate is equivalent to a multiply
1498 by -1. */
1500 static void
1501 slsr_process_neg (gimple *gs, tree rhs1, bool speed)
1503 /* Record a CAND_MULT interpretation for the multiply by -1. */
1504 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1506 /* Add the interpretation to the statement-candidate mapping. */
1507 add_cand_for_stmt (gs, c);
1510 /* Helper function for legal_cast_p, operating on two trees.  Checks
1511 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1512 for more details. */
1514 static bool
1515 legal_cast_p_1 (tree lhs_type, tree rhs_type)
1517 unsigned lhs_size, rhs_size;
1518 bool lhs_wraps, rhs_wraps;
1520 lhs_size = TYPE_PRECISION (lhs_type);
1521 rhs_size = TYPE_PRECISION (rhs_type);
1522 lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
1523 rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);
1525 if (lhs_size < rhs_size
1526 || (rhs_wraps && !lhs_wraps)
1527 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1528 return false;
1530 return true;
1533 /* Return TRUE if GS is a statement that defines an SSA name from
1534 a conversion and is legal for us to combine with an add and multiply
1535 in the candidate table. For example, suppose we have:
1537 A = B + i;
1538 C = (type) A;
1539 D = C * S;
1541 Without the type-cast, we would create a CAND_MULT for D with base B,
1542 index i, and stride S. We want to record this candidate only if it
1543    is equivalent to applying the type cast following the multiply:
1545 A = B + i;
1546 E = A * S;
1547 D = (type) E;
1549 We will record the type with the candidate for D. This allows us
1550 to use a similar previous candidate as a basis. If we have earlier seen
1552 A' = B + i';
1553 C' = (type) A';
1554 D' = C' * S;
1556 we can replace D with
1558 D = D' + (i - i') * S;
1560 But if moving the type-cast would change semantics, we mustn't do this.
1562 This is legitimate for casts from a non-wrapping integral type to
1563 any integral type of the same or larger size. It is not legitimate
1564 to convert a wrapping type to a non-wrapping type, or to a wrapping
1565 type of a different size. I.e., with a wrapping type, we must
1566 assume that the addition B + i could wrap, in which case performing
1567 the multiply before or after one of the "illegal" type casts will
1568 have different semantics. */
1570 static bool
1571 legal_cast_p (gimple *gs, tree rhs)
1573 if (!is_gimple_assign (gs)
1574 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1575 return false;
1577 return legal_cast_p_1 (TREE_TYPE (gimple_assign_lhs (gs)), TREE_TYPE (rhs));
1580 /* Given GS which is a cast to a scalar integer type, determine whether
1581 the cast is legal for strength reduction. If so, make at least one
1582 appropriate entry in the candidate table. */
1584 static void
1585 slsr_process_cast (gimple *gs, tree rhs1, bool speed)
1587 tree lhs, ctype;
1588 slsr_cand_t base_cand, c = NULL, c2;
1589 unsigned savings = 0;
1591 if (!legal_cast_p (gs, rhs1))
1592 return;
1594 lhs = gimple_assign_lhs (gs);
1595 base_cand = base_cand_from_table (rhs1);
1596 ctype = TREE_TYPE (lhs);
1598 if (base_cand && base_cand->kind != CAND_PHI)
1600 while (base_cand)
1602 /* Propagate all data from the base candidate except the type,
1603 which comes from the cast, and the base candidate's cast,
1604 which is no longer applicable. */
1605 if (has_single_use (rhs1))
1606 savings = (base_cand->dead_savings
1607 + stmt_cost (base_cand->cand_stmt, speed));
1609 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1610 base_cand->base_expr,
1611 base_cand->index, base_cand->stride,
1612 ctype, base_cand->stride_type,
1613 savings);
1614 if (base_cand->next_interp)
1615 base_cand = lookup_cand (base_cand->next_interp);
1616 else
1617 base_cand = NULL;
1620 else
1622 /* If nothing is known about the RHS, create fresh CAND_ADD and
1623 CAND_MULT interpretations:
1625 X = Y + (0 * 1)
1626 X = (Y + 0) * 1
1628 The first of these is somewhat arbitrary, but the choice of
1629 1 for the stride simplifies the logic for propagating casts
1630 into their uses. */
1631 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1632 integer_one_node, ctype, sizetype, 0);
1633 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1634 integer_one_node, ctype, sizetype, 0);
1635 c->next_interp = c2->cand_num;
1638 /* Add the first (or only) interpretation to the statement-candidate
1639 mapping. */
1640 add_cand_for_stmt (gs, c);
1643 /* Given GS which is a copy of a scalar integer type, make at least one
1644 appropriate entry in the candidate table.
1646 This interface is included for completeness, but is unnecessary
1647 if this pass immediately follows a pass that performs copy
1648 propagation, such as DOM. */
1650 static void
1651 slsr_process_copy (gimple *gs, tree rhs1, bool speed)
1653 slsr_cand_t base_cand, c = NULL, c2;
1654 unsigned savings = 0;
1656 base_cand = base_cand_from_table (rhs1);
1658 if (base_cand && base_cand->kind != CAND_PHI)
1660 while (base_cand)
1662 /* Propagate all data from the base candidate. */
1663 if (has_single_use (rhs1))
1664 savings = (base_cand->dead_savings
1665 + stmt_cost (base_cand->cand_stmt, speed));
1667 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1668 base_cand->base_expr,
1669 base_cand->index, base_cand->stride,
1670 base_cand->cand_type,
1671 base_cand->stride_type, savings);
1672 if (base_cand->next_interp)
1673 base_cand = lookup_cand (base_cand->next_interp);
1674 else
1675 base_cand = NULL;
1678 else
1680 /* If nothing is known about the RHS, create fresh CAND_ADD and
1681 CAND_MULT interpretations:
1683 X = Y + (0 * 1)
1684 X = (Y + 0) * 1
1686 The first of these is somewhat arbitrary, but the choice of
1687 1 for the stride simplifies the logic for propagating casts
1688 into their uses. */
1689 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
1690 integer_one_node, TREE_TYPE (rhs1),
1691 sizetype, 0);
1692 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
1693 integer_one_node, TREE_TYPE (rhs1),
1694 sizetype, 0);
1695 c->next_interp = c2->cand_num;
1698 /* Add the first (or only) interpretation to the statement-candidate
1699 mapping. */
1700 add_cand_for_stmt (gs, c);
1703 class find_candidates_dom_walker : public dom_walker
1705 public:
1706 find_candidates_dom_walker (cdi_direction direction)
1707 : dom_walker (direction) {}
1708 virtual edge before_dom_children (basic_block);
1711 /* Find strength-reduction candidates in block BB. */
1713 edge
1714 find_candidates_dom_walker::before_dom_children (basic_block bb)
1716 bool speed = optimize_bb_for_speed_p (bb);
1718 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1719 gsi_next (&gsi))
1720 slsr_process_phi (gsi.phi (), speed);
1722 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1723 gsi_next (&gsi))
1725 gimple *gs = gsi_stmt (gsi);
1727 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1728 slsr_process_ref (gs);
1730 else if (is_gimple_assign (gs)
1731 && SCALAR_INT_MODE_P
1732 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1734 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1736 switch (gimple_assign_rhs_code (gs))
1738 case MULT_EXPR:
1739 case PLUS_EXPR:
1740 rhs1 = gimple_assign_rhs1 (gs);
1741 rhs2 = gimple_assign_rhs2 (gs);
1742 /* Should never happen, but currently some buggy situations
1743 in earlier phases put constants in rhs1. */
1744 if (TREE_CODE (rhs1) != SSA_NAME)
1745 continue;
1746 break;
1748 /* Possible future opportunity: rhs1 of a ptr+ can be
1749 an ADDR_EXPR. */
1750 case POINTER_PLUS_EXPR:
1751 case MINUS_EXPR:
1752 rhs2 = gimple_assign_rhs2 (gs);
1753 gcc_fallthrough ();
1755 CASE_CONVERT:
1756 case SSA_NAME:
1757 case NEGATE_EXPR:
1758 rhs1 = gimple_assign_rhs1 (gs);
1759 if (TREE_CODE (rhs1) != SSA_NAME)
1760 continue;
1761 break;
1763 default:
1767 switch (gimple_assign_rhs_code (gs))
1769 case MULT_EXPR:
1770 slsr_process_mul (gs, rhs1, rhs2, speed);
1771 break;
1773 case PLUS_EXPR:
1774 case POINTER_PLUS_EXPR:
1775 case MINUS_EXPR:
1776 slsr_process_add (gs, rhs1, rhs2, speed);
1777 break;
1779 case NEGATE_EXPR:
1780 slsr_process_neg (gs, rhs1, speed);
1781 break;
1783 CASE_CONVERT:
1784 slsr_process_cast (gs, rhs1, speed);
1785 break;
1787 case SSA_NAME:
1788 slsr_process_copy (gs, rhs1, speed);
1789 break;
1791 default:
1796 return NULL;
1799 /* Dump a candidate for debug. */
1801 static void
1802 dump_candidate (slsr_cand_t c)
1804 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1805 gimple_bb (c->cand_stmt)->index);
1806 print_gimple_stmt (dump_file, c->cand_stmt, 0);
1807 switch (c->kind)
1809 case CAND_MULT:
1810 fputs (" MULT : (", dump_file);
1811 print_generic_expr (dump_file, c->base_expr);
1812 fputs (" + ", dump_file);
1813 print_decs (c->index, dump_file);
1814 fputs (") * ", dump_file);
1815 if (TREE_CODE (c->stride) != INTEGER_CST
1816 && c->stride_type != TREE_TYPE (c->stride))
1818 fputs ("(", dump_file);
1819 print_generic_expr (dump_file, c->stride_type);
1820 fputs (")", dump_file);
1822 print_generic_expr (dump_file, c->stride);
1823 fputs (" : ", dump_file);
1824 break;
1825 case CAND_ADD:
1826 fputs (" ADD : ", dump_file);
1827 print_generic_expr (dump_file, c->base_expr);
1828 fputs (" + (", dump_file);
1829 print_decs (c->index, dump_file);
1830 fputs (" * ", dump_file);
1831 if (TREE_CODE (c->stride) != INTEGER_CST
1832 && c->stride_type != TREE_TYPE (c->stride))
1834 fputs ("(", dump_file);
1835 print_generic_expr (dump_file, c->stride_type);
1836 fputs (")", dump_file);
1838 print_generic_expr (dump_file, c->stride);
1839 fputs (") : ", dump_file);
1840 break;
1841 case CAND_REF:
1842 fputs (" REF : ", dump_file);
1843 print_generic_expr (dump_file, c->base_expr);
1844 fputs (" + (", dump_file);
1845 print_generic_expr (dump_file, c->stride);
1846 fputs (") + ", dump_file);
1847 print_decs (c->index, dump_file);
1848 fputs (" : ", dump_file);
1849 break;
1850 case CAND_PHI:
1851 fputs (" PHI : ", dump_file);
1852 print_generic_expr (dump_file, c->base_expr);
1853 fputs (" + (unknown * ", dump_file);
1854 print_generic_expr (dump_file, c->stride);
1855 fputs (") : ", dump_file);
1856 break;
1857 default:
1858 gcc_unreachable ();
1860 print_generic_expr (dump_file, c->cand_type);
1861 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1862 c->basis, c->dependent, c->sibling);
1863 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1864 c->next_interp, c->dead_savings);
1865 if (c->def_phi)
1866 fprintf (dump_file, " phi: %d\n", c->def_phi);
1867 fputs ("\n", dump_file);
1870 /* Dump the candidate vector for debug. */
1872 static void
1873 dump_cand_vec (void)
1875 unsigned i;
1876 slsr_cand_t c;
1878 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1880 FOR_EACH_VEC_ELT (cand_vec, i, c)
1881 dump_candidate (c);
1884 /* Callback used to dump the candidate chains hash table. */
1887 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1889 const_cand_chain_t chain = *slot;
1890 cand_chain_t p;
1892 print_generic_expr (dump_file, chain->base_expr);
1893 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1895 for (p = chain->next; p; p = p->next)
1896 fprintf (dump_file, " -> %d", p->cand->cand_num);
1898 fputs ("\n", dump_file);
1899 return 1;
1902 /* Dump the candidate chains. */
1904 static void
1905 dump_cand_chains (void)
1907 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1908 base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1909 (NULL);
1910 fputs ("\n", dump_file);
1913 /* Dump the increment vector for debug. */
1915 static void
1916 dump_incr_vec (void)
1918 if (dump_file && (dump_flags & TDF_DETAILS))
1920 unsigned i;
1922 fprintf (dump_file, "\nIncrement vector:\n\n");
1924 for (i = 0; i < incr_vec_len; i++)
1926 fprintf (dump_file, "%3d increment: ", i);
1927 print_decs (incr_vec[i].incr, dump_file);
1928 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1929 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1930 fputs ("\n initializer: ", dump_file);
1931 print_generic_expr (dump_file, incr_vec[i].initializer);
1932 fputs ("\n\n", dump_file);
1937 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1938 data reference. */
1940 static void
1941 replace_ref (tree *expr, slsr_cand_t c)
1943 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1944 unsigned HOST_WIDE_INT misalign;
1945 unsigned align;
1947 /* Ensure the memory reference carries the minimum alignment
1948 requirement for the data type. See PR58041. */
1949 get_object_alignment_1 (*expr, &align, &misalign);
1950 if (misalign != 0)
1951 align = least_bit_hwi (misalign);
1952 if (align < TYPE_ALIGN (acc_type))
1953 acc_type = build_aligned_type (acc_type, align);
1955 add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type,
1956 c->base_expr, c->stride);
1957 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1958 wide_int_to_tree (c->cand_type, c->index));
1960 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1961 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1962 TREE_OPERAND (mem_ref, 0)
1963 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1964 /*simple_p=*/true, NULL,
1965 /*before=*/true, GSI_SAME_STMT);
1966 copy_ref_info (mem_ref, *expr);
1967 *expr = mem_ref;
1968 update_stmt (c->cand_stmt);
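/* [Editorial illustration; invented names, for orientation only.]  For a
   CAND_REF whose base_expr is p_1, whose stride is _off_7, and whose
   index is 16, replace_ref would produce something along the lines of:

       _9 = p_1 + _off_7;              gimplified base address
       ... = MEM[(int *)_9 + 16B];     strength-reduced reference

   assuming an int access type; the exact tree depends on the access type
   and on the alignment adjustment made above.  */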
1971 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1972 dependent of candidate C with an equivalent strength-reduced data
1973 reference. */
1975 static void
1976 replace_refs (slsr_cand_t c)
1978 if (dump_file && (dump_flags & TDF_DETAILS))
1980 fputs ("Replacing reference: ", dump_file);
1981 print_gimple_stmt (dump_file, c->cand_stmt, 0);
1984 if (gimple_vdef (c->cand_stmt))
1986 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1987 replace_ref (lhs, c);
1989 else
1991 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1992 replace_ref (rhs, c);
1995 if (dump_file && (dump_flags & TDF_DETAILS))
1997 fputs ("With: ", dump_file);
1998 print_gimple_stmt (dump_file, c->cand_stmt, 0);
1999 fputs ("\n", dump_file);
2002 if (c->sibling)
2003 replace_refs (lookup_cand (c->sibling));
2005 if (c->dependent)
2006 replace_refs (lookup_cand (c->dependent));
2009 /* Return TRUE if candidate C is dependent upon a PHI. */
2011 static bool
2012 phi_dependent_cand_p (slsr_cand_t c)
2014 /* A candidate is not necessarily dependent upon a PHI just because
2015 it has a phi definition for its base name. It may have a basis
2016 that relies upon the same phi definition, in which case the PHI
2017 is irrelevant to this candidate. */
2018 return (c->def_phi
2019 && c->basis
2020 && lookup_cand (c->basis)->def_phi != c->def_phi);
2023 /* Calculate the increment required for candidate C relative to
2024 its basis. */
2026 static widest_int
2027 cand_increment (slsr_cand_t c)
2029 slsr_cand_t basis;
2031 /* If the candidate doesn't have a basis, just return its own
2032 index. This is useful in record_increments to help us find
2033 an existing initializer. Also, if the candidate's basis is
2034 hidden by a phi, then its own index will be the increment
2035 from the newly introduced phi basis. */
2036 if (!c->basis || phi_dependent_cand_p (c))
2037 return c->index;
2039 basis = lookup_cand (c->basis);
2040 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
2041 return c->index - basis->index;
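/* [Editorial note.]  A hypothetical example of the computation above: if
   the basis is Y = (B + 2) * S and the candidate is X = (B + 7) * S,
   cand_increment returns 7 - 2 = 5.  If the basis is hidden by a phi,
   the candidate's own index (7 here) is returned instead.  */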
2044 /* Calculate the increment required for candidate C relative to
2045 its basis. If we aren't going to generate pointer arithmetic
2046 for this candidate, return the absolute value of that increment
2047 instead. */
2049 static inline widest_int
2050 cand_abs_increment (slsr_cand_t c)
2052 widest_int increment = cand_increment (c);
2054 if (!address_arithmetic_p && wi::neg_p (increment))
2055 increment = -increment;
2057 return increment;
2060 /* Return TRUE iff candidate C has already been replaced under
2061 another interpretation. */
2063 static inline bool
2064 cand_already_replaced (slsr_cand_t c)
2066 return (gimple_bb (c->cand_stmt) == 0);
2069 /* Common logic used by replace_unconditional_candidate and
2070 replace_conditional_candidate. */
2072 static void
2073 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
2075 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2076 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2078 /* It is highly unlikely, but possible, that the resulting
2079 bump doesn't fit in a HWI. Abandon the replacement
2080 in this case. This does not affect siblings or dependents
2081 of C. Restriction to signed HWI is conservative for unsigned
2082 types but allows for safe negation without twisted logic. */
2083 if (wi::fits_shwi_p (bump)
2084 && bump.to_shwi () != HOST_WIDE_INT_MIN
2085 /* It is not useful to replace casts, copies, or adds of
2086 an SSA name and a constant. */
2087 && cand_code != SSA_NAME
2088 && !CONVERT_EXPR_CODE_P (cand_code)
2089 && cand_code != PLUS_EXPR
2090 && cand_code != POINTER_PLUS_EXPR
2091 && cand_code != MINUS_EXPR)
2093 enum tree_code code = PLUS_EXPR;
2094 tree bump_tree;
2095 gimple *stmt_to_print = NULL;
2097 /* If the basis name and the candidate's LHS have incompatible
2098 types, introduce a cast. */
2099 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2100 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2101 if (wi::neg_p (bump))
2103 code = MINUS_EXPR;
2104 bump = -bump;
2107 bump_tree = wide_int_to_tree (target_type, bump);
2109 if (dump_file && (dump_flags & TDF_DETAILS))
2111 fputs ("Replacing: ", dump_file);
2112 print_gimple_stmt (dump_file, c->cand_stmt, 0);
2115 if (bump == 0)
2117 tree lhs = gimple_assign_lhs (c->cand_stmt);
2118 gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
2119 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2120 slsr_cand_t cc = c;
2121 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2122 gsi_replace (&gsi, copy_stmt, false);
2123 c->cand_stmt = copy_stmt;
2124 while (cc->next_interp)
2126 cc = lookup_cand (cc->next_interp);
2127 cc->cand_stmt = copy_stmt;
2129 if (dump_file && (dump_flags & TDF_DETAILS))
2130 stmt_to_print = copy_stmt;
2132 else
2134 tree rhs1, rhs2;
2135 if (cand_code != NEGATE_EXPR) {
2136 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2137 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2139 if (cand_code != NEGATE_EXPR
2140 && ((operand_equal_p (rhs1, basis_name, 0)
2141 && operand_equal_p (rhs2, bump_tree, 0))
2142 || (operand_equal_p (rhs1, bump_tree, 0)
2143 && operand_equal_p (rhs2, basis_name, 0))))
2145 if (dump_file && (dump_flags & TDF_DETAILS))
2147 fputs ("(duplicate, not actually replacing)", dump_file);
2148 stmt_to_print = c->cand_stmt;
2151 else
2153 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2154 slsr_cand_t cc = c;
2155 gimple_assign_set_rhs_with_ops (&gsi, code,
2156 basis_name, bump_tree);
2157 update_stmt (gsi_stmt (gsi));
2158 c->cand_stmt = gsi_stmt (gsi);
2159 while (cc->next_interp)
2161 cc = lookup_cand (cc->next_interp);
2162 cc->cand_stmt = gsi_stmt (gsi);
2164 if (dump_file && (dump_flags & TDF_DETAILS))
2165 stmt_to_print = gsi_stmt (gsi);
2169 if (dump_file && (dump_flags & TDF_DETAILS))
2171 fputs ("With: ", dump_file);
2172 print_gimple_stmt (dump_file, stmt_to_print, 0);
2173 fputs ("\n", dump_file);
2178 /* Replace candidate C with an add or subtract. Note that we only
2179 operate on CAND_MULTs with known strides, so we will never generate
2180 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2181 X = Y + ((i - i') * S), as described in the module commentary. The
2182 folded value ((i - i') * S) is referred to here as the "bump." */
2184 static void
2185 replace_unconditional_candidate (slsr_cand_t c)
2187 slsr_cand_t basis;
2189 if (cand_already_replaced (c))
2190 return;
2192 basis = lookup_cand (c->basis);
2193 widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
2195 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
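/* [Editorial illustration with invented numbers.]  Continuing the example
   above with a known stride of 4: for a basis Y = (B + 2) * 4 and a
   candidate X = (B + 7) * 4, the bump is (7 - 2) * 4 = 20, and
   replace_mult_candidate rewrites the candidate as roughly

       X = Y + 20;

   A negative bump would instead produce a MINUS_EXPR of its absolute
   value, as handled above.  */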
2198 /* Return the index in the increment vector of the given INCREMENT,
2199 or -1 if not found. The latter can occur if more than
2200 MAX_INCR_VEC_LEN increments have been found. */
2202 static inline int
2203 incr_vec_index (const widest_int &increment)
2205 unsigned i;
2207 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2210 if (i < incr_vec_len)
2211 return i;
2212 else
2213 return -1;
2216 /* Create a new statement along edge E to add BASIS_NAME to the product
2217 of INCREMENT and the stride of candidate C. Create and return a new
2218 SSA name to be used as the LHS of the new statement.
2219 KNOWN_STRIDE is true iff C's stride is a constant. */
2221 static tree
2222 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2223 widest_int increment, edge e, location_t loc,
2224 bool known_stride)
2226 basic_block insert_bb;
2227 gimple_stmt_iterator gsi;
2228 tree lhs, basis_type;
2229 gassign *new_stmt, *cast_stmt = NULL;
2231 /* If the add candidate along this incoming edge has the same
2232 index as C's hidden basis, the hidden basis represents this
2233 edge correctly. */
2234 if (increment == 0)
2235 return basis_name;
2237 basis_type = TREE_TYPE (basis_name);
2238 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2240 /* Occasionally people convert integers to pointers without a
2241 cast, leading us into trouble if we aren't careful. */
2242 enum tree_code plus_code
2243 = POINTER_TYPE_P (basis_type) ? POINTER_PLUS_EXPR : PLUS_EXPR;
2245 if (known_stride)
2247 tree bump_tree;
2248 enum tree_code code = plus_code;
2249 widest_int bump = increment * wi::to_widest (c->stride);
2250 if (wi::neg_p (bump) && !POINTER_TYPE_P (basis_type))
2252 code = MINUS_EXPR;
2253 bump = -bump;
2256 tree stride_type = POINTER_TYPE_P (basis_type) ? sizetype : basis_type;
2257 bump_tree = wide_int_to_tree (stride_type, bump);
2258 new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree);
2260 else
2262 int i;
2263 bool negate_incr = !POINTER_TYPE_P (basis_type) && wi::neg_p (increment);
2264 i = incr_vec_index (negate_incr ? -increment : increment);
2265 gcc_assert (i >= 0);
2267 if (incr_vec[i].initializer)
2269 enum tree_code code = negate_incr ? MINUS_EXPR : plus_code;
2270 new_stmt = gimple_build_assign (lhs, code, basis_name,
2271 incr_vec[i].initializer);
2273 else {
2274 tree stride;
2276 if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
2278 tree cast_stride = make_temp_ssa_name (c->stride_type, NULL,
2279 "slsr");
2280 cast_stmt = gimple_build_assign (cast_stride, NOP_EXPR,
2281 c->stride);
2282 stride = cast_stride;
2284 else
2285 stride = c->stride;
2287 if (increment == 1)
2288 new_stmt = gimple_build_assign (lhs, plus_code, basis_name, stride);
2289 else if (increment == -1)
2290 new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name, stride);
2291 else
2292 gcc_unreachable ();
2296 insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
2297 gsi = gsi_last_bb (insert_bb);
2299 if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
2301 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
2302 if (cast_stmt)
2304 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
2305 gimple_set_location (cast_stmt, loc);
2308 else
2310 if (cast_stmt)
2312 gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
2313 gimple_set_location (cast_stmt, loc);
2315 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2318 gimple_set_location (new_stmt, loc);
2320 if (dump_file && (dump_flags & TDF_DETAILS))
2322 if (cast_stmt)
2324 fprintf (dump_file, "Inserting cast in block %d: ",
2325 insert_bb->index);
2326 print_gimple_stmt (dump_file, cast_stmt, 0);
2328 fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
2329 print_gimple_stmt (dump_file, new_stmt, 0);
2332 return lhs;
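/* [Editorial illustration; invented values.]  If the add candidate along
   an incoming edge differs from the hidden basis by an increment of 2
   and the stride is a known constant 4, the routine above inserts
   something like

       slsr_8 = basis_6 + 8;

   at the end of the predecessor block (or in a block created by
   splitting the edge), where basis_6 names the hidden basis.  */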
2335 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2336 is hidden by the phi node FROM_PHI, create a new phi node in the same
2337 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2338 with its phi arguments representing conditional adjustments to the
2339 hidden basis along conditional incoming paths. Those adjustments are
2340 made by creating add statements (and sometimes recursively creating
2341 phis) along those incoming paths. LOC is the location to attach to
2342 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2343 constant. */
2345 static tree
2346 create_phi_basis (slsr_cand_t c, gimple *from_phi, tree basis_name,
2347 location_t loc, bool known_stride)
2349 int i;
2350 tree name, phi_arg;
2351 gphi *phi;
2352 slsr_cand_t basis = lookup_cand (c->basis);
2353 int nargs = gimple_phi_num_args (from_phi);
2354 basic_block phi_bb = gimple_bb (from_phi);
2355 slsr_cand_t phi_cand = *stmt_cand_map->get (from_phi);
2356 auto_vec<tree> phi_args (nargs);
2358 /* Process each argument of the existing phi that represents
2359 conditionally-executed add candidates. */
2360 for (i = 0; i < nargs; i++)
2362 edge e = (*phi_bb->preds)[i];
2363 tree arg = gimple_phi_arg_def (from_phi, i);
2364 tree feeding_def;
2366 /* If the phi argument is the base name of the CAND_PHI, then
2367 this incoming arc should use the hidden basis. */
2368 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2369 if (basis->index == 0)
2370 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2371 else
2373 widest_int incr = -basis->index;
2374 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2375 e, loc, known_stride);
2377 else
2379 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2381 /* If there is another phi along this incoming edge, we must
2382 process it in the same fashion to ensure that all basis
2383 adjustments are made along its incoming edges. */
2384 if (gimple_code (arg_def) == GIMPLE_PHI)
2385 feeding_def = create_phi_basis (c, arg_def, basis_name,
2386 loc, known_stride);
2387 else
2389 slsr_cand_t arg_cand = base_cand_from_table (arg);
2390 widest_int diff = arg_cand->index - basis->index;
2391 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2392 e, loc, known_stride);
2396 /* Because of recursion, we need to save the arguments in a vector
2397 so we can create the PHI statement all at once. Otherwise the
2398 storage for the half-created PHI can be reclaimed. */
2399 phi_args.safe_push (feeding_def);
2402 /* Create the new phi basis. */
2403 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2404 phi = create_phi_node (name, phi_bb);
2405 SSA_NAME_DEF_STMT (name) = phi;
2407 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2409 edge e = (*phi_bb->preds)[i];
2410 add_phi_arg (phi, phi_arg, e, loc);
2413 update_stmt (phi);
2415 if (dump_file && (dump_flags & TDF_DETAILS))
2417 fputs ("Introducing new phi basis: ", dump_file);
2418 print_gimple_stmt (dump_file, phi, 0);
2421 return name;
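/* [Editorial illustration; a hypothetical conditional candidate.]  Given
   code of roughly this shape:

       x_1 = n_2 * 4;                  basis
       if (...)
         n_3 = n_2 + 1;
       n_4 = PHI <n_2, n_3>
       x_5 = n_4 * 4;                  candidate hidden by the phi

   the routine above introduces, along the arc where n_3 is defined, an
   add such as slsr_6 = x_1 + 4, and then a new phi

       slsr_7 = PHI <x_1, slsr_6>

   which serves as the basis when x_5 is later replaced.  */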
2424 /* Given a candidate C whose basis is hidden by at least one intervening
2425 phi, introduce a matching number of new phis to represent its basis
2426 adjusted by conditional increments along possible incoming paths. Then
2427 replace C as though it were an unconditional candidate, using the new
2428 basis. */
2430 static void
2431 replace_conditional_candidate (slsr_cand_t c)
2433 tree basis_name, name;
2434 slsr_cand_t basis;
2435 location_t loc;
2437 /* Look up the LHS SSA name from C's basis. This will be the
2438 RHS1 of the adds we will introduce to create new phi arguments. */
2439 basis = lookup_cand (c->basis);
2440 basis_name = gimple_assign_lhs (basis->cand_stmt);
2442 /* Create a new phi statement which will represent C's true basis
2443 after the transformation is complete. */
2444 loc = gimple_location (c->cand_stmt);
2445 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2446 basis_name, loc, KNOWN_STRIDE);
2447 /* Replace C with an add of the new basis phi and a constant. */
2448 widest_int bump = c->index * wi::to_widest (c->stride);
2450 replace_mult_candidate (c, name, bump);
2453 /* Compute the expected costs of inserting basis adjustments for
2454 candidate C with phi-definition PHI. The cost of inserting
2455 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2456 which are themselves phi results, recursively calculate costs
2457 for those phis as well. */
2459 static int
2460 phi_add_costs (gimple *phi, slsr_cand_t c, int one_add_cost)
2462 unsigned i;
2463 int cost = 0;
2464 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2466 /* If we work our way back to a phi that isn't dominated by the hidden
2467 basis, this isn't a candidate for replacement. Indicate this by
2468 returning an unreasonably high cost. It's not easy to detect
2469 these situations when determining the basis, so we defer the
2470 decision until now. */
2471 basic_block phi_bb = gimple_bb (phi);
2472 slsr_cand_t basis = lookup_cand (c->basis);
2473 basic_block basis_bb = gimple_bb (basis->cand_stmt);
2475 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2476 return COST_INFINITE;
2478 for (i = 0; i < gimple_phi_num_args (phi); i++)
2480 tree arg = gimple_phi_arg_def (phi, i);
2482 if (arg != phi_cand->base_expr)
2484 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2486 if (gimple_code (arg_def) == GIMPLE_PHI)
2487 cost += phi_add_costs (arg_def, c, one_add_cost);
2488 else
2490 slsr_cand_t arg_cand = base_cand_from_table (arg);
2492 if (arg_cand->index != c->index)
2493 cost += one_add_cost;
2498 return cost;
2501 /* For candidate C, each sibling of candidate C, and each dependent of
2502 candidate C, determine whether the candidate is dependent upon a
2503 phi that hides its basis. If not, replace the candidate unconditionally.
2504 Otherwise, determine whether the cost of introducing compensation code
2505 for the candidate is offset by the gains from strength reduction. If
2506 so, replace the candidate and introduce the compensation code. */
2508 static void
2509 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2511 if (phi_dependent_cand_p (c))
2513 /* A multiply candidate with a stride of 1 is just an artifice
2514 of a copy or cast; there is no value in replacing it. */
2515 if (c->kind == CAND_MULT && wi::to_widest (c->stride) != 1)
2517 /* A candidate dependent upon a phi will replace a multiply by
2518 a constant with an add, and will insert at most one add for
2519 each phi argument. Add these costs with the potential dead-code
2520 savings to determine profitability. */
2521 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2522 int mult_savings = stmt_cost (c->cand_stmt, speed);
2523 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2524 tree phi_result = gimple_phi_result (phi);
2525 int one_add_cost = add_cost (speed,
2526 TYPE_MODE (TREE_TYPE (phi_result)));
2527 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2528 int cost = add_costs - mult_savings - c->dead_savings;
2530 if (dump_file && (dump_flags & TDF_DETAILS))
2532 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2533 fprintf (dump_file, " add_costs = %d\n", add_costs);
2534 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2535 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2536 fprintf (dump_file, " cost = %d\n", cost);
2537 if (cost <= COST_NEUTRAL)
2538 fputs (" Replacing...\n", dump_file);
2539 else
2540 fputs (" Not replaced.\n", dump_file);
2543 if (cost <= COST_NEUTRAL)
2544 replace_conditional_candidate (c);
2547 else
2548 replace_unconditional_candidate (c);
2550 if (c->sibling)
2551 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2553 if (c->dependent)
2554 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
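/* [Editorial note with hypothetical costs.]  An example of the
   profitability test above: with one_add_cost = 4, a phi with two
   arguments of which one needs a compensating add, mult_savings = 16,
   and no dead-code savings, we get add_costs = 4 + 4 = 8 and
   cost = 8 - 16 - 0 = -8, which is <= COST_NEUTRAL, so the conditional
   candidate is replaced.  The numbers are invented; real values come
   from add_cost and stmt_cost.  */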
2557 /* Count the number of candidates in the tree rooted at C that have
2558 not already been replaced under other interpretations. */
2560 static int
2561 count_candidates (slsr_cand_t c)
2563 unsigned count = cand_already_replaced (c) ? 0 : 1;
2565 if (c->sibling)
2566 count += count_candidates (lookup_cand (c->sibling));
2568 if (c->dependent)
2569 count += count_candidates (lookup_cand (c->dependent));
2571 return count;
2574 /* Increase the count of INCREMENT by one in the increment vector.
2575 INCREMENT is associated with candidate C. If INCREMENT is to be
2576 conditionally executed as part of a conditional candidate replacement,
2577 IS_PHI_ADJUST is true, otherwise false. If an initializer
2578 T_0 = stride * I is provided by a candidate that dominates all
2579 candidates with the same increment, also record T_0 for subsequent use. */
2581 static void
2582 record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
2584 bool found = false;
2585 unsigned i;
2587 /* Treat increments that differ only in sign as identical so as to
2588 share initializers, unless we are generating pointer arithmetic. */
2589 if (!address_arithmetic_p && wi::neg_p (increment))
2590 increment = -increment;
2592 for (i = 0; i < incr_vec_len; i++)
2594 if (incr_vec[i].incr == increment)
2596 incr_vec[i].count++;
2597 found = true;
2599 /* If we previously recorded an initializer that doesn't
2600 dominate this candidate, it's not going to be useful to
2601 us after all. */
2602 if (incr_vec[i].initializer
2603 && !dominated_by_p (CDI_DOMINATORS,
2604 gimple_bb (c->cand_stmt),
2605 incr_vec[i].init_bb))
2607 incr_vec[i].initializer = NULL_TREE;
2608 incr_vec[i].init_bb = NULL;
2611 break;
2615 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2617 /* The first time we see an increment, create the entry for it.
2618 If this is the root candidate which doesn't have a basis, set
2619 the count to zero. We're only processing it so it can possibly
2620 provide an initializer for other candidates. */
2621 incr_vec[incr_vec_len].incr = increment;
2622 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2623 incr_vec[incr_vec_len].cost = COST_INFINITE;
2625 /* Optimistically record the first occurrence of this increment
2626 as providing an initializer (if it does); we will revise this
2627 opinion later if it doesn't dominate all other occurrences.
2628 Exception: increments of 0, 1 never need initializers;
2629 and phi adjustments don't ever provide initializers. */
2630 if (c->kind == CAND_ADD
2631 && !is_phi_adjust
2632 && c->index == increment
2633 && (increment > 1 || increment < 0)
2634 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2635 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2637 tree t0 = NULL_TREE;
2638 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2639 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2640 if (operand_equal_p (rhs1, c->base_expr, 0))
2641 t0 = rhs2;
2642 else if (operand_equal_p (rhs2, c->base_expr, 0))
2643 t0 = rhs1;
2644 if (t0
2645 && SSA_NAME_DEF_STMT (t0)
2646 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2648 incr_vec[incr_vec_len].initializer = t0;
2649 incr_vec[incr_vec_len++].init_bb
2650 = gimple_bb (SSA_NAME_DEF_STMT (t0));
2652 else
2654 incr_vec[incr_vec_len].initializer = NULL_TREE;
2655 incr_vec[incr_vec_len++].init_bb = NULL;
2658 else
2660 incr_vec[incr_vec_len].initializer = NULL_TREE;
2661 incr_vec[incr_vec_len++].init_bb = NULL;
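/* [Editorial illustration; invented names.]  An example of an increment
   that provides an initializer: for

       _8 = s_2 * 5;
       x_9 = b_1 + _8;       CAND_ADD with base b_1, index 5, stride s_2

   recording the increment 5 also records _8 as a candidate initializer
   (the T_0 of the comments above), so long as its definition dominates
   the other candidates using the same increment.  */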
2666 /* Given phi statement PHI that hides a candidate from its BASIS, find
2667 the increments along each incoming arc (recursively handling additional
2668 phis that may be present) and record them. These increments are the
2669 difference in index between the index-adjusting statements and the
2670 index of the basis. */
2672 static void
2673 record_phi_increments (slsr_cand_t basis, gimple *phi)
2675 unsigned i;
2676 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2678 for (i = 0; i < gimple_phi_num_args (phi); i++)
2680 tree arg = gimple_phi_arg_def (phi, i);
2682 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2684 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2686 if (gimple_code (arg_def) == GIMPLE_PHI)
2687 record_phi_increments (basis, arg_def);
2688 else
2690 slsr_cand_t arg_cand = base_cand_from_table (arg);
2691 widest_int diff = arg_cand->index - basis->index;
2692 record_increment (arg_cand, diff, PHI_ADJUST);
2698 /* Determine how many times each unique increment occurs in the set
2699 of candidates rooted at C's parent, recording the data in the
2700 increment vector. For each unique increment I, if an initializer
2701 T_0 = stride * I is provided by a candidate that dominates all
2702 candidates with the same increment, also record T_0 for subsequent
2703 use. */
2705 static void
2706 record_increments (slsr_cand_t c)
2708 if (!cand_already_replaced (c))
2710 if (!phi_dependent_cand_p (c))
2711 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2712 else
2714 /* A candidate with a basis hidden by a phi will have one
2715 increment for its relationship to the index represented by
2716 the phi, and potentially additional increments along each
2717 incoming edge. For the root of the dependency tree (which
2718 has no basis), process just the initial index in case it has
2719 an initializer that can be used by subsequent candidates. */
2720 record_increment (c, c->index, NOT_PHI_ADJUST);
2722 if (c->basis)
2723 record_phi_increments (lookup_cand (c->basis),
2724 lookup_cand (c->def_phi)->cand_stmt);
2728 if (c->sibling)
2729 record_increments (lookup_cand (c->sibling));
2731 if (c->dependent)
2732 record_increments (lookup_cand (c->dependent));
2735 /* Add up and return the costs of introducing add statements that
2736 require the increment INCR on behalf of candidate C and phi
2737 statement PHI. Accumulate into *SAVINGS the potential savings
2738 from removing existing statements that feed PHI and have no other
2739 uses. */
2741 static int
2742 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple *phi,
2743 int *savings)
2745 unsigned i;
2746 int cost = 0;
2747 slsr_cand_t basis = lookup_cand (c->basis);
2748 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
2750 for (i = 0; i < gimple_phi_num_args (phi); i++)
2752 tree arg = gimple_phi_arg_def (phi, i);
2754 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2756 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
2758 if (gimple_code (arg_def) == GIMPLE_PHI)
2760 int feeding_savings = 0;
2761 tree feeding_var = gimple_phi_result (arg_def);
2762 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2763 if (uses_consumed_by_stmt (feeding_var, phi))
2764 *savings += feeding_savings;
2766 else
2768 slsr_cand_t arg_cand = base_cand_from_table (arg);
2769 widest_int diff = arg_cand->index - basis->index;
2771 if (incr == diff)
2773 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2774 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2775 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2776 if (uses_consumed_by_stmt (lhs, phi))
2777 *savings += stmt_cost (arg_cand->cand_stmt, true);
2783 return cost;
2786 /* Return the first candidate in the tree rooted at C that has not
2787 already been replaced, favoring siblings over dependents. */
2789 static slsr_cand_t
2790 unreplaced_cand_in_tree (slsr_cand_t c)
2792 if (!cand_already_replaced (c))
2793 return c;
2795 if (c->sibling)
2797 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2798 if (sib)
2799 return sib;
2802 if (c->dependent)
2804 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2805 if (dep)
2806 return dep;
2809 return NULL;
2812 /* Return TRUE if the candidates in the tree rooted at C should be
2813 optimized for speed, else FALSE. We estimate this based on the block
2814 containing the most dominant candidate in the tree that has not yet
2815 been replaced. */
2817 static bool
2818 optimize_cands_for_speed_p (slsr_cand_t c)
2820 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2821 gcc_assert (c2);
2822 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2825 /* Add COST_IN to the lowest cost of any dependent path starting at
2826 candidate C or any of its siblings, counting only candidates along
2827 such paths with increment INCR. Assume that replacing a candidate
2828 reduces cost by REPL_SAVINGS. Also account for savings from any
2829 statements that would go dead. If COUNT_PHIS is true, include
2830 costs of introducing feeding statements for conditional candidates. */
2832 static int
2833 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2834 const widest_int &incr, bool count_phis)
2836 int local_cost, sib_cost, savings = 0;
2837 widest_int cand_incr = cand_abs_increment (c);
2839 if (cand_already_replaced (c))
2840 local_cost = cost_in;
2841 else if (incr == cand_incr)
2842 local_cost = cost_in - repl_savings - c->dead_savings;
2843 else
2844 local_cost = cost_in - c->dead_savings;
2846 if (count_phis
2847 && phi_dependent_cand_p (c)
2848 && !cand_already_replaced (c))
2850 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2851 local_cost += phi_incr_cost (c, incr, phi, &savings);
2853 if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
2854 local_cost -= savings;
2857 if (c->dependent)
2858 local_cost = lowest_cost_path (local_cost, repl_savings,
2859 lookup_cand (c->dependent), incr,
2860 count_phis);
2862 if (c->sibling)
2864 sib_cost = lowest_cost_path (cost_in, repl_savings,
2865 lookup_cand (c->sibling), incr,
2866 count_phis);
2867 local_cost = MIN (local_cost, sib_cost);
2870 return local_cost;
2873 /* Compute the total savings that would accrue from all replacements
2874 in the candidate tree rooted at C, counting only candidates with
2875 increment INCR. Assume that replacing a candidate reduces cost
2876 by REPL_SAVINGS. Also account for savings from statements that
2877 would go dead. */
2879 static int
2880 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
2881 bool count_phis)
2883 int savings = 0;
2884 widest_int cand_incr = cand_abs_increment (c);
2886 if (incr == cand_incr && !cand_already_replaced (c))
2887 savings += repl_savings + c->dead_savings;
2889 if (count_phis
2890 && phi_dependent_cand_p (c)
2891 && !cand_already_replaced (c))
2893 int phi_savings = 0;
2894 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
2895 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2897 if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
2898 savings += phi_savings;
2901 if (c->dependent)
2902 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2903 count_phis);
2905 if (c->sibling)
2906 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2907 count_phis);
2909 return savings;
2912 /* Use target-specific costs to determine and record which increments
2913 in the current candidate tree are profitable to replace, assuming
2914 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2915 the candidate tree.
2917 One slight limitation here is that we don't account for the possible
2918 introduction of casts in some cases. See replace_one_candidate for
2919 the cases where these are introduced. This should probably be cleaned
2920 up sometime. */
2922 static void
2923 analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
2925 unsigned i;
2927 for (i = 0; i < incr_vec_len; i++)
2929 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2931 /* If somehow this increment is bigger than a HWI, we won't
2932 be optimizing candidates that use it. And if the increment
2933 has a count of zero, nothing will be done with it. */
2934 if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
2935 incr_vec[i].cost = COST_INFINITE;
2937 /* Increments of 0, 1, and -1 are always profitable to replace,
2938 because they always replace a multiply or add with an add or
2939 copy, and may cause one or more existing instructions to go
2940 dead. Exception: -1 can't be assumed to be profitable for
2941 pointer addition. */
2942 else if (incr == 0
2943 || incr == 1
2944 || (incr == -1
2945 && !POINTER_TYPE_P (first_dep->cand_type)))
2946 incr_vec[i].cost = COST_NEUTRAL;
2948 /* If we need to add an initializer, give up if a cast from the
2949 candidate's type to its stride's type can lose precision.
2950 Note that this already takes into account that the stride may
2951 have been cast to a wider type, in which case this test won't
2952 fire. Example:
2954 short int _1;
2955 _2 = (int) _1;
2956 _3 = _2 * 10;
2957 _4 = x + _3; ADD: x + (10 * (int)_1) : int
2958 _5 = _2 * 15;
2959 _6 = x + _5; ADD: x + (15 * (int)_1) : int
2961 Although the stride was a short int initially, the stride
2962 used in the analysis has been widened to an int, and such
2963 widening will be done in the initializer as well. */
2964 else if (!incr_vec[i].initializer
2965 && TREE_CODE (first_dep->stride) != INTEGER_CST
2966 && !legal_cast_p_1 (first_dep->stride_type,
2967 TREE_TYPE (gimple_assign_lhs
2968 (first_dep->cand_stmt))))
2969 incr_vec[i].cost = COST_INFINITE;
2971 /* If we need to add an initializer, make sure we don't introduce
2972 a multiply by a pointer type, which can happen in certain cast
2973 scenarios. */
2974 else if (!incr_vec[i].initializer
2975 && TREE_CODE (first_dep->stride) != INTEGER_CST
2976 && POINTER_TYPE_P (first_dep->stride_type))
2977 incr_vec[i].cost = COST_INFINITE;
2979 /* For any other increment, if this is a multiply candidate, we
2980 must introduce a temporary T and initialize it with
2981 T_0 = stride * increment. When optimizing for speed, walk the
2982 candidate tree to calculate the best cost reduction along any
2983 path; if it offsets the fixed cost of inserting the initializer,
2984 replacing the increment is profitable. When optimizing for
2985 size, instead calculate the total cost reduction from replacing
2986 all candidates with this increment. */
2987 else if (first_dep->kind == CAND_MULT)
2989 int cost = mult_by_coeff_cost (incr, mode, speed);
2990 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2991 if (speed)
2992 cost = lowest_cost_path (cost, repl_savings, first_dep,
2993 incr_vec[i].incr, COUNT_PHIS);
2994 else
2995 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
2996 COUNT_PHIS);
2998 incr_vec[i].cost = cost;
3001 /* If this is an add candidate, the initializer may already
3002 exist, so only calculate the cost of the initializer if it
3003 doesn't. We are replacing one add with another here, so the
3004 known replacement savings is zero. We will account for removal
3005 of dead instructions in lowest_cost_path or total_savings. */
3006 else
3008 int cost = 0;
3009 if (!incr_vec[i].initializer)
3010 cost = mult_by_coeff_cost (incr, mode, speed);
3012 if (speed)
3013 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
3014 DONT_COUNT_PHIS);
3015 else
3016 cost -= total_savings (0, first_dep, incr_vec[i].incr,
3017 DONT_COUNT_PHIS);
3019 incr_vec[i].cost = cost;
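/* [Editorial note using hypothetical costs.]  For a multiply chain with
   an unknown stride and increment 5, the logic above might see an
   initializer cost of mult_by_coeff_cost (5) = 4 and repl_savings =
   mul_cost - add_cost = 16 - 4 = 12.  If the cheapest dependent path
   contains two candidates using increment 5, lowest_cost_path yields
   4 - 12 - 12 = -20, so the increment is recorded as profitable.  All
   numbers here are invented for illustration.  */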
3024 /* Return the nearest common dominator of BB1 and BB2. If the blocks
3025 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
3026 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
3027 return C2 in *WHERE; and if the NCD matches neither, return NULL in
3028 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
3030 static basic_block
3031 ncd_for_two_cands (basic_block bb1, basic_block bb2,
3032 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
3034 basic_block ncd;
3036 if (!bb1)
3038 *where = c2;
3039 return bb2;
3042 if (!bb2)
3044 *where = c1;
3045 return bb1;
3048 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
3050 /* If both candidates are in the same block, the earlier
3051 candidate wins. */
3052 if (bb1 == ncd && bb2 == ncd)
3054 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
3055 *where = c2;
3056 else
3057 *where = c1;
3060 /* Otherwise, if one of them produced a candidate in the
3061 dominator, that one wins. */
3062 else if (bb1 == ncd)
3063 *where = c1;
3065 else if (bb2 == ncd)
3066 *where = c2;
3068 /* If neither matches the dominator, neither wins. */
3069 else
3070 *where = NULL;
3072 return ncd;
3075 /* Consider all candidates that feed PHI. Find the nearest common
3076 dominator of those candidates requiring the given increment INCR.
3077 Further find and return the nearest common dominator of this result
3078 with block NCD. If the returned block contains one or more of the
3079 candidates, return the earliest candidate in the block in *WHERE. */
3081 static basic_block
3082 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gphi *phi,
3083 basic_block ncd, slsr_cand_t *where)
3085 unsigned i;
3086 slsr_cand_t basis = lookup_cand (c->basis);
3087 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3089 for (i = 0; i < gimple_phi_num_args (phi); i++)
3091 tree arg = gimple_phi_arg_def (phi, i);
3093 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3095 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3097 if (gimple_code (arg_def) == GIMPLE_PHI)
3098 ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd,
3099 where);
3100 else
3102 slsr_cand_t arg_cand = base_cand_from_table (arg);
3103 widest_int diff = arg_cand->index - basis->index;
3104 basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3106 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3107 ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3112 return ncd;
3115 /* Consider the candidate C together with any candidates that feed
3116 C's phi dependence (if any). Find and return the nearest common
3117 dominator of those candidates requiring the given increment INCR.
3118 If the returned block contains one or more of the candidates,
3119 return the earliest candidate in the block in *WHERE. */
3121 static basic_block
3122 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
3124 basic_block ncd = NULL;
3126 if (cand_abs_increment (c) == incr)
3128 ncd = gimple_bb (c->cand_stmt);
3129 *where = c;
3132 if (phi_dependent_cand_p (c))
3133 ncd = ncd_with_phi (c, incr,
3134 as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt),
3135 ncd, where);
3137 return ncd;
3140 /* Consider all candidates in the tree rooted at C for which INCR
3141 represents the required increment of C relative to its basis.
3142 Find and return the basic block that most nearly dominates all
3143 such candidates. If the returned block contains one or more of
3144 the candidates, return the earliest candidate in the block in
3145 *WHERE. */
3147 static basic_block
3148 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
3149 slsr_cand_t *where)
3151 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3152 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3154 /* First find the NCD of all siblings and dependents. */
3155 if (c->sibling)
3156 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3157 incr, &sib_where);
3158 if (c->dependent)
3159 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3160 incr, &dep_where);
3161 if (!sib_ncd && !dep_ncd)
3163 new_where = NULL;
3164 ncd = NULL;
3166 else if (sib_ncd && !dep_ncd)
3168 new_where = sib_where;
3169 ncd = sib_ncd;
3171 else if (dep_ncd && !sib_ncd)
3173 new_where = dep_where;
3174 ncd = dep_ncd;
3176 else
3177 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3178 dep_where, &new_where);
3180 /* If the candidate's increment doesn't match the one we're interested
3181 in (nor do any increments for feeding defs of a phi-dependence),
3182 then the result depends only on siblings and dependents. */
3183 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3185 if (!this_ncd || cand_already_replaced (c))
3187 *where = new_where;
3188 return ncd;
3191 /* Otherwise, compare this candidate with the result from all siblings
3192 and dependents. */
3193 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3195 return ncd;
3198 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3200 static inline bool
3201 profitable_increment_p (unsigned index)
3203 return (incr_vec[index].cost <= COST_NEUTRAL);
3206 /* For each profitable increment in the increment vector not equal to
3207 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3208 dominator of all statements in the candidate chain rooted at C
3209 that require that increment, and insert an initializer
3210 T_0 = stride * increment at that location. Record T_0 with the
3211 increment record. */
3213 static void
3214 insert_initializers (slsr_cand_t c)
3216 unsigned i;
3218 for (i = 0; i < incr_vec_len; i++)
3220 basic_block bb;
3221 slsr_cand_t where = NULL;
3222 gassign *init_stmt;
3223 gassign *cast_stmt = NULL;
3224 tree new_name, incr_tree, init_stride;
3225 widest_int incr = incr_vec[i].incr;
3227 if (!profitable_increment_p (i)
3228 || incr == 1
3229 || (incr == -1
3230 && (!POINTER_TYPE_P (lookup_cand (c->basis)->cand_type)))
3231 || incr == 0)
3232 continue;
3234 /* We may have already identified an existing initializer that
3235 will suffice. */
3236 if (incr_vec[i].initializer)
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3240 fputs ("Using existing initializer: ", dump_file);
3241 print_gimple_stmt (dump_file,
3242 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3243 0, 0);
3245 continue;
3248 /* Find the block that most closely dominates all candidates
3249 with this increment. If there is at least one candidate in
3250 that block, the earliest one will be returned in WHERE. */
3251 bb = nearest_common_dominator_for_cands (c, incr, &where);
3253 /* If the nominal stride has a different type than the recorded
3254 stride type, build a cast from the nominal stride to that type. */
3255 if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
3257 init_stride = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3258 cast_stmt = gimple_build_assign (init_stride, NOP_EXPR, c->stride);
3260 else
3261 init_stride = c->stride;
3263 /* Create a new SSA name to hold the initializer's value. */
3264 new_name = make_temp_ssa_name (c->stride_type, NULL, "slsr");
3265 incr_vec[i].initializer = new_name;
3267 /* Create the initializer and insert it in the latest possible
3268 dominating position. */
3269 incr_tree = wide_int_to_tree (c->stride_type, incr);
3270 init_stmt = gimple_build_assign (new_name, MULT_EXPR,
3271 init_stride, incr_tree);
3272 if (where)
3274 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3275 location_t loc = gimple_location (where->cand_stmt);
3277 if (cast_stmt)
3279 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3280 gimple_set_location (cast_stmt, loc);
3283 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3284 gimple_set_location (init_stmt, loc);
3286 else
3288 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3289 gimple *basis_stmt = lookup_cand (c->basis)->cand_stmt;
3290 location_t loc = gimple_location (basis_stmt);
3292 if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
3294 if (cast_stmt)
3296 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3297 gimple_set_location (cast_stmt, loc);
3299 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3301 else
3303 if (cast_stmt)
3305 gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
3306 gimple_set_location (cast_stmt, loc);
3308 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
3311 gimple_set_location (init_stmt, gimple_location (basis_stmt));
3314 if (dump_file && (dump_flags & TDF_DETAILS))
3316 if (cast_stmt)
3318 fputs ("Inserting stride cast: ", dump_file);
3319 print_gimple_stmt (dump_file, cast_stmt, 0);
3321 fputs ("Inserting initializer: ", dump_file);
3322 print_gimple_stmt (dump_file, init_stmt, 0);
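/* [Editorial illustration; invented names.]  If increment 5 was found
   profitable and no existing initializer was recorded, the loop above
   inserts something like

       slsr_10 = s_2 * 5;

   in the nearest common dominator of all candidates needing it (after a
   stride cast, if one was required), so that each such candidate can
   later be rewritten as an add of its basis and slsr_10.  */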
3327 /* Return TRUE iff all required increments for candidates feeding PHI
3328 are profitable (and legal!) to replace on behalf of candidate C. */
3330 static bool
3331 all_phi_incrs_profitable (slsr_cand_t c, gphi *phi)
3333 unsigned i;
3334 slsr_cand_t basis = lookup_cand (c->basis);
3335 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
3337 /* If the basis doesn't dominate the PHI (including when the PHI is
3338 in the same block as the basis), we won't be able to create a PHI
3339 using the basis here. */
3340 basic_block basis_bb = gimple_bb (basis->cand_stmt);
3341 basic_block phi_bb = gimple_bb (phi);
3343 if (phi_bb == basis_bb
3344 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
3345 return false;
3347 for (i = 0; i < gimple_phi_num_args (phi); i++)
3349 /* If the PHI arg resides in a block not dominated by the basis,
3350 we won't be able to create a PHI using the basis here. */
3351 basic_block pred_bb = gimple_phi_arg_edge (phi, i)->src;
3353 if (!dominated_by_p (CDI_DOMINATORS, pred_bb, basis_bb))
3354 return false;
3356 tree arg = gimple_phi_arg_def (phi, i);
3358 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3360 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
3362 if (gimple_code (arg_def) == GIMPLE_PHI)
3364 if (!all_phi_incrs_profitable (c, as_a <gphi *> (arg_def)))
3365 return false;
3367 else
3369 int j;
3370 slsr_cand_t arg_cand = base_cand_from_table (arg);
3371 widest_int increment = arg_cand->index - basis->index;
3373 if (!address_arithmetic_p && wi::neg_p (increment))
3374 increment = -increment;
3376 j = incr_vec_index (increment);
3378 if (dump_file && (dump_flags & TDF_DETAILS))
3380 fprintf (dump_file, " Conditional candidate %d, phi: ",
3381 c->cand_num);
3382 print_gimple_stmt (dump_file, phi, 0);
3383 fputs (" increment: ", dump_file);
3384 print_decs (increment, dump_file);
3385 if (j < 0)
3386 fprintf (dump_file,
3387 "\n Not replaced; incr_vec overflow.\n");
3388 else {
3389 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3390 if (profitable_increment_p (j))
3391 fputs (" Replacing...\n", dump_file);
3392 else
3393 fputs (" Not replaced.\n", dump_file);
3397 if (j < 0 || !profitable_increment_p (j))
3398 return false;
3403 return true;
3406 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3407 type TO_TYPE, and insert it in front of the statement represented
3408 by candidate C. Return the new SSA name. */
3411 static tree
3412 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3414 tree cast_lhs;
3415 gassign *cast_stmt;
3416 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3418 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3419 cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr);
3420 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3421 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3423 if (dump_file && (dump_flags & TDF_DETAILS))
3425 fputs (" Inserting: ", dump_file);
3426 print_gimple_stmt (dump_file, cast_stmt, 0);
3429 return cast_lhs;
3432 /* Replace the RHS of the statement represented by candidate C with
3433 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3434 leave C unchanged or just interchange its operands. The original
3435 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3436 If the replacement was made and we are doing a details dump,
3437 return the revised statement, else NULL. */
3439 static gimple *
3440 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3441 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3442 slsr_cand_t c)
3444 if (new_code != old_code
3445 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3446 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3447 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3448 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3450 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3451 slsr_cand_t cc = c;
3452 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3453 update_stmt (gsi_stmt (gsi));
3454 c->cand_stmt = gsi_stmt (gsi);
3455 while (cc->next_interp)
3457 cc = lookup_cand (cc->next_interp);
3458 cc->cand_stmt = gsi_stmt (gsi);
3461 if (dump_file && (dump_flags & TDF_DETAILS))
3462 return gsi_stmt (gsi);
3465 else if (dump_file && (dump_flags & TDF_DETAILS))
3466 fputs (" (duplicate, not actually replacing)\n", dump_file);
3468 return NULL;
3471 /* Strength-reduce the statement represented by candidate C by replacing
3472 it with an equivalent addition or subtraction. I is the index into
3473 the increment vector identifying C's increment. BASIS_NAME is the
3474 rhs1 to use in creating the add/subtract. */
3477 static void
3478 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3480 gimple *stmt_to_print = NULL;
3481 tree orig_rhs1, orig_rhs2;
3482 tree rhs2;
3483 enum tree_code orig_code, repl_code;
3484 widest_int cand_incr;
3486 orig_code = gimple_assign_rhs_code (c->cand_stmt);
3487 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3488 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3489 cand_incr = cand_increment (c);
3491 if (dump_file && (dump_flags & TDF_DETAILS))
3493 fputs ("Replacing: ", dump_file);
3494 print_gimple_stmt (dump_file, c->cand_stmt, 0);
3495 stmt_to_print = c->cand_stmt;
3498 if (address_arithmetic_p)
3499 repl_code = POINTER_PLUS_EXPR;
3500 else
3501 repl_code = PLUS_EXPR;
3503 /* If the increment has an initializer T_0, replace the candidate
3504 statement with an add of the basis name and the initializer. */
3505 if (incr_vec[i].initializer)
3507 tree init_type = TREE_TYPE (incr_vec[i].initializer);
3508 tree orig_type = TREE_TYPE (orig_rhs2);
3510 if (types_compatible_p (orig_type, init_type))
3511 rhs2 = incr_vec[i].initializer;
3512 else
3513 rhs2 = introduce_cast_before_cand (c, orig_type,
3514 incr_vec[i].initializer);
3516 if (incr_vec[i].incr != cand_incr)
3518 gcc_assert (repl_code == PLUS_EXPR);
3519 repl_code = MINUS_EXPR;
3522 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3523 orig_code, orig_rhs1, orig_rhs2,
3524 c);
3527 /* Otherwise, the increment is one of -1, 0, and 1. Replace
3528 with a subtract of the stride from the basis name, a copy
3529 from the basis name, or an add of the stride to the basis
3530 name, respectively. It may be necessary to introduce a
3531 cast (or reuse an existing cast). */
3532 else if (cand_incr == 1)
3534 tree stride_type = TREE_TYPE (c->stride);
3535 tree orig_type = TREE_TYPE (orig_rhs2);
3537 if (types_compatible_p (orig_type, stride_type))
3538 rhs2 = c->stride;
3539 else
3540 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3542 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3543 orig_code, orig_rhs1, orig_rhs2,
3544 c);
3547 else if (cand_incr == -1)
3549 tree stride_type = TREE_TYPE (c->stride);
3550 tree orig_type = TREE_TYPE (orig_rhs2);
3551 gcc_assert (repl_code != POINTER_PLUS_EXPR);
3553 if (types_compatible_p (orig_type, stride_type))
3554 rhs2 = c->stride;
3555 else
3556 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3558 if (orig_code != MINUS_EXPR
3559 || !operand_equal_p (basis_name, orig_rhs1, 0)
3560 || !operand_equal_p (rhs2, orig_rhs2, 0))
3562 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3563 slsr_cand_t cc = c;
3564 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3565 update_stmt (gsi_stmt (gsi));
3566 c->cand_stmt = gsi_stmt (gsi);
3567 while (cc->next_interp)
3569 cc = lookup_cand (cc->next_interp);
3570 cc->cand_stmt = gsi_stmt (gsi);
3573 if (dump_file && (dump_flags & TDF_DETAILS))
3574 stmt_to_print = gsi_stmt (gsi);
3576 else if (dump_file && (dump_flags & TDF_DETAILS))
3577 fputs (" (duplicate, not actually replacing)\n", dump_file);
3580 else if (cand_incr == 0)
3582 tree lhs = gimple_assign_lhs (c->cand_stmt);
3583 tree lhs_type = TREE_TYPE (lhs);
3584 tree basis_type = TREE_TYPE (basis_name);
3586 if (types_compatible_p (lhs_type, basis_type))
3588 gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
3589 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3590 slsr_cand_t cc = c;
3591 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3592 gsi_replace (&gsi, copy_stmt, false);
3593 c->cand_stmt = copy_stmt;
3594 while (cc->next_interp)
3596 cc = lookup_cand (cc->next_interp);
3597 cc->cand_stmt = copy_stmt;
3600 if (dump_file && (dump_flags & TDF_DETAILS))
3601 stmt_to_print = copy_stmt;
3603 else
3605 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3606 gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name);
3607 slsr_cand_t cc = c;
3608 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3609 gsi_replace (&gsi, cast_stmt, false);
3610 c->cand_stmt = cast_stmt;
3611 while (cc->next_interp)
3613 cc = lookup_cand (cc->next_interp);
3614 cc->cand_stmt = cast_stmt;
3617 if (dump_file && (dump_flags & TDF_DETAILS))
3618 stmt_to_print = cast_stmt;
3621 else
3622 gcc_unreachable ();
3624 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3626 fputs ("With: ", dump_file);
3627 print_gimple_stmt (dump_file, stmt_to_print, 0);
3628 fputs ("\n", dump_file);
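/* [Editorial illustration; invented names.]  Tying this to the example
   in the comments of analyze_increments: given the candidates

       _3 = _2 * 10;    _4 = x_1 + _3;      basis
       _5 = _2 * 15;    _6 = x_1 + _5;      candidate, increment 5

   and an initializer slsr_10 = _2 * 5, the routine above rewrites the
   candidate as roughly

       _6 = _4 + slsr_10;

   since x + 15 * s equals (x + 10 * s) + 5 * s.  */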
3632 /* For each candidate in the tree rooted at C, replace it with
3633 an increment if such has been shown to be profitable. */
3635 static void
3636 replace_profitable_candidates (slsr_cand_t c)
3638 if (!cand_already_replaced (c))
3640 widest_int increment = cand_abs_increment (c);
3641 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3642 int i;
3644 i = incr_vec_index (increment);
3646 /* Only process profitable increments. Nothing useful can be done
3647 to a cast or copy. */
3648 if (i >= 0
3649 && profitable_increment_p (i)
3650 && orig_code != SSA_NAME
3651 && !CONVERT_EXPR_CODE_P (orig_code))
3653 if (phi_dependent_cand_p (c))
3655 gphi *phi = as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt);
3657 if (all_phi_incrs_profitable (c, phi))
3659 /* Look up the LHS SSA name from C's basis. This will be
3660 the RHS1 of the adds we will introduce to create new
3661 phi arguments. */
3662 slsr_cand_t basis = lookup_cand (c->basis);
3663 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3665 /* Create a new phi statement that will represent C's true
3666 basis after the transformation is complete. */
3667 location_t loc = gimple_location (c->cand_stmt);
3668 tree name = create_phi_basis (c, phi, basis_name,
3669 loc, UNKNOWN_STRIDE);
3671 /* Replace C with an add of the new basis phi and the
3672 increment. */
3673 replace_one_candidate (c, i, name);
3676 else
3678 slsr_cand_t basis = lookup_cand (c->basis);
3679 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3680 replace_one_candidate (c, i, basis_name);
3685 if (c->sibling)
3686 replace_profitable_candidates (lookup_cand (c->sibling));
3688 if (c->dependent)
3689 replace_profitable_candidates (lookup_cand (c->dependent));
3692 /* Analyze costs of related candidates in the candidate vector,
3693 and make beneficial replacements. */
3695 static void
3696 analyze_candidates_and_replace (void)
3698 unsigned i;
3699 slsr_cand_t c;
3701 /* Each candidate that has a null basis and a non-null
3702 dependent is the root of a tree of related statements.
3703 Analyze each tree to determine a subset of those
3704 statements that can be replaced with maximum benefit. */
3705 FOR_EACH_VEC_ELT (cand_vec, i, c)
3707 slsr_cand_t first_dep;
3709 if (c->basis != 0 || c->dependent == 0)
3710 continue;
3712 if (dump_file && (dump_flags & TDF_DETAILS))
3713 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3714 c->cand_num);
3716 first_dep = lookup_cand (c->dependent);
3718 /* If this is a chain of CAND_REFs, unconditionally replace
3719 each of them with a strength-reduced data reference. */
3720 if (c->kind == CAND_REF)
3721 replace_refs (c);
3723 /* If the common stride of all related candidates is a known
3724 constant, each candidate without a phi-dependence can be
3725 profitably replaced. Each replaces a multiply by a single
3726 add, with the possibility that a feeding add also goes dead.
3727 A candidate with a phi-dependence is replaced only if the
3728 compensation code it requires is offset by the strength
3729 reduction savings. */
3730 else if (TREE_CODE (c->stride) == INTEGER_CST)
3731 replace_uncond_cands_and_profitable_phis (first_dep);
3733 /* When the stride is an SSA name, it may still be profitable
3734 to replace some or all of the dependent candidates, depending
3735 on whether the introduced increments can be reused, or are
3736 less expensive to calculate than the replaced statements. */
3737 else
3738 {
3739 machine_mode mode;
3740 bool speed;
3742 /* Determine whether we'll be generating pointer arithmetic
3743 when replacing candidates. */
3744 address_arithmetic_p = (c->kind == CAND_ADD
3745 && POINTER_TYPE_P (c->cand_type));
3747 /* If all candidates have already been replaced under other
3748 interpretations, nothing remains to be done. */
3749 if (!count_candidates (c))
3750 continue;
3752 /* Construct an array of increments for this candidate chain. */
3753 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3754 incr_vec_len = 0;
3755 record_increments (c);
3757 /* Determine which increments are profitable to replace. */
3758 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3759 speed = optimize_cands_for_speed_p (c);
3760 analyze_increments (first_dep, mode, speed);
3762 /* Insert initializers of the form T_0 = stride * increment
3763 for use in profitable replacements. */
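/* A minimal sketch, assuming the stride lives in SSA name _s and that an
   increment of 3 was found profitable (all names illustrative):

     T_0 = _s * 3;

   is emitted once at a point dominating the replacement sites, and each
   profitable candidate with increment 3 is then rewritten as an add of
   its basis and T_0 rather than repeating the multiply.  */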
3764 insert_initializers (first_dep);
3765 dump_incr_vec ();
3767 /* Perform the replacements. */
3768 replace_profitable_candidates (first_dep);
3769 free (incr_vec);
3770 }
3771 }
3772 }
3774 namespace {
3776 const pass_data pass_data_strength_reduction =
3777 {
3778 GIMPLE_PASS, /* type */
3779 "slsr", /* name */
3780 OPTGROUP_NONE, /* optinfo_flags */
3781 TV_GIMPLE_SLSR, /* tv_id */
3782 ( PROP_cfg | PROP_ssa ), /* properties_required */
3783 0, /* properties_provided */
3784 0, /* properties_destroyed */
3785 0, /* todo_flags_start */
3786 0, /* todo_flags_finish */
3787 };
3789 class pass_strength_reduction : public gimple_opt_pass
3790 {
3791 public:
3792 pass_strength_reduction (gcc::context *ctxt)
3793 : gimple_opt_pass (pass_data_strength_reduction, ctxt)
3794 {}
3796 /* opt_pass methods: */
3797 virtual bool gate (function *) { return flag_tree_slsr; }
3798 virtual unsigned int execute (function *);
3800 }; // class pass_strength_reduction
3802 unsigned
3803 pass_strength_reduction::execute (function *fun)
3804 {
3805 /* Create the obstack where candidates will reside. */
3806 gcc_obstack_init (&cand_obstack);
3808 /* Allocate the candidate vector. */
3809 cand_vec.create (128);
3811 /* Allocate the mapping from statements to candidate indices. */
3812 stmt_cand_map = new hash_map<gimple *, slsr_cand_t>;
3814 /* Create the obstack where candidate chains will reside. */
3815 gcc_obstack_init (&chain_obstack);
3817 /* Allocate the mapping from base expressions to candidate chains. */
3818 base_cand_map = new hash_table<cand_chain_hasher> (500);
3820 /* Allocate the mapping from bases to alternative bases. */
3821 alt_base_map = new hash_map<tree, tree>;
3823 /* Initialize the loop optimizer. We need to detect flow across
3824 back edges, and this gives us dominator information as well. */
3825 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3827 /* Walk the CFG in predominator order looking for strength reduction
3828 candidates. */
3829 find_candidates_dom_walker (CDI_DOMINATORS)
3830 .walk (fun->cfg->x_entry_block_ptr);
3832 if (dump_file && (dump_flags & TDF_DETAILS))
3833 {
3834 dump_cand_vec ();
3835 dump_cand_chains ();
3836 }
3838 delete alt_base_map;
3839 free_affine_expand_cache (&name_expansions);
3841 /* Analyze costs and make appropriate replacements. */
3842 analyze_candidates_and_replace ();
3844 loop_optimizer_finalize ();
3845 delete base_cand_map;
3846 base_cand_map = NULL;
3847 obstack_free (&chain_obstack, NULL);
3848 delete stmt_cand_map;
3849 cand_vec.release ();
3850 obstack_free (&cand_obstack, NULL);
3852 return 0;
3853 }
3855 } // anon namespace
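/* A quick way to exercise this pass and read the dumps produced above
   (illustrative command line): compile with -ftree-slsr and
   -fdump-tree-slsr-details, then inspect the generated slsr dump file.  */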
3857 gimple_opt_pass *
3858 make_pass_strength_reduction (gcc::context *ctxt)
3859 {
3860 return new pass_strength_reduction (ctxt);
3861 }