Merge from trunk @217148.
[official-gcc.git] / gcc / gimple-ssa-strength-reduction.c
blob4eecff5dacefe4d5f09bb80934e7879ed31180ae
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree.h"
40 #include "hash-map.h"
41 #include "hash-table.h"
42 #include "predict.h"
43 #include "vec.h"
44 #include "hashtab.h"
45 #include "hash-set.h"
46 #include "machmode.h"
47 #include "tm.h"
48 #include "hard-reg-set.h"
49 #include "input.h"
50 #include "function.h"
51 #include "dominance.h"
52 #include "cfg.h"
53 #include "basic-block.h"
54 #include "tree-ssa-alias.h"
55 #include "internal-fn.h"
56 #include "gimple-expr.h"
57 #include "is-a.h"
58 #include "gimple.h"
59 #include "gimple-iterator.h"
60 #include "gimplify-me.h"
61 #include "stor-layout.h"
62 #include "expr.h"
63 #include "tree-pass.h"
64 #include "cfgloop.h"
65 #include "gimple-pretty-print.h"
66 #include "gimple-ssa.h"
67 #include "tree-cfg.h"
68 #include "tree-phinodes.h"
69 #include "ssa-iterators.h"
70 #include "stringpool.h"
71 #include "tree-ssanames.h"
72 #include "domwalk.h"
73 #include "expmed.h"
74 #include "params.h"
75 #include "tree-ssa-address.h"
76 #include "tree-affine.h"
77 #include "wide-int-print.h"
78 #include "builtins.h"
80 /* Information about a strength reduction candidate. Each statement
81 in the candidate table represents an expression of one of the
82 following forms (the special case of CAND_REF will be described
83 later):
85 (CAND_MULT) S1: X = (B + i) * S
86 (CAND_ADD) S1: X = B + (i * S)
88 Here X and B are SSA names, i is an integer constant, and S is
89 either an SSA name or a constant. We call B the "base," i the
90 "index", and S the "stride."
92 Any statement S0 that dominates S1 and is of the form:
94 (CAND_MULT) S0: Y = (B + i') * S
95 (CAND_ADD) S0: Y = B + (i' * S)
97 is called a "basis" for S1. In both cases, S1 may be replaced by
99 S1': X = Y + (i - i') * S,
101 where (i - i') * S is folded to the extent possible.
103 All gimple statements are visited in dominator order, and each
104 statement that may contribute to one of the forms of S1 above is
105 given at least one entry in the candidate table. Such statements
106 include addition, pointer addition, subtraction, multiplication,
107 negation, copies, and nontrivial type casts. If a statement may
108 represent more than one expression of the forms of S1 above,
109 multiple "interpretations" are stored in the table and chained
110 together. Examples:
112 * An add of two SSA names may treat either operand as the base.
113 * A multiply of two SSA names, likewise.
114 * A copy or cast may be thought of as either a CAND_MULT with
115 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
117 Candidate records are allocated from an obstack. They are addressed
118 both from a hash table keyed on S1, and from a vector of candidate
119 pointers arranged in predominator order.
121 Opportunity note
122 ----------------
123 Currently we don't recognize:
125 S0: Y = (S * i') - B
126 S1: X = (S * i) - B
128 as a strength reduction opportunity, even though this S1 would
129 also be replaceable by the S1' above. This can be added if it
130 comes up in practice.
132 Strength reduction in addressing
133 --------------------------------
134 There is another kind of candidate known as CAND_REF. A CAND_REF
135 describes a statement containing a memory reference having
136 complex addressing that might benefit from strength reduction.
137 Specifically, we are interested in references for which
138 get_inner_reference returns a base address, offset, and bitpos as
139 follows:
141 base: MEM_REF (T1, C1)
142 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
143 bitpos: C4 * BITS_PER_UNIT
145 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
146 arbitrary integer constants. Note that C2 may be zero, in which
147 case the offset will be MULT_EXPR (T2, C3).
149 When this pattern is recognized, the original memory reference
150 can be replaced with:
152 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
153 C1 + (C2 * C3) + C4)
155 which distributes the multiply to allow constant folding. When
156 two or more addressing expressions can be represented by MEM_REFs
157 of this form, differing only in the constants C1, C2, and C4,
158 making this substitution produces more efficient addressing during
159 the RTL phases. When there are not at least two expressions with
160 the same values of T1, T2, and C3, there is nothing to be gained
161 by the replacement.
163 Strength reduction of CAND_REFs uses the same infrastructure as
164 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
165 field, MULT_EXPR (T2, C3) in the stride (S) field, and
166 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
167 is thus another CAND_REF with the same B and S values. When at
168 least two CAND_REFs are chained together using the basis relation,
169 each of them is replaced as above, resulting in improved code
170 generation for addressing.
172 Conditional candidates
173 ======================
175 Conditional candidates are best illustrated with an example.
176 Consider the code sequence:
178 (1) x_0 = ...;
179 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
180 if (...)
181 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
182 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
183 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
184 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
186 Here strength reduction is complicated by the uncertain value of x_2.
187 A legitimate transformation is:
189 (1) x_0 = ...;
190 (2) a_0 = x_0 * 5;
191 if (...)
193 (3) [x_1 = x_0 + 1;]
194 (3a) t_1 = a_0 + 5;
196 (4) [x_2 = PHI <x_0, x_1>;]
197 (4a) t_2 = PHI <a_0, t_1>;
198 (5) [x_3 = x_2 + 1;]
199 (6r) a_1 = t_2 + 5;
201 where the bracketed instructions may go dead.
203 To recognize this opportunity, we have to observe that statement (6)
204 has a "hidden basis" (2). The hidden basis is unlike a normal basis
205 in that the statement and the hidden basis have different base SSA
206 names (x_2 and x_0, respectively). The relationship is established
207 when a statement's base name (x_2) is defined by a phi statement (4),
208 each argument of which (x_0, x_1) has an identical "derived base name."
209 If the argument is defined by a candidate (as x_1 is by (3)) that is a
210 CAND_ADD having a stride of 1, the derived base name of the argument is
211 the base name of the candidate (x_0). Otherwise, the argument itself
212 is its derived base name (as is the case with argument x_0).
214 The hidden basis for statement (6) is the nearest dominating candidate
215 whose base name is the derived base name (x_0) of the feeding phi (4),
216 and whose stride is identical to that of the statement. We can then
217 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
218 allowing the final replacement of (6) by the strength-reduced (6r).
220 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
221 A CAND_PHI is not a candidate for replacement, but is maintained in the
222 candidate table to ease discovery of hidden bases. Any phi statement
223 whose arguments share a common derived base name is entered into the
224 table with the derived base name, an (arbitrary) index of zero, and a
225 stride of 1. A statement with a hidden basis can then be detected by
226 simply looking up its feeding phi definition in the candidate table,
227 extracting the derived base name, and searching for a basis in the
228 usual manner after substituting the derived base name.
230 Note that the transformation is only valid when the original phi and
231 the statements that define the phi's arguments are all at the same
232 position in the loop hierarchy. */
235 /* Index into the candidate vector, offset by 1. VECs are zero-based,
236 while cand_idx's are one-based, with zero indicating null. */
237 typedef unsigned cand_idx;
239 /* The kind of candidate. */
240 enum cand_kind
242 CAND_MULT,
243 CAND_ADD,
244 CAND_REF,
245 CAND_PHI
248 struct slsr_cand_d
250 /* The candidate statement S1. */
251 gimple cand_stmt;
253 /* The base expression B: often an SSA name, but not always. */
254 tree base_expr;
256 /* The stride S. */
257 tree stride;
259 /* The index constant i. */
260 widest_int index;
262 /* The type of the candidate. This is normally the type of base_expr,
263 but casts may have occurred when combining feeding instructions.
264 A candidate can only be a basis for candidates of the same final type.
265 (For CAND_REFs, this is the type to be used for operand 1 of the
266 replacement MEM_REF.) */
267 tree cand_type;
269 /* The kind of candidate (CAND_MULT, etc.). */
270 enum cand_kind kind;
272 /* Index of this candidate in the candidate vector. */
273 cand_idx cand_num;
275 /* Index of the next candidate record for the same statement.
276 A statement may be useful in more than one way (e.g., due to
277 commutativity). So we can have multiple "interpretations"
278 of a statement. */
279 cand_idx next_interp;
281 /* Index of the basis statement S0, if any, in the candidate vector. */
282 cand_idx basis;
284 /* First candidate for which this candidate is a basis, if one exists. */
285 cand_idx dependent;
287 /* Next candidate having the same basis as this one. */
288 cand_idx sibling;
290 /* If this is a conditional candidate, the CAND_PHI candidate
291 that defines the base SSA name B. */
292 cand_idx def_phi;
294 /* Savings that can be expected from eliminating dead code if this
295 candidate is replaced. */
296 int dead_savings;
299 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
300 typedef const struct slsr_cand_d *const_slsr_cand_t;
302 /* Pointers to candidates are chained together as part of a mapping
303 from base expressions to the candidates that use them. */
305 struct cand_chain_d
307 /* Base expression for the chain of candidates: often, but not
308 always, an SSA name. */
309 tree base_expr;
311 /* Pointer to a candidate. */
312 slsr_cand_t cand;
314 /* Chain pointer. */
315 struct cand_chain_d *next;
319 typedef struct cand_chain_d cand_chain, *cand_chain_t;
320 typedef const struct cand_chain_d *const_cand_chain_t;
322 /* Information about a unique "increment" associated with candidates
323 having an SSA name for a stride. An increment is the difference
324 between the index of the candidate and the index of its basis,
325 i.e., (i - i') as discussed in the module commentary.
327 When we are not going to generate address arithmetic we treat
328 increments that differ only in sign as the same, allowing sharing
329 of the cost of initializers. The absolute value of the increment
330 is stored in the incr_info. */
332 struct incr_info_d
334 /* The increment that relates a candidate to its basis. */
335 widest_int incr;
337 /* How many times the increment occurs in the candidate tree. */
338 unsigned count;
340 /* Cost of replacing candidates using this increment. Negative and
341 zero costs indicate replacement should be performed. */
342 int cost;
344 /* If this increment is profitable but is not -1, 0, or 1, it requires
345 an initializer T_0 = stride * incr to be found or introduced in the
346 nearest common dominator of all candidates. This field holds T_0
347 for subsequent use. */
348 tree initializer;
350 /* If the initializer was found to already exist, this is the block
351 where it was found. */
352 basic_block init_bb;
355 typedef struct incr_info_d incr_info, *incr_info_t;
357 /* Candidates are maintained in a vector. If candidate X dominates
358 candidate Y, then X appears before Y in the vector; but the
359 converse does not necessarily hold. */
360 static vec<slsr_cand_t> cand_vec;
362 enum cost_consts
364 COST_NEUTRAL = 0,
365 COST_INFINITE = 1000
368 enum stride_status
370 UNKNOWN_STRIDE = 0,
371 KNOWN_STRIDE = 1
374 enum phi_adjust_status
376 NOT_PHI_ADJUST = 0,
377 PHI_ADJUST = 1
380 enum count_phis_status
382 DONT_COUNT_PHIS = 0,
383 COUNT_PHIS = 1
386 /* Pointer map embodying a mapping from statements to candidates. */
387 static hash_map<gimple, slsr_cand_t> *stmt_cand_map;
389 /* Obstack for candidates. */
390 static struct obstack cand_obstack;
392 /* Obstack for candidate chains. */
393 static struct obstack chain_obstack;
395 /* An array INCR_VEC of incr_infos is used during analysis of related
396 candidates having an SSA name for a stride. INCR_VEC_LEN describes
397 its current length. MAX_INCR_VEC_LEN is used to avoid costly
398 pathological cases. */
399 static incr_info_t incr_vec;
400 static unsigned incr_vec_len;
401 const int MAX_INCR_VEC_LEN = 16;
403 /* For a chain of candidates with unknown stride, indicates whether or not
404 we must generate pointer arithmetic when replacing statements. */
405 static bool address_arithmetic_p;
407 /* Forward function declarations. */
408 static slsr_cand_t base_cand_from_table (tree);
409 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
410 static bool legal_cast_p_1 (tree, tree);
412 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
414 static slsr_cand_t
415 lookup_cand (cand_idx idx)
417 return cand_vec[idx - 1];
420 /* Helper for hashing a candidate chain header. */
422 struct cand_chain_hasher : typed_noop_remove <cand_chain>
424 typedef cand_chain value_type;
425 typedef cand_chain compare_type;
426 static inline hashval_t hash (const value_type *);
427 static inline bool equal (const value_type *, const compare_type *);
430 inline hashval_t
431 cand_chain_hasher::hash (const value_type *p)
433 tree base_expr = p->base_expr;
434 return iterative_hash_expr (base_expr, 0);
437 inline bool
438 cand_chain_hasher::equal (const value_type *chain1, const compare_type *chain2)
440 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
443 /* Hash table embodying a mapping from base exprs to chains of candidates. */
444 static hash_table<cand_chain_hasher> *base_cand_map;
446 /* Pointer map used by tree_to_aff_combination_expand. */
447 static hash_map<tree, name_expansion *> *name_expansions;
448 /* Pointer map embodying a mapping from bases to alternative bases. */
449 static hash_map<tree, tree> *alt_base_map;
451 /* Given BASE, use the tree affine combiniation facilities to
452 find the underlying tree expression for BASE, with any
453 immediate offset excluded.
455 N.B. we should eliminate this backtracking with better forward
456 analysis in a future release. */
458 static tree
459 get_alternative_base (tree base)
461 tree *result = alt_base_map->get (base);
463 if (result == NULL)
465 tree expr;
466 aff_tree aff;
468 tree_to_aff_combination_expand (base, TREE_TYPE (base),
469 &aff, &name_expansions);
470 aff.offset = 0;
471 expr = aff_combination_to_tree (&aff);
473 gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));
475 return expr == base ? NULL : expr;
478 return *result;
481 /* Look in the candidate table for a CAND_PHI that defines BASE and
482 return it if found; otherwise return NULL. */
484 static cand_idx
485 find_phi_def (tree base)
487 slsr_cand_t c;
489 if (TREE_CODE (base) != SSA_NAME)
490 return 0;
492 c = base_cand_from_table (base);
494 if (!c || c->kind != CAND_PHI)
495 return 0;
497 return c->cand_num;
500 /* Helper routine for find_basis_for_candidate. May be called twice:
501 once for the candidate's base expr, and optionally again either for
502 the candidate's phi definition or for a CAND_REF's alternative base
503 expression. */
505 static slsr_cand_t
506 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
508 cand_chain mapping_key;
509 cand_chain_t chain;
510 slsr_cand_t basis = NULL;
512 // Limit potential of N^2 behavior for long candidate chains.
513 int iters = 0;
514 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
516 mapping_key.base_expr = base_expr;
517 chain = base_cand_map->find (&mapping_key);
519 for (; chain && iters < max_iters; chain = chain->next, ++iters)
521 slsr_cand_t one_basis = chain->cand;
523 if (one_basis->kind != c->kind
524 || one_basis->cand_stmt == c->cand_stmt
525 || !operand_equal_p (one_basis->stride, c->stride, 0)
526 || !types_compatible_p (one_basis->cand_type, c->cand_type)
527 || !dominated_by_p (CDI_DOMINATORS,
528 gimple_bb (c->cand_stmt),
529 gimple_bb (one_basis->cand_stmt)))
530 continue;
532 if (!basis || basis->cand_num < one_basis->cand_num)
533 basis = one_basis;
536 return basis;
539 /* Use the base expr from candidate C to look for possible candidates
540 that can serve as a basis for C. Each potential basis must also
541 appear in a block that dominates the candidate statement and have
542 the same stride and type. If more than one possible basis exists,
543 the one with highest index in the vector is chosen; this will be
544 the most immediately dominating basis. */
546 static int
547 find_basis_for_candidate (slsr_cand_t c)
549 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
551 /* If a candidate doesn't have a basis using its base expression,
552 it may have a basis hidden by one or more intervening phis. */
553 if (!basis && c->def_phi)
555 basic_block basis_bb, phi_bb;
556 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
557 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
559 if (basis)
561 /* A hidden basis must dominate the phi-definition of the
562 candidate's base name. */
563 phi_bb = gimple_bb (phi_cand->cand_stmt);
564 basis_bb = gimple_bb (basis->cand_stmt);
566 if (phi_bb == basis_bb
567 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
569 basis = NULL;
570 c->basis = 0;
573 /* If we found a hidden basis, estimate additional dead-code
574 savings if the phi and its feeding statements can be removed. */
575 if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
576 c->dead_savings += phi_cand->dead_savings;
580 if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
582 tree alt_base_expr = get_alternative_base (c->base_expr);
583 if (alt_base_expr)
584 basis = find_basis_for_base_expr (c, alt_base_expr);
587 if (basis)
589 c->sibling = basis->dependent;
590 basis->dependent = c->cand_num;
591 return basis->cand_num;
594 return 0;
597 /* Record a mapping from BASE to C, indicating that C may potentially serve
598 as a basis using that base expression. BASE may be the same as
599 C->BASE_EXPR; alternatively BASE can be a different tree that share the
600 underlining expression of C->BASE_EXPR. */
602 static void
603 record_potential_basis (slsr_cand_t c, tree base)
605 cand_chain_t node;
606 cand_chain **slot;
608 gcc_assert (base);
610 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
611 node->base_expr = base;
612 node->cand = c;
613 node->next = NULL;
614 slot = base_cand_map->find_slot (node, INSERT);
616 if (*slot)
618 cand_chain_t head = (cand_chain_t) (*slot);
619 node->next = head->next;
620 head->next = node;
622 else
623 *slot = node;
626 /* Allocate storage for a new candidate and initialize its fields.
627 Attempt to find a basis for the candidate.
629 For CAND_REF, an alternative base may also be recorded and used
630 to find a basis. This helps cases where the expression hidden
631 behind BASE (which is usually an SSA_NAME) has immediate offset,
632 e.g.
634 a2[i][j] = 1;
635 a2[i + 20][j] = 2; */
637 static slsr_cand_t
638 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
639 const widest_int &index, tree stride, tree ctype,
640 unsigned savings)
642 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
643 sizeof (slsr_cand));
644 c->cand_stmt = gs;
645 c->base_expr = base;
646 c->stride = stride;
647 c->index = index;
648 c->cand_type = ctype;
649 c->kind = kind;
650 c->cand_num = cand_vec.length () + 1;
651 c->next_interp = 0;
652 c->dependent = 0;
653 c->sibling = 0;
654 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
655 c->dead_savings = savings;
657 cand_vec.safe_push (c);
659 if (kind == CAND_PHI)
660 c->basis = 0;
661 else
662 c->basis = find_basis_for_candidate (c);
664 record_potential_basis (c, base);
665 if (flag_expensive_optimizations && kind == CAND_REF)
667 tree alt_base = get_alternative_base (base);
668 if (alt_base)
669 record_potential_basis (c, alt_base);
672 return c;
675 /* Determine the target cost of statement GS when compiling according
676 to SPEED. */
678 static int
679 stmt_cost (gimple gs, bool speed)
681 tree lhs, rhs1, rhs2;
682 machine_mode lhs_mode;
684 gcc_assert (is_gimple_assign (gs));
685 lhs = gimple_assign_lhs (gs);
686 rhs1 = gimple_assign_rhs1 (gs);
687 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
689 switch (gimple_assign_rhs_code (gs))
691 case MULT_EXPR:
692 rhs2 = gimple_assign_rhs2 (gs);
694 if (tree_fits_shwi_p (rhs2))
695 return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
697 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
698 return mul_cost (speed, lhs_mode);
700 case PLUS_EXPR:
701 case POINTER_PLUS_EXPR:
702 case MINUS_EXPR:
703 return add_cost (speed, lhs_mode);
705 case NEGATE_EXPR:
706 return neg_cost (speed, lhs_mode);
708 CASE_CONVERT:
709 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
711 /* Note that we don't assign costs to copies that in most cases
712 will go away. */
713 default:
717 gcc_unreachable ();
718 return 0;
721 /* Look up the defining statement for BASE_IN and return a pointer
722 to its candidate in the candidate table, if any; otherwise NULL.
723 Only CAND_ADD and CAND_MULT candidates are returned. */
725 static slsr_cand_t
726 base_cand_from_table (tree base_in)
728 slsr_cand_t *result;
730 gimple def = SSA_NAME_DEF_STMT (base_in);
731 if (!def)
732 return (slsr_cand_t) NULL;
734 result = stmt_cand_map->get (def);
736 if (result && (*result)->kind != CAND_REF)
737 return *result;
739 return (slsr_cand_t) NULL;
742 /* Add an entry to the statement-to-candidate mapping. */
744 static void
745 add_cand_for_stmt (gimple gs, slsr_cand_t c)
747 gcc_assert (!stmt_cand_map->put (gs, c));
750 /* Given PHI which contains a phi statement, determine whether it
751 satisfies all the requirements of a phi candidate. If so, create
752 a candidate. Note that a CAND_PHI never has a basis itself, but
753 is used to help find a basis for subsequent candidates. */
755 static void
756 slsr_process_phi (gimple phi, bool speed)
758 unsigned i;
759 tree arg0_base = NULL_TREE, base_type;
760 slsr_cand_t c;
761 struct loop *cand_loop = gimple_bb (phi)->loop_father;
762 unsigned savings = 0;
764 /* A CAND_PHI requires each of its arguments to have the same
765 derived base name. (See the module header commentary for a
766 definition of derived base names.) Furthermore, all feeding
767 definitions must be in the same position in the loop hierarchy
768 as PHI. */
770 for (i = 0; i < gimple_phi_num_args (phi); i++)
772 slsr_cand_t arg_cand;
773 tree arg = gimple_phi_arg_def (phi, i);
774 tree derived_base_name = NULL_TREE;
775 gimple arg_stmt = NULL;
776 basic_block arg_bb = NULL;
778 if (TREE_CODE (arg) != SSA_NAME)
779 return;
781 arg_cand = base_cand_from_table (arg);
783 if (arg_cand)
785 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
787 if (!arg_cand->next_interp)
788 return;
790 arg_cand = lookup_cand (arg_cand->next_interp);
793 if (!integer_onep (arg_cand->stride))
794 return;
796 derived_base_name = arg_cand->base_expr;
797 arg_stmt = arg_cand->cand_stmt;
798 arg_bb = gimple_bb (arg_stmt);
800 /* Gather potential dead code savings if the phi statement
801 can be removed later on. */
802 if (has_single_use (arg))
804 if (gimple_code (arg_stmt) == GIMPLE_PHI)
805 savings += arg_cand->dead_savings;
806 else
807 savings += stmt_cost (arg_stmt, speed);
810 else
812 derived_base_name = arg;
814 if (SSA_NAME_IS_DEFAULT_DEF (arg))
815 arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
816 else
817 gimple_bb (SSA_NAME_DEF_STMT (arg));
820 if (!arg_bb || arg_bb->loop_father != cand_loop)
821 return;
823 if (i == 0)
824 arg0_base = derived_base_name;
825 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
826 return;
829 /* Create the candidate. "alloc_cand_and_find_basis" is named
830 misleadingly for this case, as no basis will be sought for a
831 CAND_PHI. */
832 base_type = TREE_TYPE (arg0_base);
834 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
835 0, integer_one_node, base_type, savings);
837 /* Add the candidate to the statement-candidate mapping. */
838 add_cand_for_stmt (phi, c);
841 /* Given PBASE which is a pointer to tree, look up the defining
842 statement for it and check whether the candidate is in the
843 form of:
845 X = B + (1 * S), S is integer constant
846 X = B + (i * S), S is integer one
848 If so, set PBASE to the candidate's base_expr and return double
849 int (i * S).
850 Otherwise, just return double int zero. */
852 static widest_int
853 backtrace_base_for_ref (tree *pbase)
855 tree base_in = *pbase;
856 slsr_cand_t base_cand;
858 STRIP_NOPS (base_in);
860 /* Strip off widening conversion(s) to handle cases where
861 e.g. 'B' is widened from an 'int' in order to calculate
862 a 64-bit address. */
863 if (CONVERT_EXPR_P (base_in)
864 && legal_cast_p_1 (base_in, TREE_OPERAND (base_in, 0)))
865 base_in = get_unwidened (base_in, NULL_TREE);
867 if (TREE_CODE (base_in) != SSA_NAME)
868 return 0;
870 base_cand = base_cand_from_table (base_in);
872 while (base_cand && base_cand->kind != CAND_PHI)
874 if (base_cand->kind == CAND_ADD
875 && base_cand->index == 1
876 && TREE_CODE (base_cand->stride) == INTEGER_CST)
878 /* X = B + (1 * S), S is integer constant. */
879 *pbase = base_cand->base_expr;
880 return wi::to_widest (base_cand->stride);
882 else if (base_cand->kind == CAND_ADD
883 && TREE_CODE (base_cand->stride) == INTEGER_CST
884 && integer_onep (base_cand->stride))
886 /* X = B + (i * S), S is integer one. */
887 *pbase = base_cand->base_expr;
888 return base_cand->index;
891 if (base_cand->next_interp)
892 base_cand = lookup_cand (base_cand->next_interp);
893 else
894 base_cand = NULL;
897 return 0;
900 /* Look for the following pattern:
902 *PBASE: MEM_REF (T1, C1)
904 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
906 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
908 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
910 *PINDEX: C4 * BITS_PER_UNIT
912 If not present, leave the input values unchanged and return FALSE.
913 Otherwise, modify the input values as follows and return TRUE:
915 *PBASE: T1
916 *POFFSET: MULT_EXPR (T2, C3)
917 *PINDEX: C1 + (C2 * C3) + C4
919 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
920 will be further restructured to:
922 *PBASE: T1
923 *POFFSET: MULT_EXPR (T2', C3)
924 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
926 static bool
927 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
928 tree *ptype)
930 tree base = *pbase, offset = *poffset;
931 widest_int index = *pindex;
932 tree mult_op0, t1, t2, type;
933 widest_int c1, c2, c3, c4, c5;
935 if (!base
936 || !offset
937 || TREE_CODE (base) != MEM_REF
938 || TREE_CODE (offset) != MULT_EXPR
939 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
940 || wi::umod_floor (index, BITS_PER_UNIT) != 0)
941 return false;
943 t1 = TREE_OPERAND (base, 0);
944 c1 = widest_int::from (mem_ref_offset (base), SIGNED);
945 type = TREE_TYPE (TREE_OPERAND (base, 1));
947 mult_op0 = TREE_OPERAND (offset, 0);
948 c3 = wi::to_widest (TREE_OPERAND (offset, 1));
950 if (TREE_CODE (mult_op0) == PLUS_EXPR)
952 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
954 t2 = TREE_OPERAND (mult_op0, 0);
955 c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
957 else
958 return false;
960 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
962 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
964 t2 = TREE_OPERAND (mult_op0, 0);
965 c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
967 else
968 return false;
970 else
972 t2 = mult_op0;
973 c2 = 0;
976 c4 = wi::lrshift (index, LOG2_BITS_PER_UNIT);
977 c5 = backtrace_base_for_ref (&t2);
979 *pbase = t1;
980 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
981 wide_int_to_tree (sizetype, c3));
982 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
983 *ptype = type;
985 return true;
988 /* Given GS which contains a data reference, create a CAND_REF entry in
989 the candidate table and attempt to find a basis. */
991 static void
992 slsr_process_ref (gimple gs)
994 tree ref_expr, base, offset, type;
995 HOST_WIDE_INT bitsize, bitpos;
996 machine_mode mode;
997 int unsignedp, reversep, volatilep;
998 slsr_cand_t c;
1000 if (gimple_vdef (gs))
1001 ref_expr = gimple_assign_lhs (gs);
1002 else
1003 ref_expr = gimple_assign_rhs1 (gs);
1005 if (!handled_component_p (ref_expr)
1006 || TREE_CODE (ref_expr) == BIT_FIELD_REF
1007 || (TREE_CODE (ref_expr) == COMPONENT_REF
1008 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
1009 return;
1011 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
1012 &unsignedp, &reversep, &volatilep, false);
1013 if (reversep)
1014 return;
1015 widest_int index = bitpos;
1017 if (!restructure_reference (&base, &offset, &index, &type))
1018 return;
1020 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
1021 type, 0);
1023 /* Add the candidate to the statement-candidate mapping. */
1024 add_cand_for_stmt (gs, c);
1027 /* Create a candidate entry for a statement GS, where GS multiplies
1028 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
1029 about the two SSA names into the new candidate. Return the new
1030 candidate. */
1032 static slsr_cand_t
1033 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1035 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1036 widest_int index;
1037 unsigned savings = 0;
1038 slsr_cand_t c;
1039 slsr_cand_t base_cand = base_cand_from_table (base_in);
1041 /* Look at all interpretations of the base candidate, if necessary,
1042 to find information to propagate into this candidate. */
1043 while (base_cand && !base && base_cand->kind != CAND_PHI)
1046 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1048 /* Y = (B + i') * 1
1049 X = Y * Z
1050 ================
1051 X = (B + i') * Z */
1052 base = base_cand->base_expr;
1053 index = base_cand->index;
1054 stride = stride_in;
1055 ctype = base_cand->cand_type;
1056 if (has_single_use (base_in))
1057 savings = (base_cand->dead_savings
1058 + stmt_cost (base_cand->cand_stmt, speed));
1060 else if (base_cand->kind == CAND_ADD
1061 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1063 /* Y = B + (i' * S), S constant
1064 X = Y * Z
1065 ============================
1066 X = B + ((i' * S) * Z) */
1067 base = base_cand->base_expr;
1068 index = base_cand->index * wi::to_widest (base_cand->stride);
1069 stride = stride_in;
1070 ctype = base_cand->cand_type;
1071 if (has_single_use (base_in))
1072 savings = (base_cand->dead_savings
1073 + stmt_cost (base_cand->cand_stmt, speed));
1076 if (base_cand->next_interp)
1077 base_cand = lookup_cand (base_cand->next_interp);
1078 else
1079 base_cand = NULL;
1082 if (!base)
1084 /* No interpretations had anything useful to propagate, so
1085 produce X = (Y + 0) * Z. */
1086 base = base_in;
1087 index = 0;
1088 stride = stride_in;
1089 ctype = TREE_TYPE (base_in);
1092 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1093 ctype, savings);
1094 return c;
1097 /* Create a candidate entry for a statement GS, where GS multiplies
1098 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1099 information about BASE_IN into the new candidate. Return the new
1100 candidate. */
1102 static slsr_cand_t
1103 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1105 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1106 widest_int index, temp;
1107 unsigned savings = 0;
1108 slsr_cand_t c;
1109 slsr_cand_t base_cand = base_cand_from_table (base_in);
1111 /* Look at all interpretations of the base candidate, if necessary,
1112 to find information to propagate into this candidate. */
1113 while (base_cand && !base && base_cand->kind != CAND_PHI)
1115 if (base_cand->kind == CAND_MULT
1116 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1118 /* Y = (B + i') * S, S constant
1119 X = Y * c
1120 ============================
1121 X = (B + i') * (S * c) */
1122 temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
1123 if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
1125 base = base_cand->base_expr;
1126 index = base_cand->index;
1127 stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
1128 ctype = base_cand->cand_type;
1129 if (has_single_use (base_in))
1130 savings = (base_cand->dead_savings
1131 + stmt_cost (base_cand->cand_stmt, speed));
1134 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1136 /* Y = B + (i' * 1)
1137 X = Y * c
1138 ===========================
1139 X = (B + i') * c */
1140 base = base_cand->base_expr;
1141 index = base_cand->index;
1142 stride = stride_in;
1143 ctype = base_cand->cand_type;
1144 if (has_single_use (base_in))
1145 savings = (base_cand->dead_savings
1146 + stmt_cost (base_cand->cand_stmt, speed));
1148 else if (base_cand->kind == CAND_ADD
1149 && base_cand->index == 1
1150 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1152 /* Y = B + (1 * S), S constant
1153 X = Y * c
1154 ===========================
1155 X = (B + S) * c */
1156 base = base_cand->base_expr;
1157 index = wi::to_widest (base_cand->stride);
1158 stride = stride_in;
1159 ctype = base_cand->cand_type;
1160 if (has_single_use (base_in))
1161 savings = (base_cand->dead_savings
1162 + stmt_cost (base_cand->cand_stmt, speed));
1165 if (base_cand->next_interp)
1166 base_cand = lookup_cand (base_cand->next_interp);
1167 else
1168 base_cand = NULL;
1171 if (!base)
1173 /* No interpretations had anything useful to propagate, so
1174 produce X = (Y + 0) * c. */
1175 base = base_in;
1176 index = 0;
1177 stride = stride_in;
1178 ctype = TREE_TYPE (base_in);
1181 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1182 ctype, savings);
1183 return c;
1186 /* Given GS which is a multiply of scalar integers, make an appropriate
1187 entry in the candidate table. If this is a multiply of two SSA names,
1188 create two CAND_MULT interpretations and attempt to find a basis for
1189 each of them. Otherwise, create a single CAND_MULT and attempt to
1190 find a basis. */
1192 static void
1193 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
1195 slsr_cand_t c, c2;
1197 /* If this is a multiply of an SSA name with itself, it is highly
1198 unlikely that we will get a strength reduction opportunity, so
1199 don't record it as a candidate. This simplifies the logic for
1200 finding a basis, so if this is removed that must be considered. */
1201 if (rhs1 == rhs2)
1202 return;
1204 if (TREE_CODE (rhs2) == SSA_NAME)
1206 /* Record an interpretation of this statement in the candidate table
1207 assuming RHS1 is the base expression and RHS2 is the stride. */
1208 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1210 /* Add the first interpretation to the statement-candidate mapping. */
1211 add_cand_for_stmt (gs, c);
1213 /* Record another interpretation of this statement assuming RHS1
1214 is the stride and RHS2 is the base expression. */
1215 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1216 c->next_interp = c2->cand_num;
1218 else
1220 /* Record an interpretation for the multiply-immediate. */
1221 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1223 /* Add the interpretation to the statement-candidate mapping. */
1224 add_cand_for_stmt (gs, c);
1228 /* Create a candidate entry for a statement GS, where GS adds two
1229 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1230 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1231 information about the two SSA names into the new candidate.
1232 Return the new candidate. */
1234 static slsr_cand_t
1235 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
1236 bool subtract_p, bool speed)
1238 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
1239 widest_int index;
1240 unsigned savings = 0;
1241 slsr_cand_t c;
1242 slsr_cand_t base_cand = base_cand_from_table (base_in);
1243 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1245 /* The most useful transformation is a multiply-immediate feeding
1246 an add or subtract. Look for that first. */
1247 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1249 if (addend_cand->kind == CAND_MULT
1250 && addend_cand->index == 0
1251 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1253 /* Z = (B + 0) * S, S constant
1254 X = Y +/- Z
1255 ===========================
1256 X = Y + ((+/-1 * S) * B) */
1257 base = base_in;
1258 index = wi::to_widest (addend_cand->stride);
1259 if (subtract_p)
1260 index = -index;
1261 stride = addend_cand->base_expr;
1262 ctype = TREE_TYPE (base_in);
1263 if (has_single_use (addend_in))
1264 savings = (addend_cand->dead_savings
1265 + stmt_cost (addend_cand->cand_stmt, speed));
1268 if (addend_cand->next_interp)
1269 addend_cand = lookup_cand (addend_cand->next_interp);
1270 else
1271 addend_cand = NULL;
1274 while (base_cand && !base && base_cand->kind != CAND_PHI)
1276 if (base_cand->kind == CAND_ADD
1277 && (base_cand->index == 0
1278 || operand_equal_p (base_cand->stride,
1279 integer_zero_node, 0)))
1281 /* Y = B + (i' * S), i' * S = 0
1282 X = Y +/- Z
1283 ============================
1284 X = B + (+/-1 * Z) */
1285 base = base_cand->base_expr;
1286 index = subtract_p ? -1 : 1;
1287 stride = addend_in;
1288 ctype = base_cand->cand_type;
1289 if (has_single_use (base_in))
1290 savings = (base_cand->dead_savings
1291 + stmt_cost (base_cand->cand_stmt, speed));
1293 else if (subtract_p)
1295 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1297 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1299 if (subtrahend_cand->kind == CAND_MULT
1300 && subtrahend_cand->index == 0
1301 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1303 /* Z = (B + 0) * S, S constant
1304 X = Y - Z
1305 ===========================
1306 Value: X = Y + ((-1 * S) * B) */
1307 base = base_in;
1308 index = wi::to_widest (subtrahend_cand->stride);
1309 index = -index;
1310 stride = subtrahend_cand->base_expr;
1311 ctype = TREE_TYPE (base_in);
1312 if (has_single_use (addend_in))
1313 savings = (subtrahend_cand->dead_savings
1314 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1317 if (subtrahend_cand->next_interp)
1318 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1319 else
1320 subtrahend_cand = NULL;
1324 if (base_cand->next_interp)
1325 base_cand = lookup_cand (base_cand->next_interp);
1326 else
1327 base_cand = NULL;
1330 if (!base)
1332 /* No interpretations had anything useful to propagate, so
1333 produce X = Y + (1 * Z). */
1334 base = base_in;
1335 index = subtract_p ? -1 : 1;
1336 stride = addend_in;
1337 ctype = TREE_TYPE (base_in);
1340 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1341 ctype, savings);
1342 return c;
1345 /* Create a candidate entry for a statement GS, where GS adds SSA
1346 name BASE_IN to constant INDEX_IN. Propagate any known information
1347 about BASE_IN into the new candidate. Return the new candidate. */
1349 static slsr_cand_t
1350 create_add_imm_cand (gimple gs, tree base_in, const widest_int &index_in,
1351 bool speed)
1353 enum cand_kind kind = CAND_ADD;
1354 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1355 widest_int index, multiple;
1356 unsigned savings = 0;
1357 slsr_cand_t c;
1358 slsr_cand_t base_cand = base_cand_from_table (base_in);
1360 while (base_cand && !base && base_cand->kind != CAND_PHI)
1362 signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
1364 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1365 && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
1366 sign, &multiple))
1368 /* Y = (B + i') * S, S constant, c = kS for some integer k
1369 X = Y + c
1370 ============================
1371 X = (B + (i'+ k)) * S
1373 Y = B + (i' * S), S constant, c = kS for some integer k
1374 X = Y + c
1375 ============================
1376 X = (B + (i'+ k)) * S */
1377 kind = base_cand->kind;
1378 base = base_cand->base_expr;
1379 index = base_cand->index + multiple;
1380 stride = base_cand->stride;
1381 ctype = base_cand->cand_type;
1382 if (has_single_use (base_in))
1383 savings = (base_cand->dead_savings
1384 + stmt_cost (base_cand->cand_stmt, speed));
1387 if (base_cand->next_interp)
1388 base_cand = lookup_cand (base_cand->next_interp);
1389 else
1390 base_cand = NULL;
1393 if (!base)
1395 /* No interpretations had anything useful to propagate, so
1396 produce X = Y + (c * 1). */
1397 kind = CAND_ADD;
1398 base = base_in;
1399 index = index_in;
1400 stride = integer_one_node;
1401 ctype = TREE_TYPE (base_in);
1404 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1405 ctype, savings);
1406 return c;
1409 /* Given GS which is an add or subtract of scalar integers or pointers,
1410 make at least one appropriate entry in the candidate table. */
1412 static void
1413 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1415 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1416 slsr_cand_t c = NULL, c2;
1418 if (TREE_CODE (rhs2) == SSA_NAME)
1420 /* First record an interpretation assuming RHS1 is the base expression
1421 and RHS2 is the stride. But it doesn't make sense for the
1422 stride to be a pointer, so don't record a candidate in that case. */
1423 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1425 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1427 /* Add the first interpretation to the statement-candidate
1428 mapping. */
1429 add_cand_for_stmt (gs, c);
1432 /* If the two RHS operands are identical, or this is a subtract,
1433 we're done. */
1434 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1435 return;
1437 /* Otherwise, record another interpretation assuming RHS2 is the
1438 base expression and RHS1 is the stride, again provided that the
1439 stride is not a pointer. */
1440 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1442 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1443 if (c)
1444 c->next_interp = c2->cand_num;
1445 else
1446 add_cand_for_stmt (gs, c2);
1449 else
1451 /* Record an interpretation for the add-immediate. */
1452 widest_int index = wi::to_widest (rhs2);
1453 if (subtract_p)
1454 index = -index;
1456 c = create_add_imm_cand (gs, rhs1, index, speed);
1458 /* Add the interpretation to the statement-candidate mapping. */
1459 add_cand_for_stmt (gs, c);
1463 /* Given GS which is a negate of a scalar integer, make an appropriate
1464 entry in the candidate table. A negate is equivalent to a multiply
1465 by -1. */
1467 static void
1468 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1470 /* Record a CAND_MULT interpretation for the multiply by -1. */
1471 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1473 /* Add the interpretation to the statement-candidate mapping. */
1474 add_cand_for_stmt (gs, c);
1477 /* Help function for legal_cast_p, operating on two trees. Checks
1478 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1479 for more details. */
1481 static bool
1482 legal_cast_p_1 (tree lhs, tree rhs)
1484 tree lhs_type, rhs_type;
1485 unsigned lhs_size, rhs_size;
1486 bool lhs_wraps, rhs_wraps;
1488 lhs_type = TREE_TYPE (lhs);
1489 rhs_type = TREE_TYPE (rhs);
1490 lhs_size = TYPE_PRECISION (lhs_type);
1491 rhs_size = TYPE_PRECISION (rhs_type);
1492 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1493 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1495 if (lhs_size < rhs_size
1496 || (rhs_wraps && !lhs_wraps)
1497 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1498 return false;
1500 return true;
1503 /* Return TRUE if GS is a statement that defines an SSA name from
1504 a conversion and is legal for us to combine with an add and multiply
1505 in the candidate table. For example, suppose we have:
1507 A = B + i;
1508 C = (type) A;
1509 D = C * S;
1511 Without the type-cast, we would create a CAND_MULT for D with base B,
1512 index i, and stride S. We want to record this candidate only if it
1513 is equivalent to apply the type cast following the multiply:
1515 A = B + i;
1516 E = A * S;
1517 D = (type) E;
1519 We will record the type with the candidate for D. This allows us
1520 to use a similar previous candidate as a basis. If we have earlier seen
1522 A' = B + i';
1523 C' = (type) A';
1524 D' = C' * S;
1526 we can replace D with
1528 D = D' + (i - i') * S;
1530 But if moving the type-cast would change semantics, we mustn't do this.
1532 This is legitimate for casts from a non-wrapping integral type to
1533 any integral type of the same or larger size. It is not legitimate
1534 to convert a wrapping type to a non-wrapping type, or to a wrapping
1535 type of a different size. I.e., with a wrapping type, we must
1536 assume that the addition B + i could wrap, in which case performing
1537 the multiply before or after one of the "illegal" type casts will
1538 have different semantics. */
1540 static bool
1541 legal_cast_p (gimple gs, tree rhs)
1543 if (!is_gimple_assign (gs)
1544 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1545 return false;
1547 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1550 /* Given GS which is a cast to a scalar integer type, determine whether
1551 the cast is legal for strength reduction. If so, make at least one
1552 appropriate entry in the candidate table. */
1554 static void
1555 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1557 tree lhs, ctype;
1558 slsr_cand_t base_cand, c, c2;
1559 unsigned savings = 0;
1561 if (!legal_cast_p (gs, rhs1))
1562 return;
1564 lhs = gimple_assign_lhs (gs);
1565 base_cand = base_cand_from_table (rhs1);
1566 ctype = TREE_TYPE (lhs);
1568 if (base_cand && base_cand->kind != CAND_PHI)
1570 while (base_cand)
1572 /* Propagate all data from the base candidate except the type,
1573 which comes from the cast, and the base candidate's cast,
1574 which is no longer applicable. */
1575 if (has_single_use (rhs1))
1576 savings = (base_cand->dead_savings
1577 + stmt_cost (base_cand->cand_stmt, speed));
1579 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1580 base_cand->base_expr,
1581 base_cand->index, base_cand->stride,
1582 ctype, savings);
1583 if (base_cand->next_interp)
1584 base_cand = lookup_cand (base_cand->next_interp);
1585 else
1586 base_cand = NULL;
1589 else
1591 /* If nothing is known about the RHS, create fresh CAND_ADD and
1592 CAND_MULT interpretations:
1594 X = Y + (0 * 1)
1595 X = (Y + 0) * 1
1597 The first of these is somewhat arbitrary, but the choice of
1598 1 for the stride simplifies the logic for propagating casts
1599 into their uses. */
1600 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1,
1601 0, integer_one_node, ctype, 0);
1602 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1,
1603 0, integer_one_node, ctype, 0);
1604 c->next_interp = c2->cand_num;
1607 /* Add the first (or only) interpretation to the statement-candidate
1608 mapping. */
1609 add_cand_for_stmt (gs, c);
1612 /* Given GS which is a copy of a scalar integer type, make at least one
1613 appropriate entry in the candidate table.
1615 This interface is included for completeness, but is unnecessary
1616 if this pass immediately follows a pass that performs copy
1617 propagation, such as DOM. */
1619 static void
1620 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1622 slsr_cand_t base_cand, c, c2;
1623 unsigned savings = 0;
1625 base_cand = base_cand_from_table (rhs1);
1627 if (base_cand && base_cand->kind != CAND_PHI)
1629 while (base_cand)
1631 /* Propagate all data from the base candidate. */
1632 if (has_single_use (rhs1))
1633 savings = (base_cand->dead_savings
1634 + stmt_cost (base_cand->cand_stmt, speed));
1636 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1637 base_cand->base_expr,
1638 base_cand->index, base_cand->stride,
1639 base_cand->cand_type, savings);
1640 if (base_cand->next_interp)
1641 base_cand = lookup_cand (base_cand->next_interp);
1642 else
1643 base_cand = NULL;
1646 else
1648 /* If nothing is known about the RHS, create fresh CAND_ADD and
1649 CAND_MULT interpretations:
1651 X = Y + (0 * 1)
1652 X = (Y + 0) * 1
1654 The first of these is somewhat arbitrary, but the choice of
1655 1 for the stride simplifies the logic for propagating casts
1656 into their uses. */
1657 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1,
1658 0, integer_one_node, TREE_TYPE (rhs1), 0);
1659 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1,
1660 0, integer_one_node, TREE_TYPE (rhs1), 0);
1661 c->next_interp = c2->cand_num;
1664 /* Add the first (or only) interpretation to the statement-candidate
1665 mapping. */
1666 add_cand_for_stmt (gs, c);
1669 class find_candidates_dom_walker : public dom_walker
1671 public:
1672 find_candidates_dom_walker (cdi_direction direction)
1673 : dom_walker (direction) {}
1674 virtual void before_dom_children (basic_block);
1677 /* Find strength-reduction candidates in block BB. */
1679 void
1680 find_candidates_dom_walker::before_dom_children (basic_block bb)
1682 bool speed = optimize_bb_for_speed_p (bb);
1683 gimple_stmt_iterator gsi;
1685 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1686 slsr_process_phi (gsi_stmt (gsi), speed);
1688 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1690 gimple gs = gsi_stmt (gsi);
1692 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1693 slsr_process_ref (gs);
1695 else if (is_gimple_assign (gs)
1696 && SCALAR_INT_MODE_P
1697 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1699 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1701 switch (gimple_assign_rhs_code (gs))
1703 case MULT_EXPR:
1704 case PLUS_EXPR:
1705 rhs1 = gimple_assign_rhs1 (gs);
1706 rhs2 = gimple_assign_rhs2 (gs);
1707 /* Should never happen, but currently some buggy situations
1708 in earlier phases put constants in rhs1. */
1709 if (TREE_CODE (rhs1) != SSA_NAME)
1710 continue;
1711 break;
1713 /* Possible future opportunity: rhs1 of a ptr+ can be
1714 an ADDR_EXPR. */
1715 case POINTER_PLUS_EXPR:
1716 case MINUS_EXPR:
1717 rhs2 = gimple_assign_rhs2 (gs);
1718 /* Fall-through. */
1720 CASE_CONVERT:
1721 case MODIFY_EXPR:
1722 case NEGATE_EXPR:
1723 rhs1 = gimple_assign_rhs1 (gs);
1724 if (TREE_CODE (rhs1) != SSA_NAME)
1725 continue;
1726 break;
1728 default:
1732 switch (gimple_assign_rhs_code (gs))
1734 case MULT_EXPR:
1735 slsr_process_mul (gs, rhs1, rhs2, speed);
1736 break;
1738 case PLUS_EXPR:
1739 case POINTER_PLUS_EXPR:
1740 case MINUS_EXPR:
1741 slsr_process_add (gs, rhs1, rhs2, speed);
1742 break;
1744 case NEGATE_EXPR:
1745 slsr_process_neg (gs, rhs1, speed);
1746 break;
1748 CASE_CONVERT:
1749 slsr_process_cast (gs, rhs1, speed);
1750 break;
1752 case MODIFY_EXPR:
1753 slsr_process_copy (gs, rhs1, speed);
1754 break;
1756 default:
1763 /* Dump a candidate for debug. */
1765 static void
1766 dump_candidate (slsr_cand_t c)
1768 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1769 gimple_bb (c->cand_stmt)->index);
1770 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1771 switch (c->kind)
1773 case CAND_MULT:
1774 fputs (" MULT : (", dump_file);
1775 print_generic_expr (dump_file, c->base_expr, 0);
1776 fputs (" + ", dump_file);
1777 print_decs (c->index, dump_file);
1778 fputs (") * ", dump_file);
1779 print_generic_expr (dump_file, c->stride, 0);
1780 fputs (" : ", dump_file);
1781 break;
1782 case CAND_ADD:
1783 fputs (" ADD : ", dump_file);
1784 print_generic_expr (dump_file, c->base_expr, 0);
1785 fputs (" + (", dump_file);
1786 print_decs (c->index, dump_file);
1787 fputs (" * ", dump_file);
1788 print_generic_expr (dump_file, c->stride, 0);
1789 fputs (") : ", dump_file);
1790 break;
1791 case CAND_REF:
1792 fputs (" REF : ", dump_file);
1793 print_generic_expr (dump_file, c->base_expr, 0);
1794 fputs (" + (", dump_file);
1795 print_generic_expr (dump_file, c->stride, 0);
1796 fputs (") + ", dump_file);
1797 print_decs (c->index, dump_file);
1798 fputs (" : ", dump_file);
1799 break;
1800 case CAND_PHI:
1801 fputs (" PHI : ", dump_file);
1802 print_generic_expr (dump_file, c->base_expr, 0);
1803 fputs (" + (unknown * ", dump_file);
1804 print_generic_expr (dump_file, c->stride, 0);
1805 fputs (") : ", dump_file);
1806 break;
1807 default:
1808 gcc_unreachable ();
1810 print_generic_expr (dump_file, c->cand_type, 0);
1811 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1812 c->basis, c->dependent, c->sibling);
1813 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1814 c->next_interp, c->dead_savings);
1815 if (c->def_phi)
1816 fprintf (dump_file, " phi: %d\n", c->def_phi);
1817 fputs ("\n", dump_file);
1820 /* Dump the candidate vector for debug. */
1822 static void
1823 dump_cand_vec (void)
1825 unsigned i;
1826 slsr_cand_t c;
1828 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1830 FOR_EACH_VEC_ELT (cand_vec, i, c)
1831 dump_candidate (c);
1834 /* Callback used to dump the candidate chains hash table. */
1837 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1839 const_cand_chain_t chain = *slot;
1840 cand_chain_t p;
1842 print_generic_expr (dump_file, chain->base_expr, 0);
1843 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1845 for (p = chain->next; p; p = p->next)
1846 fprintf (dump_file, " -> %d", p->cand->cand_num);
1848 fputs ("\n", dump_file);
1849 return 1;
1852 /* Dump the candidate chains. */
1854 static void
1855 dump_cand_chains (void)
1857 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1858 base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1859 (NULL);
1860 fputs ("\n", dump_file);
1863 /* Dump the increment vector for debug. */
1865 static void
1866 dump_incr_vec (void)
1868 if (dump_file && (dump_flags & TDF_DETAILS))
1870 unsigned i;
1872 fprintf (dump_file, "\nIncrement vector:\n\n");
1874 for (i = 0; i < incr_vec_len; i++)
1876 fprintf (dump_file, "%3d increment: ", i);
1877 print_decs (incr_vec[i].incr, dump_file);
1878 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1879 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1880 fputs ("\n initializer: ", dump_file);
1881 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1882 fputs ("\n\n", dump_file);
1887 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1888 data reference. */
1890 static void
1891 replace_ref (tree *expr, slsr_cand_t c)
1893 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1894 unsigned HOST_WIDE_INT misalign;
1895 unsigned align;
1897 /* Ensure the memory reference carries the minimum alignment
1898 requirement for the data type. See PR58041. */
1899 get_object_alignment_1 (*expr, &align, &misalign);
1900 if (misalign != 0)
1901 align = (misalign & -misalign);
1902 if (align < TYPE_ALIGN (acc_type))
1903 acc_type = build_aligned_type (acc_type, align);
1905 add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1906 c->base_expr, c->stride);
1907 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1908 wide_int_to_tree (c->cand_type, c->index));
1910 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1911 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1912 TREE_OPERAND (mem_ref, 0)
1913 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1914 /*simple_p=*/true, NULL,
1915 /*before=*/true, GSI_SAME_STMT);
1916 copy_ref_info (mem_ref, *expr);
1917 *expr = mem_ref;
1918 update_stmt (c->cand_stmt);
1921 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1922 dependent of candidate C with an equivalent strength-reduced data
1923 reference. */
1925 static void
1926 replace_refs (slsr_cand_t c)
1928 if (dump_file && (dump_flags & TDF_DETAILS))
1930 fputs ("Replacing reference: ", dump_file);
1931 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1934 if (gimple_vdef (c->cand_stmt))
1936 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1937 replace_ref (lhs, c);
1939 else
1941 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1942 replace_ref (rhs, c);
1945 if (dump_file && (dump_flags & TDF_DETAILS))
1947 fputs ("With: ", dump_file);
1948 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1949 fputs ("\n", dump_file);
1952 if (c->sibling)
1953 replace_refs (lookup_cand (c->sibling));
1955 if (c->dependent)
1956 replace_refs (lookup_cand (c->dependent));
1959 /* Return TRUE if candidate C is dependent upon a PHI. */
1961 static bool
1962 phi_dependent_cand_p (slsr_cand_t c)
1964 /* A candidate is not necessarily dependent upon a PHI just because
1965 it has a phi definition for its base name. It may have a basis
1966 that relies upon the same phi definition, in which case the PHI
1967 is irrelevant to this candidate. */
1968 return (c->def_phi
1969 && c->basis
1970 && lookup_cand (c->basis)->def_phi != c->def_phi);
1973 /* Calculate the increment required for candidate C relative to
1974 its basis. */
1976 static widest_int
1977 cand_increment (slsr_cand_t c)
1979 slsr_cand_t basis;
1981 /* If the candidate doesn't have a basis, just return its own
1982 index. This is useful in record_increments to help us find
1983 an existing initializer. Also, if the candidate's basis is
1984 hidden by a phi, then its own index will be the increment
1985 from the newly introduced phi basis. */
1986 if (!c->basis || phi_dependent_cand_p (c))
1987 return c->index;
1989 basis = lookup_cand (c->basis);
1990 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1991 return c->index - basis->index;
1994 /* Calculate the increment required for candidate C relative to
1995 its basis. If we aren't going to generate pointer arithmetic
1996 for this candidate, return the absolute value of that increment
1997 instead. */
1999 static inline widest_int
2000 cand_abs_increment (slsr_cand_t c)
2002 widest_int increment = cand_increment (c);
2004 if (!address_arithmetic_p && wi::neg_p (increment))
2005 increment = -increment;
2007 return increment;
2010 /* Return TRUE iff candidate C has already been replaced under
2011 another interpretation. */
2013 static inline bool
2014 cand_already_replaced (slsr_cand_t c)
2016 return (gimple_bb (c->cand_stmt) == 0);
2019 /* Common logic used by replace_unconditional_candidate and
2020 replace_conditional_candidate. */
2022 static void
2023 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
2025 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2026 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2028 /* It is highly unlikely, but possible, that the resulting
2029 bump doesn't fit in a HWI. Abandon the replacement
2030 in this case. This does not affect siblings or dependents
2031 of C. Restriction to signed HWI is conservative for unsigned
2032 types but allows for safe negation without twisted logic. */
2033 if (wi::fits_shwi_p (bump)
2034 && bump.to_shwi () != HOST_WIDE_INT_MIN
2035 /* It is not useful to replace casts, copies, or adds of
2036 an SSA name and a constant. */
2037 && cand_code != MODIFY_EXPR
2038 && !CONVERT_EXPR_CODE_P (cand_code)
2039 && cand_code != PLUS_EXPR
2040 && cand_code != POINTER_PLUS_EXPR
2041 && cand_code != MINUS_EXPR)
2043 enum tree_code code = PLUS_EXPR;
2044 tree bump_tree;
2045 gimple stmt_to_print = NULL;
2047 /* If the basis name and the candidate's LHS have incompatible
2048 types, introduce a cast. */
2049 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2050 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2051 if (wi::neg_p (bump))
2053 code = MINUS_EXPR;
2054 bump = -bump;
2057 bump_tree = wide_int_to_tree (target_type, bump);
2059 if (dump_file && (dump_flags & TDF_DETAILS))
2061 fputs ("Replacing: ", dump_file);
2062 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
2065 if (bump == 0)
2067 tree lhs = gimple_assign_lhs (c->cand_stmt);
2068 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
2069 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2070 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2071 gsi_replace (&gsi, copy_stmt, false);
2072 c->cand_stmt = copy_stmt;
2073 if (dump_file && (dump_flags & TDF_DETAILS))
2074 stmt_to_print = copy_stmt;
2076 else
2078 tree rhs1, rhs2;
2079 if (cand_code != NEGATE_EXPR) {
2080 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2081 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2083 if (cand_code != NEGATE_EXPR
2084 && ((operand_equal_p (rhs1, basis_name, 0)
2085 && operand_equal_p (rhs2, bump_tree, 0))
2086 || (operand_equal_p (rhs1, bump_tree, 0)
2087 && operand_equal_p (rhs2, basis_name, 0))))
2089 if (dump_file && (dump_flags & TDF_DETAILS))
2091 fputs ("(duplicate, not actually replacing)", dump_file);
2092 stmt_to_print = c->cand_stmt;
2095 else
2097 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2098 gimple_assign_set_rhs_with_ops (&gsi, code,
2099 basis_name, bump_tree);
2100 update_stmt (gsi_stmt (gsi));
2101 c->cand_stmt = gsi_stmt (gsi);
2102 if (dump_file && (dump_flags & TDF_DETAILS))
2103 stmt_to_print = gsi_stmt (gsi);
2107 if (dump_file && (dump_flags & TDF_DETAILS))
2109 fputs ("With: ", dump_file);
2110 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2111 fputs ("\n", dump_file);
2116 /* Replace candidate C with an add or subtract. Note that we only
2117 operate on CAND_MULTs with known strides, so we will never generate
2118 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2119 X = Y + ((i - i') * S), as described in the module commentary. The
2120 folded value ((i - i') * S) is referred to here as the "bump." */
2122 static void
2123 replace_unconditional_candidate (slsr_cand_t c)
2125 slsr_cand_t basis;
2127 if (cand_already_replaced (c))
2128 return;
2130 basis = lookup_cand (c->basis);
2131 widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
2133 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
2136 /* Return the index in the increment vector of the given INCREMENT,
2137 or -1 if not found. The latter can occur if more than
2138 MAX_INCR_VEC_LEN increments have been found. */
2140 static inline int
2141 incr_vec_index (const widest_int &increment)
2143 unsigned i;
2145 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2148 if (i < incr_vec_len)
2149 return i;
2150 else
2151 return -1;
2154 /* Create a new statement along edge E to add BASIS_NAME to the product
2155 of INCREMENT and the stride of candidate C. Create and return a new
2156 SSA name from *VAR to be used as the LHS of the new statement.
2157 KNOWN_STRIDE is true iff C's stride is a constant. */
2159 static tree
2160 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2161 widest_int increment, edge e, location_t loc,
2162 bool known_stride)
2164 basic_block insert_bb;
2165 gimple_stmt_iterator gsi;
2166 tree lhs, basis_type;
2167 gimple new_stmt;
2169 /* If the add candidate along this incoming edge has the same
2170 index as C's hidden basis, the hidden basis represents this
2171 edge correctly. */
2172 if (increment == 0)
2173 return basis_name;
2175 basis_type = TREE_TYPE (basis_name);
2176 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2178 if (known_stride)
2180 tree bump_tree;
2181 enum tree_code code = PLUS_EXPR;
2182 widest_int bump = increment * wi::to_widest (c->stride);
2183 if (wi::neg_p (bump))
2185 code = MINUS_EXPR;
2186 bump = -bump;
2189 bump_tree = wide_int_to_tree (basis_type, bump);
2190 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2191 bump_tree);
2193 else
2195 int i;
2196 bool negate_incr = (!address_arithmetic_p && wi::neg_p (increment));
2197 i = incr_vec_index (negate_incr ? -increment : increment);
2198 gcc_assert (i >= 0);
2200 if (incr_vec[i].initializer)
2202 enum tree_code code = negate_incr ? MINUS_EXPR : PLUS_EXPR;
2203 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2204 incr_vec[i].initializer);
2206 else if (increment == 1)
2207 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, lhs, basis_name,
2208 c->stride);
2209 else if (increment == -1)
2210 new_stmt = gimple_build_assign_with_ops (MINUS_EXPR, lhs, basis_name,
2211 c->stride);
2212 else
2213 gcc_unreachable ();
2216 insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
2217 gsi = gsi_last_bb (insert_bb);
2219 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2220 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
2221 else
2222 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2224 gimple_set_location (new_stmt, loc);
2226 if (dump_file && (dump_flags & TDF_DETAILS))
2228 fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
2229 print_gimple_stmt (dump_file, new_stmt, 0, 0);
2232 return lhs;
2235 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2236 is hidden by the phi node FROM_PHI, create a new phi node in the same
2237 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2238 with its phi arguments representing conditional adjustments to the
2239 hidden basis along conditional incoming paths. Those adjustments are
2240 made by creating add statements (and sometimes recursively creating
2241 phis) along those incoming paths. LOC is the location to attach to
2242 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2243 constant. */
2245 static tree
2246 create_phi_basis (slsr_cand_t c, gimple from_phi, tree basis_name,
2247 location_t loc, bool known_stride)
2249 int i;
2250 tree name, phi_arg;
2251 gimple phi;
2252 vec<tree> phi_args;
2253 slsr_cand_t basis = lookup_cand (c->basis);
2254 int nargs = gimple_phi_num_args (from_phi);
2255 basic_block phi_bb = gimple_bb (from_phi);
2256 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (from_phi));
2257 phi_args.create (nargs);
2259 /* Process each argument of the existing phi that represents
2260 conditionally-executed add candidates. */
2261 for (i = 0; i < nargs; i++)
2263 edge e = (*phi_bb->preds)[i];
2264 tree arg = gimple_phi_arg_def (from_phi, i);
2265 tree feeding_def;
2267 /* If the phi argument is the base name of the CAND_PHI, then
2268 this incoming arc should use the hidden basis. */
2269 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2270 if (basis->index == 0)
2271 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2272 else
2274 widest_int incr = -basis->index;
2275 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2276 e, loc, known_stride);
2278 else
2280 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2282 /* If there is another phi along this incoming edge, we must
2283 process it in the same fashion to ensure that all basis
2284 adjustments are made along its incoming edges. */
2285 if (gimple_code (arg_def) == GIMPLE_PHI)
2286 feeding_def = create_phi_basis (c, arg_def, basis_name,
2287 loc, known_stride);
2288 else
2290 slsr_cand_t arg_cand = base_cand_from_table (arg);
2291 widest_int diff = arg_cand->index - basis->index;
2292 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2293 e, loc, known_stride);
2297 /* Because of recursion, we need to save the arguments in a vector
2298 so we can create the PHI statement all at once. Otherwise the
2299 storage for the half-created PHI can be reclaimed. */
2300 phi_args.safe_push (feeding_def);
2303 /* Create the new phi basis. */
2304 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2305 phi = create_phi_node (name, phi_bb);
2306 SSA_NAME_DEF_STMT (name) = phi;
2308 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2310 edge e = (*phi_bb->preds)[i];
2311 add_phi_arg (phi, phi_arg, e, loc);
2314 update_stmt (phi);
2316 if (dump_file && (dump_flags & TDF_DETAILS))
2318 fputs ("Introducing new phi basis: ", dump_file);
2319 print_gimple_stmt (dump_file, phi, 0, 0);
2322 return name;
2325 /* Given a candidate C whose basis is hidden by at least one intervening
2326 phi, introduce a matching number of new phis to represent its basis
2327 adjusted by conditional increments along possible incoming paths. Then
2328 replace C as though it were an unconditional candidate, using the new
2329 basis. */
2331 static void
2332 replace_conditional_candidate (slsr_cand_t c)
2334 tree basis_name, name;
2335 slsr_cand_t basis;
2336 location_t loc;
2338 /* Look up the LHS SSA name from C's basis. This will be the
2339 RHS1 of the adds we will introduce to create new phi arguments. */
2340 basis = lookup_cand (c->basis);
2341 basis_name = gimple_assign_lhs (basis->cand_stmt);
2343 /* Create a new phi statement which will represent C's true basis
2344 after the transformation is complete. */
2345 loc = gimple_location (c->cand_stmt);
2346 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2347 basis_name, loc, KNOWN_STRIDE);
2348 /* Replace C with an add of the new basis phi and a constant. */
2349 widest_int bump = c->index * wi::to_widest (c->stride);
2351 replace_mult_candidate (c, name, bump);
2354 /* Compute the expected costs of inserting basis adjustments for
2355 candidate C with phi-definition PHI. The cost of inserting
2356 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2357 which are themselves phi results, recursively calculate costs
2358 for those phis as well. */
2360 static int
2361 phi_add_costs (gimple phi, slsr_cand_t c, int one_add_cost)
2363 unsigned i;
2364 int cost = 0;
2365 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2367 /* If we work our way back to a phi that isn't dominated by the hidden
2368 basis, this isn't a candidate for replacement. Indicate this by
2369 returning an unreasonably high cost. It's not easy to detect
2370 these situations when determining the basis, so we defer the
2371 decision until now. */
2372 basic_block phi_bb = gimple_bb (phi);
2373 slsr_cand_t basis = lookup_cand (c->basis);
2374 basic_block basis_bb = gimple_bb (basis->cand_stmt);
2376 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2377 return COST_INFINITE;
2379 for (i = 0; i < gimple_phi_num_args (phi); i++)
2381 tree arg = gimple_phi_arg_def (phi, i);
2383 if (arg != phi_cand->base_expr)
2385 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2387 if (gimple_code (arg_def) == GIMPLE_PHI)
2388 cost += phi_add_costs (arg_def, c, one_add_cost);
2389 else
2391 slsr_cand_t arg_cand = base_cand_from_table (arg);
2393 if (arg_cand->index != c->index)
2394 cost += one_add_cost;
2399 return cost;
2402 /* For candidate C, each sibling of candidate C, and each dependent of
2403 candidate C, determine whether the candidate is dependent upon a
2404 phi that hides its basis. If not, replace the candidate unconditionally.
2405 Otherwise, determine whether the cost of introducing compensation code
2406 for the candidate is offset by the gains from strength reduction. If
2407 so, replace the candidate and introduce the compensation code. */
2409 static void
2410 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2412 if (phi_dependent_cand_p (c))
2414 if (c->kind == CAND_MULT)
2416 /* A candidate dependent upon a phi will replace a multiply by
2417 a constant with an add, and will insert at most one add for
2418 each phi argument. Add these costs with the potential dead-code
2419 savings to determine profitability. */
2420 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2421 int mult_savings = stmt_cost (c->cand_stmt, speed);
2422 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2423 tree phi_result = gimple_phi_result (phi);
2424 int one_add_cost = add_cost (speed,
2425 TYPE_MODE (TREE_TYPE (phi_result)));
2426 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2427 int cost = add_costs - mult_savings - c->dead_savings;
2429 if (dump_file && (dump_flags & TDF_DETAILS))
2431 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2432 fprintf (dump_file, " add_costs = %d\n", add_costs);
2433 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2434 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2435 fprintf (dump_file, " cost = %d\n", cost);
2436 if (cost <= COST_NEUTRAL)
2437 fputs (" Replacing...\n", dump_file);
2438 else
2439 fputs (" Not replaced.\n", dump_file);
2442 if (cost <= COST_NEUTRAL)
2443 replace_conditional_candidate (c);
2446 else
2447 replace_unconditional_candidate (c);
2449 if (c->sibling)
2450 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2452 if (c->dependent)
2453 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2456 /* Count the number of candidates in the tree rooted at C that have
2457 not already been replaced under other interpretations. */
2459 static int
2460 count_candidates (slsr_cand_t c)
2462 unsigned count = cand_already_replaced (c) ? 0 : 1;
2464 if (c->sibling)
2465 count += count_candidates (lookup_cand (c->sibling));
2467 if (c->dependent)
2468 count += count_candidates (lookup_cand (c->dependent));
2470 return count;
2473 /* Increase the count of INCREMENT by one in the increment vector.
2474 INCREMENT is associated with candidate C. If INCREMENT is to be
2475 conditionally executed as part of a conditional candidate replacement,
2476 IS_PHI_ADJUST is true, otherwise false. If an initializer
2477 T_0 = stride * I is provided by a candidate that dominates all
2478 candidates with the same increment, also record T_0 for subsequent use. */
2480 static void
2481 record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
2483 bool found = false;
2484 unsigned i;
2486 /* Treat increments that differ only in sign as identical so as to
2487 share initializers, unless we are generating pointer arithmetic. */
2488 if (!address_arithmetic_p && wi::neg_p (increment))
2489 increment = -increment;
2491 for (i = 0; i < incr_vec_len; i++)
2493 if (incr_vec[i].incr == increment)
2495 incr_vec[i].count++;
2496 found = true;
2498 /* If we previously recorded an initializer that doesn't
2499 dominate this candidate, it's not going to be useful to
2500 us after all. */
2501 if (incr_vec[i].initializer
2502 && !dominated_by_p (CDI_DOMINATORS,
2503 gimple_bb (c->cand_stmt),
2504 incr_vec[i].init_bb))
2506 incr_vec[i].initializer = NULL_TREE;
2507 incr_vec[i].init_bb = NULL;
2510 break;
2514 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2516 /* The first time we see an increment, create the entry for it.
2517 If this is the root candidate which doesn't have a basis, set
2518 the count to zero. We're only processing it so it can possibly
2519 provide an initializer for other candidates. */
2520 incr_vec[incr_vec_len].incr = increment;
2521 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2522 incr_vec[incr_vec_len].cost = COST_INFINITE;
2524 /* Optimistically record the first occurrence of this increment
2525 as providing an initializer (if it does); we will revise this
2526 opinion later if it doesn't dominate all other occurrences.
2527 Exception: increments of -1, 0, 1 never need initializers;
2528 and phi adjustments don't ever provide initializers. */
2529 if (c->kind == CAND_ADD
2530 && !is_phi_adjust
2531 && c->index == increment
2532 && (wi::gts_p (increment, 1)
2533 || wi::lts_p (increment, -1))
2534 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2535 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2537 tree t0 = NULL_TREE;
2538 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2539 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2540 if (operand_equal_p (rhs1, c->base_expr, 0))
2541 t0 = rhs2;
2542 else if (operand_equal_p (rhs2, c->base_expr, 0))
2543 t0 = rhs1;
2544 if (t0
2545 && SSA_NAME_DEF_STMT (t0)
2546 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2548 incr_vec[incr_vec_len].initializer = t0;
2549 incr_vec[incr_vec_len++].init_bb
2550 = gimple_bb (SSA_NAME_DEF_STMT (t0));
2552 else
2554 incr_vec[incr_vec_len].initializer = NULL_TREE;
2555 incr_vec[incr_vec_len++].init_bb = NULL;
2558 else
2560 incr_vec[incr_vec_len].initializer = NULL_TREE;
2561 incr_vec[incr_vec_len++].init_bb = NULL;
2566 /* Given phi statement PHI that hides a candidate from its BASIS, find
2567 the increments along each incoming arc (recursively handling additional
2568 phis that may be present) and record them. These increments are the
2569 difference in index between the index-adjusting statements and the
2570 index of the basis. */
2572 static void
2573 record_phi_increments (slsr_cand_t basis, gimple phi)
2575 unsigned i;
2576 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2578 for (i = 0; i < gimple_phi_num_args (phi); i++)
2580 tree arg = gimple_phi_arg_def (phi, i);
2582 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2584 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2586 if (gimple_code (arg_def) == GIMPLE_PHI)
2587 record_phi_increments (basis, arg_def);
2588 else
2590 slsr_cand_t arg_cand = base_cand_from_table (arg);
2591 widest_int diff = arg_cand->index - basis->index;
2592 record_increment (arg_cand, diff, PHI_ADJUST);
2598 /* Determine how many times each unique increment occurs in the set
2599 of candidates rooted at C's parent, recording the data in the
2600 increment vector. For each unique increment I, if an initializer
2601 T_0 = stride * I is provided by a candidate that dominates all
2602 candidates with the same increment, also record T_0 for subsequent
2603 use. */
2605 static void
2606 record_increments (slsr_cand_t c)
2608 if (!cand_already_replaced (c))
2610 if (!phi_dependent_cand_p (c))
2611 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2612 else
2614 /* A candidate with a basis hidden by a phi will have one
2615 increment for its relationship to the index represented by
2616 the phi, and potentially additional increments along each
2617 incoming edge. For the root of the dependency tree (which
2618 has no basis), process just the initial index in case it has
2619 an initializer that can be used by subsequent candidates. */
2620 record_increment (c, c->index, NOT_PHI_ADJUST);
2622 if (c->basis)
2623 record_phi_increments (lookup_cand (c->basis),
2624 lookup_cand (c->def_phi)->cand_stmt);
2628 if (c->sibling)
2629 record_increments (lookup_cand (c->sibling));
2631 if (c->dependent)
2632 record_increments (lookup_cand (c->dependent));
2635 /* Add up and return the costs of introducing add statements that
2636 require the increment INCR on behalf of candidate C and phi
2637 statement PHI. Accumulate into *SAVINGS the potential savings
2638 from removing existing statements that feed PHI and have no other
2639 uses. */
2641 static int
2642 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple phi, int *savings)
2644 unsigned i;
2645 int cost = 0;
2646 slsr_cand_t basis = lookup_cand (c->basis);
2647 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2649 for (i = 0; i < gimple_phi_num_args (phi); i++)
2651 tree arg = gimple_phi_arg_def (phi, i);
2653 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2655 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2657 if (gimple_code (arg_def) == GIMPLE_PHI)
2659 int feeding_savings = 0;
2660 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2661 if (has_single_use (gimple_phi_result (arg_def)))
2662 *savings += feeding_savings;
2664 else
2666 slsr_cand_t arg_cand = base_cand_from_table (arg);
2667 widest_int diff = arg_cand->index - basis->index;
2669 if (incr == diff)
2671 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2672 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2673 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2674 if (has_single_use (lhs))
2675 *savings += stmt_cost (arg_cand->cand_stmt, true);
2681 return cost;
2684 /* Return the first candidate in the tree rooted at C that has not
2685 already been replaced, favoring siblings over dependents. */
2687 static slsr_cand_t
2688 unreplaced_cand_in_tree (slsr_cand_t c)
2690 if (!cand_already_replaced (c))
2691 return c;
2693 if (c->sibling)
2695 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2696 if (sib)
2697 return sib;
2700 if (c->dependent)
2702 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2703 if (dep)
2704 return dep;
2707 return NULL;
2710 /* Return TRUE if the candidates in the tree rooted at C should be
2711 optimized for speed, else FALSE. We estimate this based on the block
2712 containing the most dominant candidate in the tree that has not yet
2713 been replaced. */
2715 static bool
2716 optimize_cands_for_speed_p (slsr_cand_t c)
2718 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2719 gcc_assert (c2);
2720 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2723 /* Add COST_IN to the lowest cost of any dependent path starting at
2724 candidate C or any of its siblings, counting only candidates along
2725 such paths with increment INCR. Assume that replacing a candidate
2726 reduces cost by REPL_SAVINGS. Also account for savings from any
2727 statements that would go dead. If COUNT_PHIS is true, include
2728 costs of introducing feeding statements for conditional candidates. */
2730 static int
2731 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2732 const widest_int &incr, bool count_phis)
2734 int local_cost, sib_cost, savings = 0;
2735 widest_int cand_incr = cand_abs_increment (c);
2737 if (cand_already_replaced (c))
2738 local_cost = cost_in;
2739 else if (incr == cand_incr)
2740 local_cost = cost_in - repl_savings - c->dead_savings;
2741 else
2742 local_cost = cost_in - c->dead_savings;
2744 if (count_phis
2745 && phi_dependent_cand_p (c)
2746 && !cand_already_replaced (c))
2748 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2749 local_cost += phi_incr_cost (c, incr, phi, &savings);
2751 if (has_single_use (gimple_phi_result (phi)))
2752 local_cost -= savings;
2755 if (c->dependent)
2756 local_cost = lowest_cost_path (local_cost, repl_savings,
2757 lookup_cand (c->dependent), incr,
2758 count_phis);
2760 if (c->sibling)
2762 sib_cost = lowest_cost_path (cost_in, repl_savings,
2763 lookup_cand (c->sibling), incr,
2764 count_phis);
2765 local_cost = MIN (local_cost, sib_cost);
2768 return local_cost;
2771 /* Compute the total savings that would accrue from all replacements
2772 in the candidate tree rooted at C, counting only candidates with
2773 increment INCR. Assume that replacing a candidate reduces cost
2774 by REPL_SAVINGS. Also account for savings from statements that
2775 would go dead. */
2777 static int
2778 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
2779 bool count_phis)
2781 int savings = 0;
2782 widest_int cand_incr = cand_abs_increment (c);
2784 if (incr == cand_incr && !cand_already_replaced (c))
2785 savings += repl_savings + c->dead_savings;
2787 if (count_phis
2788 && phi_dependent_cand_p (c)
2789 && !cand_already_replaced (c))
2791 int phi_savings = 0;
2792 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2793 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2795 if (has_single_use (gimple_phi_result (phi)))
2796 savings += phi_savings;
2799 if (c->dependent)
2800 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2801 count_phis);
2803 if (c->sibling)
2804 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2805 count_phis);
2807 return savings;
2810 /* Use target-specific costs to determine and record which increments
2811 in the current candidate tree are profitable to replace, assuming
2812 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2813 the candidate tree.
2815 One slight limitation here is that we don't account for the possible
2816 introduction of casts in some cases. See replace_one_candidate for
2817 the cases where these are introduced. This should probably be cleaned
2818 up sometime. */
2820 static void
2821 analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
2823 unsigned i;
2825 for (i = 0; i < incr_vec_len; i++)
2827 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2829 /* If somehow this increment is bigger than a HWI, we won't
2830 be optimizing candidates that use it. And if the increment
2831 has a count of zero, nothing will be done with it. */
2832 if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
2833 incr_vec[i].cost = COST_INFINITE;
2835 /* Increments of 0, 1, and -1 are always profitable to replace,
2836 because they always replace a multiply or add with an add or
2837 copy, and may cause one or more existing instructions to go
2838 dead. Exception: -1 can't be assumed to be profitable for
2839 pointer addition. */
2840 else if (incr == 0
2841 || incr == 1
2842 || (incr == -1
2843 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2844 != POINTER_PLUS_EXPR)))
2845 incr_vec[i].cost = COST_NEUTRAL;
2847 /* FORNOW: If we need to add an initializer, give up if a cast from
2848 the candidate's type to its stride's type can lose precision.
2849 This could eventually be handled better by expressly retaining the
2850 result of a cast to a wider type in the stride. Example:
2852 short int _1;
2853 _2 = (int) _1;
2854 _3 = _2 * 10;
2855 _4 = x + _3; ADD: x + (10 * _1) : int
2856 _5 = _2 * 15;
2857 _6 = x + _3; ADD: x + (15 * _1) : int
2859 Right now replacing _6 would cause insertion of an initializer
2860 of the form "short int T = _1 * 5;" followed by a cast to
2861 int, which could overflow incorrectly. Had we recorded _2 or
2862 (int)_1 as the stride, this wouldn't happen. However, doing
2863 this breaks other opportunities, so this will require some
2864 care. */
2865 else if (!incr_vec[i].initializer
2866 && TREE_CODE (first_dep->stride) != INTEGER_CST
2867 && !legal_cast_p_1 (first_dep->stride,
2868 gimple_assign_lhs (first_dep->cand_stmt)))
2870 incr_vec[i].cost = COST_INFINITE;
2872 /* If we need to add an initializer, make sure we don't introduce
2873 a multiply by a pointer type, which can happen in certain cast
2874 scenarios. FIXME: When cleaning up these cast issues, we can
2875 afford to introduce the multiply provided we cast out to an
2876 unsigned int of appropriate size. */
2877 else if (!incr_vec[i].initializer
2878 && TREE_CODE (first_dep->stride) != INTEGER_CST
2879 && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))
2881 incr_vec[i].cost = COST_INFINITE;
2883 /* For any other increment, if this is a multiply candidate, we
2884 must introduce a temporary T and initialize it with
2885 T_0 = stride * increment. When optimizing for speed, walk the
2886 candidate tree to calculate the best cost reduction along any
2887 path; if it offsets the fixed cost of inserting the initializer,
2888 replacing the increment is profitable. When optimizing for
2889 size, instead calculate the total cost reduction from replacing
2890 all candidates with this increment. */
2891 else if (first_dep->kind == CAND_MULT)
2893 int cost = mult_by_coeff_cost (incr, mode, speed);
2894 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2895 if (speed)
2896 cost = lowest_cost_path (cost, repl_savings, first_dep,
2897 incr_vec[i].incr, COUNT_PHIS);
2898 else
2899 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
2900 COUNT_PHIS);
2902 incr_vec[i].cost = cost;
2905 /* If this is an add candidate, the initializer may already
2906 exist, so only calculate the cost of the initializer if it
2907 doesn't. We are replacing one add with another here, so the
2908 known replacement savings is zero. We will account for removal
2909 of dead instructions in lowest_cost_path or total_savings. */
2910 else
2912 int cost = 0;
2913 if (!incr_vec[i].initializer)
2914 cost = mult_by_coeff_cost (incr, mode, speed);
2916 if (speed)
2917 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
2918 DONT_COUNT_PHIS);
2919 else
2920 cost -= total_savings (0, first_dep, incr_vec[i].incr,
2921 DONT_COUNT_PHIS);
2923 incr_vec[i].cost = cost;
2928 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2929 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2930 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2931 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2932 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2934 static basic_block
2935 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2936 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2938 basic_block ncd;
2940 if (!bb1)
2942 *where = c2;
2943 return bb2;
2946 if (!bb2)
2948 *where = c1;
2949 return bb1;
2952 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2954 /* If both candidates are in the same block, the earlier
2955 candidate wins. */
2956 if (bb1 == ncd && bb2 == ncd)
2958 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2959 *where = c2;
2960 else
2961 *where = c1;
2964 /* Otherwise, if one of them produced a candidate in the
2965 dominator, that one wins. */
2966 else if (bb1 == ncd)
2967 *where = c1;
2969 else if (bb2 == ncd)
2970 *where = c2;
2972 /* If neither matches the dominator, neither wins. */
2973 else
2974 *where = NULL;
2976 return ncd;
2979 /* Consider all candidates that feed PHI. Find the nearest common
2980 dominator of those candidates requiring the given increment INCR.
2981 Further find and return the nearest common dominator of this result
2982 with block NCD. If the returned block contains one or more of the
2983 candidates, return the earliest candidate in the block in *WHERE. */
2985 static basic_block
2986 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gimple phi,
2987 basic_block ncd, slsr_cand_t *where)
2989 unsigned i;
2990 slsr_cand_t basis = lookup_cand (c->basis);
2991 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2993 for (i = 0; i < gimple_phi_num_args (phi); i++)
2995 tree arg = gimple_phi_arg_def (phi, i);
2997 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2999 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3001 if (gimple_code (arg_def) == GIMPLE_PHI)
3002 ncd = ncd_with_phi (c, incr, arg_def, ncd, where);
3003 else
3005 slsr_cand_t arg_cand = base_cand_from_table (arg);
3006 widest_int diff = arg_cand->index - basis->index;
3007 basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3009 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3010 ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3015 return ncd;
3018 /* Consider the candidate C together with any candidates that feed
3019 C's phi dependence (if any). Find and return the nearest common
3020 dominator of those candidates requiring the given increment INCR.
3021 If the returned block contains one or more of the candidates,
3022 return the earliest candidate in the block in *WHERE. */
3024 static basic_block
3025 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
3027 basic_block ncd = NULL;
3029 if (cand_abs_increment (c) == incr)
3031 ncd = gimple_bb (c->cand_stmt);
3032 *where = c;
3035 if (phi_dependent_cand_p (c))
3036 ncd = ncd_with_phi (c, incr, lookup_cand (c->def_phi)->cand_stmt,
3037 ncd, where);
3039 return ncd;
3042 /* Consider all candidates in the tree rooted at C for which INCR
3043 represents the required increment of C relative to its basis.
3044 Find and return the basic block that most nearly dominates all
3045 such candidates. If the returned block contains one or more of
3046 the candidates, return the earliest candidate in the block in
3047 *WHERE. */
3049 static basic_block
3050 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
3051 slsr_cand_t *where)
3053 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3054 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3056 /* First find the NCD of all siblings and dependents. */
3057 if (c->sibling)
3058 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3059 incr, &sib_where);
3060 if (c->dependent)
3061 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3062 incr, &dep_where);
3063 if (!sib_ncd && !dep_ncd)
3065 new_where = NULL;
3066 ncd = NULL;
3068 else if (sib_ncd && !dep_ncd)
3070 new_where = sib_where;
3071 ncd = sib_ncd;
3073 else if (dep_ncd && !sib_ncd)
3075 new_where = dep_where;
3076 ncd = dep_ncd;
3078 else
3079 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3080 dep_where, &new_where);
3082 /* If the candidate's increment doesn't match the one we're interested
3083 in (and nor do any increments for feeding defs of a phi-dependence),
3084 then the result depends only on siblings and dependents. */
3085 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3087 if (!this_ncd || cand_already_replaced (c))
3089 *where = new_where;
3090 return ncd;
3093 /* Otherwise, compare this candidate with the result from all siblings
3094 and dependents. */
3095 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3097 return ncd;
3100 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3102 static inline bool
3103 profitable_increment_p (unsigned index)
3105 return (incr_vec[index].cost <= COST_NEUTRAL);
3108 /* For each profitable increment in the increment vector not equal to
3109 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3110 dominator of all statements in the candidate chain rooted at C
3111 that require that increment, and insert an initializer
3112 T_0 = stride * increment at that location. Record T_0 with the
3113 increment record. */
3115 static void
3116 insert_initializers (slsr_cand_t c)
3118 unsigned i;
3120 for (i = 0; i < incr_vec_len; i++)
3122 basic_block bb;
3123 slsr_cand_t where = NULL;
3124 gimple init_stmt;
3125 tree stride_type, new_name, incr_tree;
3126 widest_int incr = incr_vec[i].incr;
3128 if (!profitable_increment_p (i)
3129 || incr == 1
3130 || (incr == -1
3131 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
3132 || incr == 0)
3133 continue;
3135 /* We may have already identified an existing initializer that
3136 will suffice. */
3137 if (incr_vec[i].initializer)
3139 if (dump_file && (dump_flags & TDF_DETAILS))
3141 fputs ("Using existing initializer: ", dump_file);
3142 print_gimple_stmt (dump_file,
3143 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3144 0, 0);
3146 continue;
3149 /* Find the block that most closely dominates all candidates
3150 with this increment. If there is at least one candidate in
3151 that block, the earliest one will be returned in WHERE. */
3152 bb = nearest_common_dominator_for_cands (c, incr, &where);
3154 /* Create a new SSA name to hold the initializer's value. */
3155 stride_type = TREE_TYPE (c->stride);
3156 new_name = make_temp_ssa_name (stride_type, NULL, "slsr");
3157 incr_vec[i].initializer = new_name;
3159 /* Create the initializer and insert it in the latest possible
3160 dominating position. */
3161 incr_tree = wide_int_to_tree (stride_type, incr);
3162 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
3163 c->stride, incr_tree);
3164 if (where)
3166 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3167 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3168 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
3170 else
3172 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3173 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
3175 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
3176 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3177 else
3178 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
3180 gimple_set_location (init_stmt, gimple_location (basis_stmt));
3183 if (dump_file && (dump_flags & TDF_DETAILS))
3185 fputs ("Inserting initializer: ", dump_file);
3186 print_gimple_stmt (dump_file, init_stmt, 0, 0);
3191 /* Return TRUE iff all required increments for candidates feeding PHI
3192 are profitable to replace on behalf of candidate C. */
3194 static bool
3195 all_phi_incrs_profitable (slsr_cand_t c, gimple phi)
3197 unsigned i;
3198 slsr_cand_t basis = lookup_cand (c->basis);
3199 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
3201 for (i = 0; i < gimple_phi_num_args (phi); i++)
3203 tree arg = gimple_phi_arg_def (phi, i);
3205 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3207 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3209 if (gimple_code (arg_def) == GIMPLE_PHI)
3211 if (!all_phi_incrs_profitable (c, arg_def))
3212 return false;
3214 else
3216 int j;
3217 slsr_cand_t arg_cand = base_cand_from_table (arg);
3218 widest_int increment = arg_cand->index - basis->index;
3220 if (!address_arithmetic_p && wi::neg_p (increment))
3221 increment = -increment;
3223 j = incr_vec_index (increment);
3225 if (dump_file && (dump_flags & TDF_DETAILS))
3227 fprintf (dump_file, " Conditional candidate %d, phi: ",
3228 c->cand_num);
3229 print_gimple_stmt (dump_file, phi, 0, 0);
3230 fputs (" increment: ", dump_file);
3231 print_decs (increment, dump_file);
3232 if (j < 0)
3233 fprintf (dump_file,
3234 "\n Not replaced; incr_vec overflow.\n");
3235 else {
3236 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3237 if (profitable_increment_p (j))
3238 fputs (" Replacing...\n", dump_file);
3239 else
3240 fputs (" Not replaced.\n", dump_file);
3244 if (j < 0 || !profitable_increment_p (j))
3245 return false;
3250 return true;
3253 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3254 type TO_TYPE, and insert it in front of the statement represented
3255 by candidate C. Use *NEW_VAR to create the new SSA name. Return
3256 the new SSA name. */
3258 static tree
3259 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3261 tree cast_lhs;
3262 gimple cast_stmt;
3263 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3265 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3266 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
3267 from_expr, NULL_TREE);
3268 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3269 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3271 if (dump_file && (dump_flags & TDF_DETAILS))
3273 fputs (" Inserting: ", dump_file);
3274 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
3277 return cast_lhs;
3280 /* Replace the RHS of the statement represented by candidate C with
3281 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3282 leave C unchanged or just interchange its operands. The original
3283 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3284 If the replacement was made and we are doing a details dump,
3285 return the revised statement, else NULL. */
3287 static gimple
3288 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3289 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3290 slsr_cand_t c)
3292 if (new_code != old_code
3293 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3294 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3295 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3296 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3298 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3299 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3300 update_stmt (gsi_stmt (gsi));
3301 c->cand_stmt = gsi_stmt (gsi);
3303 if (dump_file && (dump_flags & TDF_DETAILS))
3304 return gsi_stmt (gsi);
3307 else if (dump_file && (dump_flags & TDF_DETAILS))
3308 fputs (" (duplicate, not actually replacing)\n", dump_file);
3310 return NULL;
3313 /* Strength-reduce the statement represented by candidate C by replacing
3314 it with an equivalent addition or subtraction. I is the index into
3315 the increment vector identifying C's increment. NEW_VAR is used to
3316 create a new SSA name if a cast needs to be introduced. BASIS_NAME
3317 is the rhs1 to use in creating the add/subtract. */
3319 static void
3320 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3322 gimple stmt_to_print = NULL;
3323 tree orig_rhs1, orig_rhs2;
3324 tree rhs2;
3325 enum tree_code orig_code, repl_code;
3326 widest_int cand_incr;
3328 orig_code = gimple_assign_rhs_code (c->cand_stmt);
3329 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3330 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3331 cand_incr = cand_increment (c);
3333 if (dump_file && (dump_flags & TDF_DETAILS))
3335 fputs ("Replacing: ", dump_file);
3336 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
3337 stmt_to_print = c->cand_stmt;
3340 if (address_arithmetic_p)
3341 repl_code = POINTER_PLUS_EXPR;
3342 else
3343 repl_code = PLUS_EXPR;
3345 /* If the increment has an initializer T_0, replace the candidate
3346 statement with an add of the basis name and the initializer. */
3347 if (incr_vec[i].initializer)
3349 tree init_type = TREE_TYPE (incr_vec[i].initializer);
3350 tree orig_type = TREE_TYPE (orig_rhs2);
3352 if (types_compatible_p (orig_type, init_type))
3353 rhs2 = incr_vec[i].initializer;
3354 else
3355 rhs2 = introduce_cast_before_cand (c, orig_type,
3356 incr_vec[i].initializer);
3358 if (incr_vec[i].incr != cand_incr)
3360 gcc_assert (repl_code == PLUS_EXPR);
3361 repl_code = MINUS_EXPR;
3364 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3365 orig_code, orig_rhs1, orig_rhs2,
3369 /* Otherwise, the increment is one of -1, 0, and 1. Replace
3370 with a subtract of the stride from the basis name, a copy
3371 from the basis name, or an add of the stride to the basis
3372 name, respectively. It may be necessary to introduce a
3373 cast (or reuse an existing cast). */
3374 else if (cand_incr == 1)
3376 tree stride_type = TREE_TYPE (c->stride);
3377 tree orig_type = TREE_TYPE (orig_rhs2);
3379 if (types_compatible_p (orig_type, stride_type))
3380 rhs2 = c->stride;
3381 else
3382 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3384 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3385 orig_code, orig_rhs1, orig_rhs2,
3389 else if (cand_incr == -1)
3391 tree stride_type = TREE_TYPE (c->stride);
3392 tree orig_type = TREE_TYPE (orig_rhs2);
3393 gcc_assert (repl_code != POINTER_PLUS_EXPR);
3395 if (types_compatible_p (orig_type, stride_type))
3396 rhs2 = c->stride;
3397 else
3398 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3400 if (orig_code != MINUS_EXPR
3401 || !operand_equal_p (basis_name, orig_rhs1, 0)
3402 || !operand_equal_p (rhs2, orig_rhs2, 0))
3404 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3405 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3406 update_stmt (gsi_stmt (gsi));
3407 c->cand_stmt = gsi_stmt (gsi);
3409 if (dump_file && (dump_flags & TDF_DETAILS))
3410 stmt_to_print = gsi_stmt (gsi);
3412 else if (dump_file && (dump_flags & TDF_DETAILS))
3413 fputs (" (duplicate, not actually replacing)\n", dump_file);
3416 else if (cand_incr == 0)
3418 tree lhs = gimple_assign_lhs (c->cand_stmt);
3419 tree lhs_type = TREE_TYPE (lhs);
3420 tree basis_type = TREE_TYPE (basis_name);
3422 if (types_compatible_p (lhs_type, basis_type))
3424 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
3425 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3426 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3427 gsi_replace (&gsi, copy_stmt, false);
3428 c->cand_stmt = copy_stmt;
3430 if (dump_file && (dump_flags & TDF_DETAILS))
3431 stmt_to_print = copy_stmt;
3433 else
3435 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3436 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
3437 basis_name,
3438 NULL_TREE);
3439 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3440 gsi_replace (&gsi, cast_stmt, false);
3441 c->cand_stmt = cast_stmt;
3443 if (dump_file && (dump_flags & TDF_DETAILS))
3444 stmt_to_print = cast_stmt;
3447 else
3448 gcc_unreachable ();
3450 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3452 fputs ("With: ", dump_file);
3453 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
3454 fputs ("\n", dump_file);
3458 /* For each candidate in the tree rooted at C, replace it with
3459 an increment if such has been shown to be profitable. */
3461 static void
3462 replace_profitable_candidates (slsr_cand_t c)
3464 if (!cand_already_replaced (c))
3466 widest_int increment = cand_abs_increment (c);
3467 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3468 int i;
3470 i = incr_vec_index (increment);
3472 /* Only process profitable increments. Nothing useful can be done
3473 to a cast or copy. */
3474 if (i >= 0
3475 && profitable_increment_p (i)
3476 && orig_code != MODIFY_EXPR
3477 && !CONVERT_EXPR_CODE_P (orig_code))
3479 if (phi_dependent_cand_p (c))
3481 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
3483 if (all_phi_incrs_profitable (c, phi))
3485 /* Look up the LHS SSA name from C's basis. This will be
3486 the RHS1 of the adds we will introduce to create new
3487 phi arguments. */
3488 slsr_cand_t basis = lookup_cand (c->basis);
3489 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3491 /* Create a new phi statement that will represent C's true
3492 basis after the transformation is complete. */
3493 location_t loc = gimple_location (c->cand_stmt);
3494 tree name = create_phi_basis (c, phi, basis_name,
3495 loc, UNKNOWN_STRIDE);
3497 /* Replace C with an add of the new basis phi and the
3498 increment. */
3499 replace_one_candidate (c, i, name);
3502 else
3504 slsr_cand_t basis = lookup_cand (c->basis);
3505 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3506 replace_one_candidate (c, i, basis_name);
3511 if (c->sibling)
3512 replace_profitable_candidates (lookup_cand (c->sibling));
3514 if (c->dependent)
3515 replace_profitable_candidates (lookup_cand (c->dependent));
3518 /* Analyze costs of related candidates in the candidate vector,
3519 and make beneficial replacements. */
3521 static void
3522 analyze_candidates_and_replace (void)
3524 unsigned i;
3525 slsr_cand_t c;
3527 /* Each candidate that has a null basis and a non-null
3528 dependent is the root of a tree of related statements.
3529 Analyze each tree to determine a subset of those
3530 statements that can be replaced with maximum benefit. */
3531 FOR_EACH_VEC_ELT (cand_vec, i, c)
3533 slsr_cand_t first_dep;
3535 if (c->basis != 0 || c->dependent == 0)
3536 continue;
3538 if (dump_file && (dump_flags & TDF_DETAILS))
3539 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3540 c->cand_num);
3542 first_dep = lookup_cand (c->dependent);
3544 /* If this is a chain of CAND_REFs, unconditionally replace
3545 each of them with a strength-reduced data reference. */
3546 if (c->kind == CAND_REF)
3547 replace_refs (c);
3549 /* If the common stride of all related candidates is a known
3550 constant, each candidate without a phi-dependence can be
3551 profitably replaced. Each replaces a multiply by a single
3552 add, with the possibility that a feeding add also goes dead.
3553 A candidate with a phi-dependence is replaced only if the
3554 compensation code it requires is offset by the strength
3555 reduction savings. */
3556 else if (TREE_CODE (c->stride) == INTEGER_CST)
3557 replace_uncond_cands_and_profitable_phis (first_dep);
3559 /* When the stride is an SSA name, it may still be profitable
3560 to replace some or all of the dependent candidates, depending
3561 on whether the introduced increments can be reused, or are
3562 less expensive to calculate than the replaced statements. */
3563 else
3565 machine_mode mode;
3566 bool speed;
3568 /* Determine whether we'll be generating pointer arithmetic
3569 when replacing candidates. */
3570 address_arithmetic_p = (c->kind == CAND_ADD
3571 && POINTER_TYPE_P (c->cand_type));
3573 /* If all candidates have already been replaced under other
3574 interpretations, nothing remains to be done. */
3575 if (!count_candidates (c))
3576 continue;
3578 /* Construct an array of increments for this candidate chain. */
3579 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3580 incr_vec_len = 0;
3581 record_increments (c);
3583 /* Determine which increments are profitable to replace. */
3584 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3585 speed = optimize_cands_for_speed_p (c);
3586 analyze_increments (first_dep, mode, speed);
3588 /* Insert initializers of the form T_0 = stride * increment
3589 for use in profitable replacements. */
3590 insert_initializers (first_dep);
3591 dump_incr_vec ();
3593 /* Perform the replacements. */
3594 replace_profitable_candidates (first_dep);
3595 free (incr_vec);
3600 namespace {
3602 const pass_data pass_data_strength_reduction =
3604 GIMPLE_PASS, /* type */
3605 "slsr", /* name */
3606 OPTGROUP_NONE, /* optinfo_flags */
3607 TV_GIMPLE_SLSR, /* tv_id */
3608 ( PROP_cfg | PROP_ssa ), /* properties_required */
3609 0, /* properties_provided */
3610 0, /* properties_destroyed */
3611 0, /* todo_flags_start */
3612 0, /* todo_flags_finish */
3615 class pass_strength_reduction : public gimple_opt_pass
3617 public:
3618 pass_strength_reduction (gcc::context *ctxt)
3619 : gimple_opt_pass (pass_data_strength_reduction, ctxt)
3622 /* opt_pass methods: */
3623 virtual bool gate (function *) { return flag_tree_slsr; }
3624 virtual unsigned int execute (function *);
3626 }; // class pass_strength_reduction
3628 unsigned
3629 pass_strength_reduction::execute (function *fun)
3631 /* Create the obstack where candidates will reside. */
3632 gcc_obstack_init (&cand_obstack);
3634 /* Allocate the candidate vector. */
3635 cand_vec.create (128);
3637 /* Allocate the mapping from statements to candidate indices. */
3638 stmt_cand_map = new hash_map<gimple, slsr_cand_t>;
3640 /* Create the obstack where candidate chains will reside. */
3641 gcc_obstack_init (&chain_obstack);
3643 /* Allocate the mapping from base expressions to candidate chains. */
3644 base_cand_map = new hash_table<cand_chain_hasher> (500);
3646 /* Allocate the mapping from bases to alternative bases. */
3647 alt_base_map = new hash_map<tree, tree>;
3649 /* Initialize the loop optimizer. We need to detect flow across
3650 back edges, and this gives us dominator information as well. */
3651 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3653 /* Walk the CFG in predominator order looking for strength reduction
3654 candidates. */
3655 find_candidates_dom_walker (CDI_DOMINATORS)
3656 .walk (fun->cfg->x_entry_block_ptr);
3658 if (dump_file && (dump_flags & TDF_DETAILS))
3660 dump_cand_vec ();
3661 dump_cand_chains ();
3664 delete alt_base_map;
3665 free_affine_expand_cache (&name_expansions);
3667 /* Analyze costs and make appropriate replacements. */
3668 analyze_candidates_and_replace ();
3670 loop_optimizer_finalize ();
3671 delete base_cand_map;
3672 base_cand_map = NULL;
3673 obstack_free (&chain_obstack, NULL);
3674 delete stmt_cand_map;
3675 cand_vec.release ();
3676 obstack_free (&cand_obstack, NULL);
3678 return 0;
3681 } // anon namespace
3683 gimple_opt_pass *
3684 make_pass_strength_reduction (gcc::context *ctxt)
3686 return new pass_strength_reduction (ctxt);