gcc/gimple-ssa-strength-reduction.c
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree.h"
40 #include "gimple.h"
41 #include "basic-block.h"
42 #include "tree-pass.h"
43 #include "cfgloop.h"
44 #include "gimple-pretty-print.h"
45 #include "tree-flow.h"
46 #include "domwalk.h"
47 #include "pointer-set.h"
48 #include "expmed.h"
49 #include "params.h"
50 #include "hash-table.h"
52 /* Information about a strength reduction candidate. Each statement
53 in the candidate table represents an expression of one of the
54 following forms (the special case of CAND_REF will be described
55 later):
57 (CAND_MULT) S1: X = (B + i) * S
58 (CAND_ADD) S1: X = B + (i * S)
60 Here X and B are SSA names, i is an integer constant, and S is
61 either an SSA name or a constant. We call B the "base," i the
62 "index", and S the "stride."
64 Any statement S0 that dominates S1 and is of the form:
66 (CAND_MULT) S0: Y = (B + i') * S
67 (CAND_ADD) S0: Y = B + (i' * S)
69 is called a "basis" for S1. In both cases, S1 may be replaced by
71 S1': X = Y + (i - i') * S,
73 where (i - i') * S is folded to the extent possible.
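   As a small worked example (the SSA names and constants here are
   hypothetical):

     S0: Y = (B + 2) * 4
     S1: X = (B + 7) * 4

   S0 is a basis for S1, and S1 may be replaced by

     S1': X = Y + 20

   since (7 - 2) * 4 folds to the constant 20.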
75 All gimple statements are visited in dominator order, and each
76 statement that may contribute to one of the forms of S1 above is
77 given at least one entry in the candidate table. Such statements
78 include addition, pointer addition, subtraction, multiplication,
79 negation, copies, and nontrivial type casts. If a statement may
80 represent more than one expression of the forms of S1 above,
81 multiple "interpretations" are stored in the table and chained
82 together. Examples:
84 * An add of two SSA names may treat either operand as the base.
85 * A multiply of two SSA names, likewise.
86 * A copy or cast may be thought of as either a CAND_MULT with
87 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
89 Candidate records are allocated from an obstack. They are addressed
90 both from a hash table keyed on S1, and from a vector of candidate
91 pointers arranged in predominator order.
93 Opportunity note
94 ----------------
95 Currently we don't recognize:
97 S0: Y = (S * i') - B
98 S1: X = (S * i) - B
100 as a strength reduction opportunity, even though this S1 would
101 also be replaceable by the S1' above. This can be added if it
102 comes up in practice.
104 Strength reduction in addressing
105 --------------------------------
106 There is another kind of candidate known as CAND_REF. A CAND_REF
107 describes a statement containing a memory reference having
108 complex addressing that might benefit from strength reduction.
109 Specifically, we are interested in references for which
110 get_inner_reference returns a base address, offset, and bitpos as
111 follows:
113 base: MEM_REF (T1, C1)
114 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
115 bitpos: C4 * BITS_PER_UNIT
117 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
118 arbitrary integer constants. Note that C2 may be zero, in which
119 case the offset will be MULT_EXPR (T2, C3).
121 When this pattern is recognized, the original memory reference
122 can be replaced with:
124 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
125 C1 + (C2 * C3) + C4)
127 which distributes the multiply to allow constant folding. When
128 two or more addressing expressions can be represented by MEM_REFs
129 of this form, differing only in the constants C1, C2, and C4,
130 making this substitution produces more efficient addressing during
131 the RTL phases. When there are not at least two expressions with
132 the same values of T1, T2, and C3, there is nothing to be gained
133 by the replacement.
135 Strength reduction of CAND_REFs uses the same infrastructure as
136 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
137 field, MULT_EXPR (T2, C3) in the stride (S) field, and
138 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
139 is thus another CAND_REF with the same B and S values. When at
140 least two CAND_REFs are chained together using the basis relation,
141 each of them is replaced as above, resulting in improved code
142 generation for addressing.
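   As an illustrative sketch (the structure layout and constants are
   hypothetical), a reference such as p->a[n + 1].g, where each array
   element occupies 16 bytes and field g sits at byte offset 4, might
   be decomposed by get_inner_reference as

     base:   MEM_REF (p, 0)
     offset: MULT_EXPR (PLUS_EXPR (n, 1), 16)
     bitpos: 4 * BITS_PER_UNIT

   and recorded as a CAND_REF with B = p, S = MULT_EXPR (n, 16), and
   i = 0 + (1 * 16) + 4 = 20.  A nearby reference to p->a[n + 1].f
   differs only in the constant part and can therefore share the
   same basis.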
144 Conditional candidates
145 ======================
147 Conditional candidates are best illustrated with an example.
148 Consider the code sequence:
150 (1) x_0 = ...;
151 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
152 if (...)
153 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
154 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
155 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
156 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
158 Here strength reduction is complicated by the uncertain value of x_2.
159 A legitimate transformation is:
161 (1) x_0 = ...;
162 (2) a_0 = x_0 * 5;
163 if (...)
165 (3) [x_1 = x_0 + 1;]
166 (3a) t_1 = a_0 + 5;
168 (4) [x_2 = PHI <x_0, x_1>;]
169 (4a) t_2 = PHI <a_0, t_1>;
170 (5) [x_3 = x_2 + 1;]
171 (6r) a_1 = t_2 + 5;
173 where the bracketed instructions may go dead.
175 To recognize this opportunity, we have to observe that statement (6)
176 has a "hidden basis" (2). The hidden basis is unlike a normal basis
177 in that the statement and the hidden basis have different base SSA
178 names (x_2 and x_0, respectively). The relationship is established
179 when a statement's base name (x_2) is defined by a phi statement (4),
180 each argument of which (x_0, x_1) has an identical "derived base name."
181 If the argument is defined by a candidate (as x_1 is by (3)) that is a
182 CAND_ADD having a stride of 1, the derived base name of the argument is
183 the base name of the candidate (x_0). Otherwise, the argument itself
184 is its derived base name (as is the case with argument x_0).
186 The hidden basis for statement (6) is the nearest dominating candidate
187 whose base name is the derived base name (x_0) of the feeding phi (4),
188 and whose stride is identical to that of the statement. We can then
189 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
190 allowing the final replacement of (6) by the strength-reduced (6r).
192 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
193 A CAND_PHI is not a candidate for replacement, but is maintained in the
194 candidate table to ease discovery of hidden bases. Any phi statement
195 whose arguments share a common derived base name is entered into the
196 table with the derived base name, an (arbitrary) index of zero, and a
197 stride of 1. A statement with a hidden basis can then be detected by
198 simply looking up its feeding phi definition in the candidate table,
199 extracting the derived base name, and searching for a basis in the
200 usual manner after substituting the derived base name.
202 Note that the transformation is only valid when the original phi and
203 the statements that define the phi's arguments are all at the same
204 position in the loop hierarchy. */
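/* As a source-level sketch of the conditional-candidate example above
   (the function and variable names are hypothetical), code such as

     int f (int x, int flag)
     {
       int a = x * 5;
       if (flag)
         x = x + 1;
       x = x + 1;
       return a + x * 5;
     }

   produces essentially the gimple sequence (1)-(6) shown above, and
   the second multiply by 5 is replaced by an add of 5 to a phi of the
   earlier products.  */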
207 /* Index into the candidate vector, offset by 1. VECs are zero-based,
208 while cand_idx's are one-based, with zero indicating null. */
209 typedef unsigned cand_idx;
211 /* The kind of candidate. */
212 enum cand_kind
214 CAND_MULT,
215 CAND_ADD,
216 CAND_REF,
217 CAND_PHI
220 struct slsr_cand_d
222 /* The candidate statement S1. */
223 gimple cand_stmt;
225 /* The base expression B: often an SSA name, but not always. */
226 tree base_expr;
228 /* The stride S. */
229 tree stride;
231 /* The index constant i. */
232 double_int index;
234 /* The type of the candidate. This is normally the type of base_expr,
235 but casts may have occurred when combining feeding instructions.
236 A candidate can only be a basis for candidates of the same final type.
237 (For CAND_REFs, this is the type to be used for operand 1 of the
238 replacement MEM_REF.) */
239 tree cand_type;
241 /* The kind of candidate (CAND_MULT, etc.). */
242 enum cand_kind kind;
244 /* Index of this candidate in the candidate vector. */
245 cand_idx cand_num;
247 /* Index of the next candidate record for the same statement.
248 A statement may be useful in more than one way (e.g., due to
249 commutativity). So we can have multiple "interpretations"
250 of a statement. */
251 cand_idx next_interp;
253 /* Index of the basis statement S0, if any, in the candidate vector. */
254 cand_idx basis;
256 /* First candidate for which this candidate is a basis, if one exists. */
257 cand_idx dependent;
259 /* Next candidate having the same basis as this one. */
260 cand_idx sibling;
262 /* If this is a conditional candidate, the CAND_PHI candidate
263 that defines the base SSA name B. */
264 cand_idx def_phi;
266 /* Savings that can be expected from eliminating dead code if this
267 candidate is replaced. */
268 int dead_savings;
271 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
272 typedef const struct slsr_cand_d *const_slsr_cand_t;
274 /* Pointers to candidates are chained together as part of a mapping
275 from base expressions to the candidates that use them. */
277 struct cand_chain_d
279 /* Base expression for the chain of candidates: often, but not
280 always, an SSA name. */
281 tree base_expr;
283 /* Pointer to a candidate. */
284 slsr_cand_t cand;
286 /* Chain pointer. */
287 struct cand_chain_d *next;
291 typedef struct cand_chain_d cand_chain, *cand_chain_t;
292 typedef const struct cand_chain_d *const_cand_chain_t;
294 /* Information about a unique "increment" associated with candidates
295 having an SSA name for a stride. An increment is the difference
296 between the index of the candidate and the index of its basis,
297 i.e., (i - i') as discussed in the module commentary.
299 When we are not going to generate address arithmetic we treat
300 increments that differ only in sign as the same, allowing sharing
301 of the cost of initializers. The absolute value of the increment
302 is stored in the incr_info. */
304 struct incr_info_d
306 /* The increment that relates a candidate to its basis. */
307 double_int incr;
309 /* How many times the increment occurs in the candidate tree. */
310 unsigned count;
312 /* Cost of replacing candidates using this increment. Negative and
313 zero costs indicate replacement should be performed. */
314 int cost;
316 /* If this increment is profitable but is not -1, 0, or 1, it requires
317 an initializer T_0 = stride * incr to be found or introduced in the
318 nearest common dominator of all candidates. This field holds T_0
319 for subsequent use. */
320 tree initializer;
322 /* If the initializer was found to already exist, this is the block
323 where it was found. */
324 basic_block init_bb;
327 typedef struct incr_info_d incr_info, *incr_info_t;
329 /* Candidates are maintained in a vector. If candidate X dominates
330 candidate Y, then X appears before Y in the vector; but the
331 converse does not necessarily hold. */
332 static vec<slsr_cand_t> cand_vec;
334 enum cost_consts
336 COST_NEUTRAL = 0,
337 COST_INFINITE = 1000
340 enum stride_status
342 UNKNOWN_STRIDE = 0,
343 KNOWN_STRIDE = 1
346 enum phi_adjust_status
348 NOT_PHI_ADJUST = 0,
349 PHI_ADJUST = 1
352 enum count_phis_status
354 DONT_COUNT_PHIS = 0,
355 COUNT_PHIS = 1
358 /* Pointer map embodying a mapping from statements to candidates. */
359 static struct pointer_map_t *stmt_cand_map;
361 /* Obstack for candidates. */
362 static struct obstack cand_obstack;
364 /* Obstack for candidate chains. */
365 static struct obstack chain_obstack;
367 /* An array INCR_VEC of incr_infos is used during analysis of related
368 candidates having an SSA name for a stride. INCR_VEC_LEN describes
369 its current length. MAX_INCR_VEC_LEN is used to avoid costly
370 pathological cases. */
371 static incr_info_t incr_vec;
372 static unsigned incr_vec_len;
373 const int MAX_INCR_VEC_LEN = 16;
375 /* For a chain of candidates with unknown stride, indicates whether or not
376 we must generate pointer arithmetic when replacing statements. */
377 static bool address_arithmetic_p;
379 /* Forward function declarations. */
380 static slsr_cand_t base_cand_from_table (tree);
381 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
383 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
385 static slsr_cand_t
386 lookup_cand (cand_idx idx)
388 return cand_vec[idx - 1];
391 /* Helper for hashing a candidate chain header. */
393 struct cand_chain_hasher : typed_noop_remove <cand_chain>
395 typedef cand_chain value_type;
396 typedef cand_chain compare_type;
397 static inline hashval_t hash (const value_type *);
398 static inline bool equal (const value_type *, const compare_type *);
401 inline hashval_t
402 cand_chain_hasher::hash (const value_type *p)
404 tree base_expr = p->base_expr;
405 return iterative_hash_expr (base_expr, 0);
408 inline bool
409 cand_chain_hasher::equal (const value_type *chain1, const compare_type *chain2)
411 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
414 /* Hash table embodying a mapping from base exprs to chains of candidates. */
415 static hash_table <cand_chain_hasher> base_cand_map;
417 /* Look in the candidate table for a CAND_PHI that defines BASE and
418 return its index if found; otherwise return 0. */
420 static cand_idx
421 find_phi_def (tree base)
423 slsr_cand_t c;
425 if (TREE_CODE (base) != SSA_NAME)
426 return 0;
428 c = base_cand_from_table (base);
430 if (!c || c->kind != CAND_PHI)
431 return 0;
433 return c->cand_num;
436 /* Helper routine for find_basis_for_candidate. May be called twice:
437 once for the candidate's base expr, and optionally again for the
438 candidate's phi definition. */
440 static slsr_cand_t
441 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
443 cand_chain mapping_key;
444 cand_chain_t chain;
445 slsr_cand_t basis = NULL;
447 // Limit potential of N^2 behavior for long candidate chains.
448 int iters = 0;
449 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
451 mapping_key.base_expr = base_expr;
452 chain = base_cand_map.find (&mapping_key);
454 for (; chain && iters < max_iters; chain = chain->next, ++iters)
456 slsr_cand_t one_basis = chain->cand;
458 if (one_basis->kind != c->kind
459 || one_basis->cand_stmt == c->cand_stmt
460 || !operand_equal_p (one_basis->stride, c->stride, 0)
461 || !types_compatible_p (one_basis->cand_type, c->cand_type)
462 || !dominated_by_p (CDI_DOMINATORS,
463 gimple_bb (c->cand_stmt),
464 gimple_bb (one_basis->cand_stmt)))
465 continue;
467 if (!basis || basis->cand_num < one_basis->cand_num)
468 basis = one_basis;
471 return basis;
474 /* Use the base expr from candidate C to look for possible candidates
475 that can serve as a basis for C. Each potential basis must also
476 appear in a block that dominates the candidate statement and have
477 the same stride and type. If more than one possible basis exists,
478 the one with highest index in the vector is chosen; this will be
479 the most immediately dominating basis. */
481 static int
482 find_basis_for_candidate (slsr_cand_t c)
484 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
486 /* If a candidate doesn't have a basis using its base expression,
487 it may have a basis hidden by one or more intervening phis. */
488 if (!basis && c->def_phi)
490 basic_block basis_bb, phi_bb;
491 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
492 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
494 if (basis)
496 /* A hidden basis must dominate the phi-definition of the
497 candidate's base name. */
498 phi_bb = gimple_bb (phi_cand->cand_stmt);
499 basis_bb = gimple_bb (basis->cand_stmt);
501 if (phi_bb == basis_bb
502 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
504 basis = NULL;
505 c->basis = 0;
508 /* If we found a hidden basis, estimate additional dead-code
509 savings if the phi and its feeding statements can be removed. */
510 if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
511 c->dead_savings += phi_cand->dead_savings;
515 if (basis)
517 c->sibling = basis->dependent;
518 basis->dependent = c->cand_num;
519 return basis->cand_num;
522 return 0;
525 /* Record a mapping from the base expression of C to C itself, indicating that
526 C may potentially serve as a basis using that base expression. */
528 static void
529 record_potential_basis (slsr_cand_t c)
531 cand_chain_t node;
532 cand_chain **slot;
534 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
535 node->base_expr = c->base_expr;
536 node->cand = c;
537 node->next = NULL;
538 slot = base_cand_map.find_slot (node, INSERT);
540 if (*slot)
542 cand_chain_t head = (cand_chain_t) (*slot);
543 node->next = head->next;
544 head->next = node;
546 else
547 *slot = node;
550 /* Allocate storage for a new candidate and initialize its fields.
551 Attempt to find a basis for the candidate. */
553 static slsr_cand_t
554 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
555 double_int index, tree stride, tree ctype,
556 unsigned savings)
558 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
559 sizeof (slsr_cand));
560 c->cand_stmt = gs;
561 c->base_expr = base;
562 c->stride = stride;
563 c->index = index;
564 c->cand_type = ctype;
565 c->kind = kind;
566 c->cand_num = cand_vec.length () + 1;
567 c->next_interp = 0;
568 c->dependent = 0;
569 c->sibling = 0;
570 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
571 c->dead_savings = savings;
573 cand_vec.safe_push (c);
575 if (kind == CAND_PHI)
576 c->basis = 0;
577 else
578 c->basis = find_basis_for_candidate (c);
580 record_potential_basis (c);
582 return c;
585 /* Determine the target cost of statement GS when compiling according
586 to SPEED. */
588 static int
589 stmt_cost (gimple gs, bool speed)
591 tree lhs, rhs1, rhs2;
592 enum machine_mode lhs_mode;
594 gcc_assert (is_gimple_assign (gs));
595 lhs = gimple_assign_lhs (gs);
596 rhs1 = gimple_assign_rhs1 (gs);
597 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
599 switch (gimple_assign_rhs_code (gs))
601 case MULT_EXPR:
602 rhs2 = gimple_assign_rhs2 (gs);
604 if (host_integerp (rhs2, 0))
605 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
607 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
608 return mul_cost (speed, lhs_mode);
610 case PLUS_EXPR:
611 case POINTER_PLUS_EXPR:
612 case MINUS_EXPR:
613 return add_cost (speed, lhs_mode);
615 case NEGATE_EXPR:
616 return neg_cost (speed, lhs_mode);
618 case NOP_EXPR:
619 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
621 /* Note that we don't assign costs to copies that in most cases
622 will go away. */
623 default:
627 gcc_unreachable ();
628 return 0;
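/* For example, on many targets a multiply by a small constant such as
   5 is costed by mult_by_coeff_cost as a short shift-and-add sequence,
   while a multiply of two SSA names gets the full mul_cost.  Either
   way, the value feeds the dead_savings estimates used when deciding
   whether a replacement is profitable.  */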
631 /* Look up the defining statement for BASE_IN and return a pointer
632 to its candidate in the candidate table, if any; otherwise NULL.
633 Only CAND_ADD and CAND_MULT candidates are returned. */
635 static slsr_cand_t
636 base_cand_from_table (tree base_in)
638 slsr_cand_t *result;
640 gimple def = SSA_NAME_DEF_STMT (base_in);
641 if (!def)
642 return (slsr_cand_t) NULL;
644 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
646 if (result && (*result)->kind != CAND_REF)
647 return *result;
649 return (slsr_cand_t) NULL;
652 /* Add an entry to the statement-to-candidate mapping. */
654 static void
655 add_cand_for_stmt (gimple gs, slsr_cand_t c)
657 void **slot = pointer_map_insert (stmt_cand_map, gs);
658 gcc_assert (!*slot);
659 *slot = c;
662 /* Given PHI which contains a phi statement, determine whether it
663 satisfies all the requirements of a phi candidate. If so, create
664 a candidate. Note that a CAND_PHI never has a basis itself, but
665 is used to help find a basis for subsequent candidates. */
667 static void
668 slsr_process_phi (gimple phi, bool speed)
670 unsigned i;
671 tree arg0_base = NULL_TREE, base_type;
672 slsr_cand_t c;
673 struct loop *cand_loop = gimple_bb (phi)->loop_father;
674 unsigned savings = 0;
676 /* A CAND_PHI requires each of its arguments to have the same
677 derived base name. (See the module header commentary for a
678 definition of derived base names.) Furthermore, all feeding
679 definitions must be in the same position in the loop hierarchy
680 as PHI. */
682 for (i = 0; i < gimple_phi_num_args (phi); i++)
684 slsr_cand_t arg_cand;
685 tree arg = gimple_phi_arg_def (phi, i);
686 tree derived_base_name = NULL_TREE;
687 gimple arg_stmt = NULL;
688 basic_block arg_bb = NULL;
690 if (TREE_CODE (arg) != SSA_NAME)
691 return;
693 arg_cand = base_cand_from_table (arg);
695 if (arg_cand)
697 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
699 if (!arg_cand->next_interp)
700 return;
702 arg_cand = lookup_cand (arg_cand->next_interp);
705 if (!integer_onep (arg_cand->stride))
706 return;
708 derived_base_name = arg_cand->base_expr;
709 arg_stmt = arg_cand->cand_stmt;
710 arg_bb = gimple_bb (arg_stmt);
712 /* Gather potential dead code savings if the phi statement
713 can be removed later on. */
714 if (has_single_use (arg))
716 if (gimple_code (arg_stmt) == GIMPLE_PHI)
717 savings += arg_cand->dead_savings;
718 else
719 savings += stmt_cost (arg_stmt, speed);
722 else
724 derived_base_name = arg;
726 if (SSA_NAME_IS_DEFAULT_DEF (arg))
727 arg_bb = single_succ (ENTRY_BLOCK_PTR);
728 else
729 arg_bb = gimple_bb (SSA_NAME_DEF_STMT (arg));
732 if (!arg_bb || arg_bb->loop_father != cand_loop)
733 return;
735 if (i == 0)
736 arg0_base = derived_base_name;
737 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
738 return;
741 /* Create the candidate. "alloc_cand_and_find_basis" is named
742 misleadingly for this case, as no basis will be sought for a
743 CAND_PHI. */
744 base_type = TREE_TYPE (arg0_base);
746 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base, double_int_zero,
747 integer_one_node, base_type, savings);
749 /* Add the candidate to the statement-candidate mapping. */
750 add_cand_for_stmt (phi, c);
753 /* Look for the following pattern:
755 *PBASE: MEM_REF (T1, C1)
757 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
759 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
761 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
763 *PINDEX: C4 * BITS_PER_UNIT
765 If not present, leave the input values unchanged and return FALSE.
766 Otherwise, modify the input values as follows and return TRUE:
768 *PBASE: T1
769 *POFFSET: MULT_EXPR (T2, C3)
770 *PINDEX: C1 + (C2 * C3) + C4 */
772 static bool
773 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
774 tree *ptype)
776 tree base = *pbase, offset = *poffset;
777 double_int index = *pindex;
778 double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
779 tree mult_op0, mult_op1, t1, t2, type;
780 double_int c1, c2, c3, c4;
782 if (!base
783 || !offset
784 || TREE_CODE (base) != MEM_REF
785 || TREE_CODE (offset) != MULT_EXPR
786 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
787 || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
788 return false;
790 t1 = TREE_OPERAND (base, 0);
791 c1 = mem_ref_offset (base);
792 type = TREE_TYPE (TREE_OPERAND (base, 1));
794 mult_op0 = TREE_OPERAND (offset, 0);
795 mult_op1 = TREE_OPERAND (offset, 1);
797 c3 = tree_to_double_int (mult_op1);
799 if (TREE_CODE (mult_op0) == PLUS_EXPR)
801 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
803 t2 = TREE_OPERAND (mult_op0, 0);
804 c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
806 else
807 return false;
809 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
811 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
813 t2 = TREE_OPERAND (mult_op0, 0);
814 c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
816 else
817 return false;
819 else
821 t2 = mult_op0;
822 c2 = double_int_zero;
825 c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
827 *pbase = t1;
828 *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
829 double_int_to_tree (sizetype, c3));
830 *pindex = c1 + c2 * c3 + c4;
831 *ptype = type;
833 return true;
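/* A minimal worked instance of the restructuring above (all values
   hypothetical, assuming 8-bit units):  given

     *PBASE:   MEM_REF (T1, 8)
     *POFFSET: MULT_EXPR (PLUS_EXPR (T2, 2), 4)
     *PINDEX:  32, i.e. 4 * BITS_PER_UNIT

   the outputs are

     *PBASE:   T1
     *POFFSET: MULT_EXPR (T2, 4)
     *PINDEX:  8 + (2 * 4) + 4 = 20.  */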
836 /* Given GS which contains a data reference, create a CAND_REF entry in
837 the candidate table and attempt to find a basis. */
839 static void
840 slsr_process_ref (gimple gs)
842 tree ref_expr, base, offset, type;
843 HOST_WIDE_INT bitsize, bitpos;
844 enum machine_mode mode;
845 int unsignedp, volatilep;
846 double_int index;
847 slsr_cand_t c;
849 if (gimple_vdef (gs))
850 ref_expr = gimple_assign_lhs (gs);
851 else
852 ref_expr = gimple_assign_rhs1 (gs);
854 if (!handled_component_p (ref_expr)
855 || TREE_CODE (ref_expr) == BIT_FIELD_REF
856 || (TREE_CODE (ref_expr) == COMPONENT_REF
857 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
858 return;
860 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
861 &unsignedp, &volatilep, false);
862 index = double_int::from_uhwi (bitpos);
864 if (!restructure_reference (&base, &offset, &index, &type))
865 return;
867 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
868 type, 0);
870 /* Add the candidate to the statement-candidate mapping. */
871 add_cand_for_stmt (gs, c);
874 /* Create a candidate entry for a statement GS, where GS multiplies
875 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
876 about the two SSA names into the new candidate. Return the new
877 candidate. */
879 static slsr_cand_t
880 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
882 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
883 double_int index;
884 unsigned savings = 0;
885 slsr_cand_t c;
886 slsr_cand_t base_cand = base_cand_from_table (base_in);
888 /* Look at all interpretations of the base candidate, if necessary,
889 to find information to propagate into this candidate. */
890 while (base_cand && !base && base_cand->kind != CAND_PHI)
893 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
895 /* Y = (B + i') * 1
896 X = Y * Z
897 ================
898 X = (B + i') * Z */
899 base = base_cand->base_expr;
900 index = base_cand->index;
901 stride = stride_in;
902 ctype = base_cand->cand_type;
903 if (has_single_use (base_in))
904 savings = (base_cand->dead_savings
905 + stmt_cost (base_cand->cand_stmt, speed));
907 else if (base_cand->kind == CAND_ADD
908 && TREE_CODE (base_cand->stride) == INTEGER_CST)
910 /* Y = B + (i' * S), S constant
911 X = Y * Z
912 ============================
913 X = B + ((i' * S) * Z) */
914 base = base_cand->base_expr;
915 index = base_cand->index * tree_to_double_int (base_cand->stride);
916 stride = stride_in;
917 ctype = base_cand->cand_type;
918 if (has_single_use (base_in))
919 savings = (base_cand->dead_savings
920 + stmt_cost (base_cand->cand_stmt, speed));
923 if (base_cand->next_interp)
924 base_cand = lookup_cand (base_cand->next_interp);
925 else
926 base_cand = NULL;
929 if (!base)
931 /* No interpretations had anything useful to propagate, so
932 produce X = (Y + 0) * Z. */
933 base = base_in;
934 index = double_int_zero;
935 stride = stride_in;
936 ctype = TREE_TYPE (base_in);
939 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
940 ctype, savings);
941 return c;
944 /* Create a candidate entry for a statement GS, where GS multiplies
945 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
946 information about BASE_IN into the new candidate. Return the new
947 candidate. */
949 static slsr_cand_t
950 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
952 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
953 double_int index, temp;
954 unsigned savings = 0;
955 slsr_cand_t c;
956 slsr_cand_t base_cand = base_cand_from_table (base_in);
958 /* Look at all interpretations of the base candidate, if necessary,
959 to find information to propagate into this candidate. */
960 while (base_cand && !base && base_cand->kind != CAND_PHI)
962 if (base_cand->kind == CAND_MULT
963 && TREE_CODE (base_cand->stride) == INTEGER_CST)
965 /* Y = (B + i') * S, S constant
966 X = Y * c
967 ============================
968 X = (B + i') * (S * c) */
969 base = base_cand->base_expr;
970 index = base_cand->index;
971 temp = tree_to_double_int (base_cand->stride)
972 * tree_to_double_int (stride_in);
973 stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
974 ctype = base_cand->cand_type;
975 if (has_single_use (base_in))
976 savings = (base_cand->dead_savings
977 + stmt_cost (base_cand->cand_stmt, speed));
979 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
981 /* Y = B + (i' * 1)
982 X = Y * c
983 ===========================
984 X = (B + i') * c */
985 base = base_cand->base_expr;
986 index = base_cand->index;
987 stride = stride_in;
988 ctype = base_cand->cand_type;
989 if (has_single_use (base_in))
990 savings = (base_cand->dead_savings
991 + stmt_cost (base_cand->cand_stmt, speed));
993 else if (base_cand->kind == CAND_ADD
994 && base_cand->index.is_one ()
995 && TREE_CODE (base_cand->stride) == INTEGER_CST)
997 /* Y = B + (1 * S), S constant
998 X = Y * c
999 ===========================
1000 X = (B + S) * c */
1001 base = base_cand->base_expr;
1002 index = tree_to_double_int (base_cand->stride);
1003 stride = stride_in;
1004 ctype = base_cand->cand_type;
1005 if (has_single_use (base_in))
1006 savings = (base_cand->dead_savings
1007 + stmt_cost (base_cand->cand_stmt, speed));
1010 if (base_cand->next_interp)
1011 base_cand = lookup_cand (base_cand->next_interp);
1012 else
1013 base_cand = NULL;
1016 if (!base)
1018 /* No interpretations had anything useful to propagate, so
1019 produce X = (Y + 0) * c. */
1020 base = base_in;
1021 index = double_int_zero;
1022 stride = stride_in;
1023 ctype = TREE_TYPE (base_in);
1026 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1027 ctype, savings);
1028 return c;
1031 /* Given GS which is a multiply of scalar integers, make an appropriate
1032 entry in the candidate table. If this is a multiply of two SSA names,
1033 create two CAND_MULT interpretations and attempt to find a basis for
1034 each of them. Otherwise, create a single CAND_MULT and attempt to
1035 find a basis. */
1037 static void
1038 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
1040 slsr_cand_t c, c2;
1042 /* If this is a multiply of an SSA name with itself, it is highly
1043 unlikely that we will get a strength reduction opportunity, so
1044 don't record it as a candidate. This simplifies the logic for
1045 finding a basis, so if this is removed that must be considered. */
1046 if (rhs1 == rhs2)
1047 return;
1049 if (TREE_CODE (rhs2) == SSA_NAME)
1051 /* Record an interpretation of this statement in the candidate table
1052 assuming RHS1 is the base expression and RHS2 is the stride. */
1053 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1055 /* Add the first interpretation to the statement-candidate mapping. */
1056 add_cand_for_stmt (gs, c);
1058 /* Record another interpretation of this statement assuming RHS1
1059 is the stride and RHS2 is the base expression. */
1060 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1061 c->next_interp = c2->cand_num;
1063 else
1065 /* Record an interpretation for the multiply-immediate. */
1066 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1068 /* Add the interpretation to the statement-candidate mapping. */
1069 add_cand_for_stmt (gs, c);
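/* For instance (hypothetical SSA names), x_4 = a_2 * b_3 receives the
   two chained interpretations

     MULT (B: a_2; i: 0; S: b_3)
     MULT (B: b_3; i: 0; S: a_2)

   while x_4 = a_2 * 10 receives only MULT (B: a_2; i: 0; S: 10),
   assuming nothing useful is known about the operands' defining
   statements.  */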
1073 /* Create a candidate entry for a statement GS, where GS adds two
1074 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1075 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1076 information about the two SSA names into the new candidate.
1077 Return the new candidate. */
1079 static slsr_cand_t
1080 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
1081 bool subtract_p, bool speed)
1083 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
1084 double_int index;
1085 unsigned savings = 0;
1086 slsr_cand_t c;
1087 slsr_cand_t base_cand = base_cand_from_table (base_in);
1088 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1090 /* The most useful transformation is a multiply-immediate feeding
1091 an add or subtract. Look for that first. */
1092 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1094 if (addend_cand->kind == CAND_MULT
1095 && addend_cand->index.is_zero ()
1096 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1098 /* Z = (B + 0) * S, S constant
1099 X = Y +/- Z
1100 ===========================
1101 X = Y + ((+/-1 * S) * B) */
1102 base = base_in;
1103 index = tree_to_double_int (addend_cand->stride);
1104 if (subtract_p)
1105 index = -index;
1106 stride = addend_cand->base_expr;
1107 ctype = TREE_TYPE (base_in);
1108 if (has_single_use (addend_in))
1109 savings = (addend_cand->dead_savings
1110 + stmt_cost (addend_cand->cand_stmt, speed));
1113 if (addend_cand->next_interp)
1114 addend_cand = lookup_cand (addend_cand->next_interp);
1115 else
1116 addend_cand = NULL;
1119 while (base_cand && !base && base_cand->kind != CAND_PHI)
1121 if (base_cand->kind == CAND_ADD
1122 && (base_cand->index.is_zero ()
1123 || operand_equal_p (base_cand->stride,
1124 integer_zero_node, 0)))
1126 /* Y = B + (i' * S), i' * S = 0
1127 X = Y +/- Z
1128 ============================
1129 X = B + (+/-1 * Z) */
1130 base = base_cand->base_expr;
1131 index = subtract_p ? double_int_minus_one : double_int_one;
1132 stride = addend_in;
1133 ctype = base_cand->cand_type;
1134 if (has_single_use (base_in))
1135 savings = (base_cand->dead_savings
1136 + stmt_cost (base_cand->cand_stmt, speed));
1138 else if (subtract_p)
1140 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1142 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1144 if (subtrahend_cand->kind == CAND_MULT
1145 && subtrahend_cand->index.is_zero ()
1146 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1148 /* Z = (B + 0) * S, S constant
1149 X = Y - Z
1150 ===========================
1151 Value: X = Y + ((-1 * S) * B) */
1152 base = base_in;
1153 index = tree_to_double_int (subtrahend_cand->stride);
1154 index = -index;
1155 stride = subtrahend_cand->base_expr;
1156 ctype = TREE_TYPE (base_in);
1157 if (has_single_use (addend_in))
1158 savings = (subtrahend_cand->dead_savings
1159 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1162 if (subtrahend_cand->next_interp)
1163 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1164 else
1165 subtrahend_cand = NULL;
1169 if (base_cand->next_interp)
1170 base_cand = lookup_cand (base_cand->next_interp);
1171 else
1172 base_cand = NULL;
1175 if (!base)
1177 /* No interpretations had anything useful to propagate, so
1178 produce X = Y + (1 * Z). */
1179 base = base_in;
1180 index = subtract_p ? double_int_minus_one : double_int_one;
1181 stride = addend_in;
1182 ctype = TREE_TYPE (base_in);
1185 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1186 ctype, savings);
1187 return c;
1190 /* Create a candidate entry for a statement GS, where GS adds SSA
1191 name BASE_IN to constant INDEX_IN. Propagate any known information
1192 about BASE_IN into the new candidate. Return the new candidate. */
1194 static slsr_cand_t
1195 create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
1197 enum cand_kind kind = CAND_ADD;
1198 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1199 double_int index, multiple;
1200 unsigned savings = 0;
1201 slsr_cand_t c;
1202 slsr_cand_t base_cand = base_cand_from_table (base_in);
1204 while (base_cand && !base && base_cand->kind != CAND_PHI)
1206 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
1208 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1209 && index_in.multiple_of (tree_to_double_int (base_cand->stride),
1210 unsigned_p, &multiple))
1212 /* Y = (B + i') * S, S constant, c = kS for some integer k
1213 X = Y + c
1214 ============================
1215 X = (B + (i'+ k)) * S
1217 Y = B + (i' * S), S constant, c = kS for some integer k
1218 X = Y + c
1219 ============================
1220 X = (B + (i'+ k)) * S */
1221 kind = base_cand->kind;
1222 base = base_cand->base_expr;
1223 index = base_cand->index + multiple;
1224 stride = base_cand->stride;
1225 ctype = base_cand->cand_type;
1226 if (has_single_use (base_in))
1227 savings = (base_cand->dead_savings
1228 + stmt_cost (base_cand->cand_stmt, speed));
1231 if (base_cand->next_interp)
1232 base_cand = lookup_cand (base_cand->next_interp);
1233 else
1234 base_cand = NULL;
1237 if (!base)
1239 /* No interpretations had anything useful to propagate, so
1240 produce X = Y + (c * 1). */
1241 kind = CAND_ADD;
1242 base = base_in;
1243 index = index_in;
1244 stride = integer_one_node;
1245 ctype = TREE_TYPE (base_in);
1248 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1249 ctype, savings);
1250 return c;
1253 /* Given GS which is an add or subtract of scalar integers or pointers,
1254 make at least one appropriate entry in the candidate table. */
1256 static void
1257 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1259 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1260 slsr_cand_t c = NULL, c2;
1262 if (TREE_CODE (rhs2) == SSA_NAME)
1264 /* First record an interpretation assuming RHS1 is the base expression
1265 and RHS2 is the stride. But it doesn't make sense for the
1266 stride to be a pointer, so don't record a candidate in that case. */
1267 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1269 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1271 /* Add the first interpretation to the statement-candidate
1272 mapping. */
1273 add_cand_for_stmt (gs, c);
1276 /* If the two RHS operands are identical, or this is a subtract,
1277 we're done. */
1278 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1279 return;
1281 /* Otherwise, record another interpretation assuming RHS2 is the
1282 base expression and RHS1 is the stride, again provided that the
1283 stride is not a pointer. */
1284 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1286 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1287 if (c)
1288 c->next_interp = c2->cand_num;
1289 else
1290 add_cand_for_stmt (gs, c2);
1293 else
1295 double_int index;
1297 /* Record an interpretation for the add-immediate. */
1298 index = tree_to_double_int (rhs2);
1299 if (subtract_p)
1300 index = -index;
1302 c = create_add_imm_cand (gs, rhs1, index, speed);
1304 /* Add the interpretation to the statement-candidate mapping. */
1305 add_cand_for_stmt (gs, c);
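/* For instance (hypothetical SSA names, assuming nothing useful is
   known about y_2's definition), x_5 = y_2 + z_3 is recorded with each
   non-pointer operand as the base in turn, while x_5 = y_2 - 3 becomes
   a single add-immediate interpretation ADD (B: y_2; i: -3; S: 1).  */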
1309 /* Given GS which is a negate of a scalar integer, make an appropriate
1310 entry in the candidate table. A negate is equivalent to a multiply
1311 by -1. */
1313 static void
1314 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1316 /* Record a CAND_MULT interpretation for the multiply by -1. */
1317 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1319 /* Add the interpretation to the statement-candidate mapping. */
1320 add_cand_for_stmt (gs, c);
1323 /* Help function for legal_cast_p, operating on two trees. Checks
1324 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1325 for more details. */
1327 static bool
1328 legal_cast_p_1 (tree lhs, tree rhs)
1330 tree lhs_type, rhs_type;
1331 unsigned lhs_size, rhs_size;
1332 bool lhs_wraps, rhs_wraps;
1334 lhs_type = TREE_TYPE (lhs);
1335 rhs_type = TREE_TYPE (rhs);
1336 lhs_size = TYPE_PRECISION (lhs_type);
1337 rhs_size = TYPE_PRECISION (rhs_type);
1338 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1339 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1341 if (lhs_size < rhs_size
1342 || (rhs_wraps && !lhs_wraps)
1343 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1344 return false;
1346 return true;
1349 /* Return TRUE if GS is a statement that defines an SSA name from
1350 a conversion and is legal for us to combine with an add and multiply
1351 in the candidate table. For example, suppose we have:
1353 A = B + i;
1354 C = (type) A;
1355 D = C * S;
1357 Without the type-cast, we would create a CAND_MULT for D with base B,
1358 index i, and stride S. We want to record this candidate only if it
1359 is equivalent to applying the type cast following the multiply:
1361 A = B + i;
1362 E = A * S;
1363 D = (type) E;
1365 We will record the type with the candidate for D. This allows us
1366 to use a similar previous candidate as a basis. If we have earlier seen
1368 A' = B + i';
1369 C' = (type) A';
1370 D' = C' * S;
1372 we can replace D with
1374 D = D' + (i - i') * S;
1376 But if moving the type-cast would change semantics, we mustn't do this.
1378 This is legitimate for casts from a non-wrapping integral type to
1379 any integral type of the same or larger size. It is not legitimate
1380 to convert a wrapping type to a non-wrapping type, or to a wrapping
1381 type of a different size. I.e., with a wrapping type, we must
1382 assume that the addition B + i could wrap, in which case performing
1383 the multiply before or after one of the "illegal" type casts will
1384 have different semantics. */
1386 static bool
1387 legal_cast_p (gimple gs, tree rhs)
1389 if (!is_gimple_assign (gs)
1390 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1391 return false;
1393 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
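/* A concrete sketch of the cases above (variable names hypothetical;
   assume a 32-bit int, a 64-bit long, and signed overflow undefined):

     int  a = b + 4;
     long c = (long) a;
     long d = c * s;

   The cast from int to long is legal here, so d can be recorded as a
   CAND_MULT with base b, index 4, stride s, and candidate type long.
   If b were unsigned int, the addition could already have wrapped
   modulo 2^32, and no combined candidate would be recorded.  */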
1396 /* Given GS which is a cast to a scalar integer type, determine whether
1397 the cast is legal for strength reduction. If so, make at least one
1398 appropriate entry in the candidate table. */
1400 static void
1401 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1403 tree lhs, ctype;
1404 slsr_cand_t base_cand, c, c2;
1405 unsigned savings = 0;
1407 if (!legal_cast_p (gs, rhs1))
1408 return;
1410 lhs = gimple_assign_lhs (gs);
1411 base_cand = base_cand_from_table (rhs1);
1412 ctype = TREE_TYPE (lhs);
1414 if (base_cand && base_cand->kind != CAND_PHI)
1416 while (base_cand)
1418 /* Propagate all data from the base candidate except the type,
1419 which comes from the cast, and the base candidate's cast,
1420 which is no longer applicable. */
1421 if (has_single_use (rhs1))
1422 savings = (base_cand->dead_savings
1423 + stmt_cost (base_cand->cand_stmt, speed));
1425 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1426 base_cand->base_expr,
1427 base_cand->index, base_cand->stride,
1428 ctype, savings);
1429 if (base_cand->next_interp)
1430 base_cand = lookup_cand (base_cand->next_interp);
1431 else
1432 base_cand = NULL;
1435 else
1437 /* If nothing is known about the RHS, create fresh CAND_ADD and
1438 CAND_MULT interpretations:
1440 X = Y + (0 * 1)
1441 X = (Y + 0) * 1
1443 The first of these is somewhat arbitrary, but the choice of
1444 1 for the stride simplifies the logic for propagating casts
1445 into their uses. */
1446 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1447 integer_one_node, ctype, 0);
1448 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1449 integer_one_node, ctype, 0);
1450 c->next_interp = c2->cand_num;
1453 /* Add the first (or only) interpretation to the statement-candidate
1454 mapping. */
1455 add_cand_for_stmt (gs, c);
1458 /* Given GS which is a copy of a scalar integer type, make at least one
1459 appropriate entry in the candidate table.
1461 This interface is included for completeness, but is unnecessary
1462 if this pass immediately follows a pass that performs copy
1463 propagation, such as DOM. */
1465 static void
1466 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1468 slsr_cand_t base_cand, c, c2;
1469 unsigned savings = 0;
1471 base_cand = base_cand_from_table (rhs1);
1473 if (base_cand && base_cand->kind != CAND_PHI)
1475 while (base_cand)
1477 /* Propagate all data from the base candidate. */
1478 if (has_single_use (rhs1))
1479 savings = (base_cand->dead_savings
1480 + stmt_cost (base_cand->cand_stmt, speed));
1482 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1483 base_cand->base_expr,
1484 base_cand->index, base_cand->stride,
1485 base_cand->cand_type, savings);
1486 if (base_cand->next_interp)
1487 base_cand = lookup_cand (base_cand->next_interp);
1488 else
1489 base_cand = NULL;
1492 else
1494 /* If nothing is known about the RHS, create fresh CAND_ADD and
1495 CAND_MULT interpretations:
1497 X = Y + (0 * 1)
1498 X = (Y + 0) * 1
1500 The first of these is somewhat arbitrary, but the choice of
1501 1 for the stride simplifies the logic for propagating casts
1502 into their uses. */
1503 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1504 integer_one_node, TREE_TYPE (rhs1), 0);
1505 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1506 integer_one_node, TREE_TYPE (rhs1), 0);
1507 c->next_interp = c2->cand_num;
1510 /* Add the first (or only) interpretation to the statement-candidate
1511 mapping. */
1512 add_cand_for_stmt (gs, c);
1515 /* Find strength-reduction candidates in block BB. */
1517 static void
1518 find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
1519 basic_block bb)
1521 bool speed = optimize_bb_for_speed_p (bb);
1522 gimple_stmt_iterator gsi;
1524 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1525 slsr_process_phi (gsi_stmt (gsi), speed);
1527 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1529 gimple gs = gsi_stmt (gsi);
1531 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1532 slsr_process_ref (gs);
1534 else if (is_gimple_assign (gs)
1535 && SCALAR_INT_MODE_P
1536 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1538 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1540 switch (gimple_assign_rhs_code (gs))
1542 case MULT_EXPR:
1543 case PLUS_EXPR:
1544 rhs1 = gimple_assign_rhs1 (gs);
1545 rhs2 = gimple_assign_rhs2 (gs);
1546 /* Should never happen, but currently some buggy situations
1547 in earlier phases put constants in rhs1. */
1548 if (TREE_CODE (rhs1) != SSA_NAME)
1549 continue;
1550 break;
1552 /* Possible future opportunity: rhs1 of a ptr+ can be
1553 an ADDR_EXPR. */
1554 case POINTER_PLUS_EXPR:
1555 case MINUS_EXPR:
1556 rhs2 = gimple_assign_rhs2 (gs);
1557 /* Fall-through. */
1559 case NOP_EXPR:
1560 case MODIFY_EXPR:
1561 case NEGATE_EXPR:
1562 rhs1 = gimple_assign_rhs1 (gs);
1563 if (TREE_CODE (rhs1) != SSA_NAME)
1564 continue;
1565 break;
1567 default:
1571 switch (gimple_assign_rhs_code (gs))
1573 case MULT_EXPR:
1574 slsr_process_mul (gs, rhs1, rhs2, speed);
1575 break;
1577 case PLUS_EXPR:
1578 case POINTER_PLUS_EXPR:
1579 case MINUS_EXPR:
1580 slsr_process_add (gs, rhs1, rhs2, speed);
1581 break;
1583 case NEGATE_EXPR:
1584 slsr_process_neg (gs, rhs1, speed);
1585 break;
1587 case NOP_EXPR:
1588 slsr_process_cast (gs, rhs1, speed);
1589 break;
1591 case MODIFY_EXPR:
1592 slsr_process_copy (gs, rhs1, speed);
1593 break;
1595 default:
1602 /* Dump a candidate for debug. */
1604 static void
1605 dump_candidate (slsr_cand_t c)
1607 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1608 gimple_bb (c->cand_stmt)->index);
1609 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1610 switch (c->kind)
1612 case CAND_MULT:
1613 fputs (" MULT : (", dump_file);
1614 print_generic_expr (dump_file, c->base_expr, 0);
1615 fputs (" + ", dump_file);
1616 dump_double_int (dump_file, c->index, false);
1617 fputs (") * ", dump_file);
1618 print_generic_expr (dump_file, c->stride, 0);
1619 fputs (" : ", dump_file);
1620 break;
1621 case CAND_ADD:
1622 fputs (" ADD : ", dump_file);
1623 print_generic_expr (dump_file, c->base_expr, 0);
1624 fputs (" + (", dump_file);
1625 dump_double_int (dump_file, c->index, false);
1626 fputs (" * ", dump_file);
1627 print_generic_expr (dump_file, c->stride, 0);
1628 fputs (") : ", dump_file);
1629 break;
1630 case CAND_REF:
1631 fputs (" REF : ", dump_file);
1632 print_generic_expr (dump_file, c->base_expr, 0);
1633 fputs (" + (", dump_file);
1634 print_generic_expr (dump_file, c->stride, 0);
1635 fputs (") + ", dump_file);
1636 dump_double_int (dump_file, c->index, false);
1637 fputs (" : ", dump_file);
1638 break;
1639 case CAND_PHI:
1640 fputs (" PHI : ", dump_file);
1641 print_generic_expr (dump_file, c->base_expr, 0);
1642 fputs (" + (unknown * ", dump_file);
1643 print_generic_expr (dump_file, c->stride, 0);
1644 fputs (") : ", dump_file);
1645 break;
1646 default:
1647 gcc_unreachable ();
1649 print_generic_expr (dump_file, c->cand_type, 0);
1650 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1651 c->basis, c->dependent, c->sibling);
1652 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1653 c->next_interp, c->dead_savings);
1654 if (c->def_phi)
1655 fprintf (dump_file, " phi: %d\n", c->def_phi);
1656 fputs ("\n", dump_file);
1659 /* Dump the candidate vector for debug. */
1661 static void
1662 dump_cand_vec (void)
1664 unsigned i;
1665 slsr_cand_t c;
1667 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1669 FOR_EACH_VEC_ELT (cand_vec, i, c)
1670 dump_candidate (c);
1673 /* Callback used to dump the candidate chains hash table. */
1675 static int
1676 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1678 const_cand_chain_t chain = *slot;
1679 cand_chain_t p;
1681 print_generic_expr (dump_file, chain->base_expr, 0);
1682 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1684 for (p = chain->next; p; p = p->next)
1685 fprintf (dump_file, " -> %d", p->cand->cand_num);
1687 fputs ("\n", dump_file);
1688 return 1;
1691 /* Dump the candidate chains. */
1693 static void
1694 dump_cand_chains (void)
1696 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1697 base_cand_map.traverse_noresize <void *, ssa_base_cand_dump_callback> (NULL);
1698 fputs ("\n", dump_file);
1701 /* Dump the increment vector for debug. */
1703 static void
1704 dump_incr_vec (void)
1706 if (dump_file && (dump_flags & TDF_DETAILS))
1708 unsigned i;
1710 fprintf (dump_file, "\nIncrement vector:\n\n");
1712 for (i = 0; i < incr_vec_len; i++)
1714 fprintf (dump_file, "%3d increment: ", i);
1715 dump_double_int (dump_file, incr_vec[i].incr, false);
1716 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1717 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1718 fputs ("\n initializer: ", dump_file);
1719 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1720 fputs ("\n\n", dump_file);
1725 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1726 data reference. */
1728 static void
1729 replace_ref (tree *expr, slsr_cand_t c)
1731 tree add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1732 c->base_expr, c->stride);
1733 tree mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr,
1734 double_int_to_tree (c->cand_type, c->index));
1736 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1737 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1738 TREE_OPERAND (mem_ref, 0)
1739 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1740 /*simple_p=*/true, NULL,
1741 /*before=*/true, GSI_SAME_STMT);
1742 copy_ref_info (mem_ref, *expr);
1743 *expr = mem_ref;
1744 update_stmt (c->cand_stmt);
1747 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1748 dependent of candidate C with an equivalent strength-reduced data
1749 reference. */
1751 static void
1752 replace_refs (slsr_cand_t c)
1754 if (gimple_vdef (c->cand_stmt))
1756 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1757 replace_ref (lhs, c);
1759 else
1761 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1762 replace_ref (rhs, c);
1765 if (c->sibling)
1766 replace_refs (lookup_cand (c->sibling));
1768 if (c->dependent)
1769 replace_refs (lookup_cand (c->dependent));
1772 /* Return TRUE if candidate C is dependent upon a PHI. */
1774 static bool
1775 phi_dependent_cand_p (slsr_cand_t c)
1777 /* A candidate is not necessarily dependent upon a PHI just because
1778 it has a phi definition for its base name. It may have a basis
1779 that relies upon the same phi definition, in which case the PHI
1780 is irrelevant to this candidate. */
1781 return (c->def_phi
1782 && c->basis
1783 && lookup_cand (c->basis)->def_phi != c->def_phi);
1786 /* Calculate the increment required for candidate C relative to
1787 its basis. */
1789 static double_int
1790 cand_increment (slsr_cand_t c)
1792 slsr_cand_t basis;
1794 /* If the candidate doesn't have a basis, just return its own
1795 index. This is useful in record_increments to help us find
1796 an existing initializer. Also, if the candidate's basis is
1797 hidden by a phi, then its own index will be the increment
1798 from the newly introduced phi basis. */
1799 if (!c->basis || phi_dependent_cand_p (c))
1800 return c->index;
1802 basis = lookup_cand (c->basis);
1803 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1804 return c->index - basis->index;
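/* For example (hypothetical candidates), if C is X = (B + 7) * S and
   its basis is Y = (B + 2) * S, the increment is 7 - 2 = 5.  When
   address arithmetic is not in use, an increment of -5 is treated the
   same as 5 by cand_abs_increment below.  */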
1807 /* Calculate the increment required for candidate C relative to
1808 its basis. If we aren't going to generate pointer arithmetic
1809 for this candidate, return the absolute value of that increment
1810 instead. */
1812 static inline double_int
1813 cand_abs_increment (slsr_cand_t c)
1815 double_int increment = cand_increment (c);
1817 if (!address_arithmetic_p && increment.is_negative ())
1818 increment = -increment;
1820 return increment;
1823 /* Return TRUE iff candidate C has already been replaced under
1824 another interpretation. */
1826 static inline bool
1827 cand_already_replaced (slsr_cand_t c)
1829 return (gimple_bb (c->cand_stmt) == 0);
1832 /* Common logic used by replace_unconditional_candidate and
1833 replace_conditional_candidate. */
1835 static void
1836 replace_mult_candidate (slsr_cand_t c, tree basis_name, double_int bump)
1838 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
1839 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1841 /* It is highly unlikely, but possible, that the resulting
1842 bump doesn't fit in a HWI. Abandon the replacement
1843 in this case. This does not affect siblings or dependents
1844 of C. Restriction to signed HWI is conservative for unsigned
1845 types but allows for safe negation without twisted logic. */
1846 if (bump.fits_shwi ()
1847 && bump.to_shwi () != HOST_WIDE_INT_MIN
1848 /* It is not useful to replace casts, copies, or adds of
1849 an SSA name and a constant. */
1850 && cand_code != MODIFY_EXPR
1851 && cand_code != NOP_EXPR
1852 && cand_code != PLUS_EXPR
1853 && cand_code != POINTER_PLUS_EXPR
1854 && cand_code != MINUS_EXPR)
1856 enum tree_code code = PLUS_EXPR;
1857 tree bump_tree;
1858 gimple stmt_to_print = NULL;
1860 /* If the basis name and the candidate's LHS have incompatible
1861 types, introduce a cast. */
1862 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
1863 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
1864 if (bump.is_negative ())
1866 code = MINUS_EXPR;
1867 bump = -bump;
1870 bump_tree = double_int_to_tree (target_type, bump);
1872 if (dump_file && (dump_flags & TDF_DETAILS))
1874 fputs ("Replacing: ", dump_file);
1875 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1878 if (bump.is_zero ())
1880 tree lhs = gimple_assign_lhs (c->cand_stmt);
1881 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
1882 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1883 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
1884 gsi_replace (&gsi, copy_stmt, false);
1885 if (dump_file && (dump_flags & TDF_DETAILS))
1886 stmt_to_print = copy_stmt;
1888 else
1890 tree rhs1, rhs2;
1891 if (cand_code != NEGATE_EXPR) {
1892 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1893 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1895 if (cand_code != NEGATE_EXPR
1896 && ((operand_equal_p (rhs1, basis_name, 0)
1897 && operand_equal_p (rhs2, bump_tree, 0))
1898 || (operand_equal_p (rhs1, bump_tree, 0)
1899 && operand_equal_p (rhs2, basis_name, 0))))
1901 if (dump_file && (dump_flags & TDF_DETAILS))
1903 fputs ("(duplicate, not actually replacing)", dump_file);
1904 stmt_to_print = c->cand_stmt;
1907 else
1909 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1910 gimple_assign_set_rhs_with_ops (&gsi, code,
1911 basis_name, bump_tree);
1912 update_stmt (gsi_stmt (gsi));
1913 if (dump_file && (dump_flags & TDF_DETAILS))
1914 stmt_to_print = gsi_stmt (gsi);
1918 if (dump_file && (dump_flags & TDF_DETAILS))
1920 fputs ("With: ", dump_file);
1921 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
1922 fputs ("\n", dump_file);
1927 /* Replace candidate C with an add or subtract. Note that we only
1928 operate on CAND_MULTs with known strides, so we will never generate
1929 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
1930 X = Y + ((i - i') * S), as described in the module commentary. The
1931 folded value ((i - i') * S) is referred to here as the "bump." */
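/* For instance (numbers hypothetical): if C's increment relative to its
   basis is 3 and the known stride is 4, the bump is 12 and C's statement
   becomes an add of the basis name and 12 (or a subtract of 12 when the
   bump is negative). */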
1933 static void
1934 replace_unconditional_candidate (slsr_cand_t c)
1936 slsr_cand_t basis;
1937 double_int stride, bump;
1939 if (cand_already_replaced (c))
1940 return;
1942 basis = lookup_cand (c->basis);
1943 stride = tree_to_double_int (c->stride);
1944 bump = cand_increment (c) * stride;
1946 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
1949 /* Return the index in the increment vector of the given INCREMENT,
1950 or -1 if not found. The latter can occur if more than
1951 MAX_INCR_VEC_LEN increments have been found. */
1953 static inline int
1954 incr_vec_index (double_int increment)
1956 unsigned i;
1958 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
1961 if (i < incr_vec_len)
1962 return i;
1963 else
1964 return -1;
1967 /* Create a new statement along edge E to add BASIS_NAME to the product
1968 of INCREMENT and the stride of candidate C. Create and return a new
1969 SSA name to be used as the LHS of the new statement.
1970 KNOWN_STRIDE is true iff C's stride is a constant. */
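/* A hedged sketch (names and numbers illustrative): with a known stride
   of 4 and an increment of -3 along edge E, the statement created on E
   has the form slsr_N = basis_name - 12, and its LHS is returned. An
   increment of zero creates nothing and returns BASIS_NAME itself. */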
1972 static tree
1973 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
1974 double_int increment, edge e, location_t loc,
1975 bool known_stride)
1977 basic_block insert_bb;
1978 gimple_stmt_iterator gsi;
1979 tree lhs, basis_type;
1980 gimple new_stmt;
1982 /* If the add candidate along this incoming edge has the same
1983 index as C's hidden basis, the hidden basis represents this
1984 edge correctly. */
1985 if (increment.is_zero ())
1986 return basis_name;
1988 basis_type = TREE_TYPE (basis_name);
1989 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
1991 if (known_stride)
1993 tree bump_tree;
1994 enum tree_code code = PLUS_EXPR;
1995 double_int bump = increment * tree_to_double_int (c->stride);
1996 if (bump.is_negative ())
1998 code = MINUS_EXPR;
1999 bump = -bump;
2002 bump_tree = double_int_to_tree (basis_type, bump);
2003 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2004 bump_tree);
2006 else
2008 int i;
2009 bool negate_incr = (!address_arithmetic_p && increment.is_negative ());
2010 i = incr_vec_index (negate_incr ? -increment : increment);
2011 gcc_assert (i >= 0);
2013 if (incr_vec[i].initializer)
2015 enum tree_code code = negate_incr ? MINUS_EXPR : PLUS_EXPR;
2016 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2017 incr_vec[i].initializer);
2019 else if (increment.is_one ())
2020 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, lhs, basis_name,
2021 c->stride);
2022 else if (increment.is_minus_one ())
2023 new_stmt = gimple_build_assign_with_ops (MINUS_EXPR, lhs, basis_name,
2024 c->stride);
2025 else
2026 gcc_unreachable ();
2029 insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
2030 gsi = gsi_last_bb (insert_bb);
2032 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2033 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
2034 else
2035 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2037 gimple_set_location (new_stmt, loc);
2039 if (dump_file && (dump_flags & TDF_DETAILS))
2041 fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
2042 print_gimple_stmt (dump_file, new_stmt, 0, 0);
2045 return lhs;
2048 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2049 is hidden by the phi node FROM_PHI, create a new phi node in the same
2050 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2051 with its phi arguments representing conditional adjustments to the
2052 hidden basis along conditional incoming paths. Those adjustments are
2053 made by creating add statements (and sometimes recursively creating
2054 phis) along those incoming paths. LOC is the location to attach to
2055 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2056 constant. */
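/* A hedged sketch (SSA names illustrative): suppose FROM_PHI is
   x_3 = PHI <x_1, x_2>, where x_1 is the phi's base name and x_2 names
   an add candidate whose index exceeds the hidden basis's index by 2.
   With BASIS_NAME y_1, a known stride of 4, and a hidden basis of index
   zero, this creates t_2 = y_1 + 8 along x_2's incoming edge and returns
   the result of a new phi t_3 = PHI <y_1, t_2>. */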
2058 static tree
2059 create_phi_basis (slsr_cand_t c, gimple from_phi, tree basis_name,
2060 location_t loc, bool known_stride)
2062 int i;
2063 tree name, phi_arg;
2064 gimple phi;
2065 vec<tree> phi_args;
2066 slsr_cand_t basis = lookup_cand (c->basis);
2067 int nargs = gimple_phi_num_args (from_phi);
2068 basic_block phi_bb = gimple_bb (from_phi);
2069 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (from_phi));
2070 phi_args.create (nargs);
2072 /* Process each argument of the existing phi that represents
2073 conditionally-executed add candidates. */
2074 for (i = 0; i < nargs; i++)
2076 edge e = (*phi_bb->preds)[i];
2077 tree arg = gimple_phi_arg_def (from_phi, i);
2078 tree feeding_def;
2080 /* If the phi argument is the base name of the CAND_PHI, then
2081 this incoming arc should use the hidden basis. */
2082 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2083 if (basis->index.is_zero ())
2084 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2085 else
2087 double_int incr = -basis->index;
2088 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2089 e, loc, known_stride);
2091 else
2093 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2095 /* If there is another phi along this incoming edge, we must
2096 process it in the same fashion to ensure that all basis
2097 adjustments are made along its incoming edges. */
2098 if (gimple_code (arg_def) == GIMPLE_PHI)
2099 feeding_def = create_phi_basis (c, arg_def, basis_name,
2100 loc, known_stride);
2101 else
2103 slsr_cand_t arg_cand = base_cand_from_table (arg);
2104 double_int diff = arg_cand->index - basis->index;
2105 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2106 e, loc, known_stride);
2110 /* Because of recursion, we need to save the arguments in a vector
2111 so we can create the PHI statement all at once. Otherwise the
2112 storage for the half-created PHI can be reclaimed. */
2113 phi_args.safe_push (feeding_def);
2116 /* Create the new phi basis. */
2117 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2118 phi = create_phi_node (name, phi_bb);
2119 SSA_NAME_DEF_STMT (name) = phi;
2121 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2123 edge e = (*phi_bb->preds)[i];
2124 add_phi_arg (phi, phi_arg, e, loc);
2127 update_stmt (phi);
2129 if (dump_file && (dump_flags & TDF_DETAILS))
2131 fputs ("Introducing new phi basis: ", dump_file);
2132 print_gimple_stmt (dump_file, phi, 0, 0);
2135 return name;
2138 /* Given a candidate C whose basis is hidden by at least one intervening
2139 phi, introduce a matching number of new phis to represent its basis
2140 adjusted by conditional increments along possible incoming paths. Then
2141 replace C as though it were an unconditional candidate, using the new
2142 basis. */
2144 static void
2145 replace_conditional_candidate (slsr_cand_t c)
2147 tree basis_name, name;
2148 slsr_cand_t basis;
2149 location_t loc;
2150 double_int stride, bump;
2152 /* Look up the LHS SSA name from C's basis. This will be the
2153 RHS1 of the adds we will introduce to create new phi arguments. */
2154 basis = lookup_cand (c->basis);
2155 basis_name = gimple_assign_lhs (basis->cand_stmt);
2157 /* Create a new phi statement which will represent C's true basis
2158 after the transformation is complete. */
2159 loc = gimple_location (c->cand_stmt);
2160 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2161 basis_name, loc, KNOWN_STRIDE);
2162 /* Replace C with an add of the new basis phi and a constant. */
2163 stride = tree_to_double_int (c->stride);
2164 bump = c->index * stride;
2166 replace_mult_candidate (c, name, bump);
2169 /* Compute the expected costs of inserting basis adjustments for
2170 candidate C with phi-definition PHI. The cost of inserting
2171 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2172 which are themselves phi results, recursively calculate costs
2173 for those phis as well. */
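/* For example (hypothetically): a phi with three arguments, one equal to
   the phi's base name and two naming add candidates whose indices differ
   from C's, contributes 2 * ONE_ADD_COST. */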
2175 static int
2176 phi_add_costs (gimple phi, slsr_cand_t c, int one_add_cost)
2178 unsigned i;
2179 int cost = 0;
2180 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2182 for (i = 0; i < gimple_phi_num_args (phi); i++)
2184 tree arg = gimple_phi_arg_def (phi, i);
2186 if (arg != phi_cand->base_expr)
2188 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2190 if (gimple_code (arg_def) == GIMPLE_PHI)
2191 cost += phi_add_costs (arg_def, c, one_add_cost);
2192 else
2194 slsr_cand_t arg_cand = base_cand_from_table (arg);
2196 if (arg_cand->index != c->index)
2197 cost += one_add_cost;
2202 return cost;
2205 /* For candidate C, each sibling of candidate C, and each dependent of
2206 candidate C, determine whether the candidate is dependent upon a
2207 phi that hides its basis. If not, replace the candidate unconditionally.
2208 Otherwise, determine whether the cost of introducing compensation code
2209 for the candidate is offset by the gains from strength reduction. If
2210 so, replace the candidate and introduce the compensation code. */
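/* Illustrative arithmetic (assuming COST_NEUTRAL is zero): if the
   compensating adds cost 8, the replaced multiply would have cost 6, and
   4 units of feeding code would go dead, the net cost is 8 - 6 - 4 = -2,
   so the conditional candidate is replaced. */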
2212 static void
2213 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2215 if (phi_dependent_cand_p (c))
2217 if (c->kind == CAND_MULT)
2219 /* A candidate dependent upon a phi will replace a multiply by
2220 a constant with an add, and will insert at most one add for
2221 each phi argument. Weigh these costs against the potential
2222 dead-code savings to determine profitability. */
2223 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2224 int mult_savings = stmt_cost (c->cand_stmt, speed);
2225 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2226 tree phi_result = gimple_phi_result (phi);
2227 int one_add_cost = add_cost (speed,
2228 TYPE_MODE (TREE_TYPE (phi_result)));
2229 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2230 int cost = add_costs - mult_savings - c->dead_savings;
2232 if (dump_file && (dump_flags & TDF_DETAILS))
2234 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2235 fprintf (dump_file, " add_costs = %d\n", add_costs);
2236 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2237 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2238 fprintf (dump_file, " cost = %d\n", cost);
2239 if (cost <= COST_NEUTRAL)
2240 fputs (" Replacing...\n", dump_file);
2241 else
2242 fputs (" Not replaced.\n", dump_file);
2245 if (cost <= COST_NEUTRAL)
2246 replace_conditional_candidate (c);
2249 else
2250 replace_unconditional_candidate (c);
2252 if (c->sibling)
2253 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2255 if (c->dependent)
2256 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2259 /* Count the number of candidates in the tree rooted at C that have
2260 not already been replaced under other interpretations. */
2262 static int
2263 count_candidates (slsr_cand_t c)
2265 unsigned count = cand_already_replaced (c) ? 0 : 1;
2267 if (c->sibling)
2268 count += count_candidates (lookup_cand (c->sibling));
2270 if (c->dependent)
2271 count += count_candidates (lookup_cand (c->dependent));
2273 return count;
2276 /* Increase the count of INCREMENT by one in the increment vector.
2277 INCREMENT is associated with candidate C. If INCREMENT is to be
2278 conditionally executed as part of a conditional candidate replacement,
2279 IS_PHI_ADJUST is true, otherwise false. If an initializer
2280 T_0 = stride * I is provided by a candidate that dominates all
2281 candidates with the same increment, also record T_0 for subsequent use. */
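/* A sketch (names illustrative): if the candidate being recorded is the
   add x_2 = x_1 + t_0, interpreted with index 5 over stride _s (so t_0
   represents _s * 5), then t_0 is optimistically remembered as an
   initializer for increment 5; later candidates needing that increment
   can reuse it provided t_0's definition dominates them. */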
2283 static void
2284 record_increment (slsr_cand_t c, double_int increment, bool is_phi_adjust)
2286 bool found = false;
2287 unsigned i;
2289 /* Treat increments that differ only in sign as identical so as to
2290 share initializers, unless we are generating pointer arithmetic. */
2291 if (!address_arithmetic_p && increment.is_negative ())
2292 increment = -increment;
2294 for (i = 0; i < incr_vec_len; i++)
2296 if (incr_vec[i].incr == increment)
2298 incr_vec[i].count++;
2299 found = true;
2301 /* If we previously recorded an initializer that doesn't
2302 dominate this candidate, it's not going to be useful to
2303 us after all. */
2304 if (incr_vec[i].initializer
2305 && !dominated_by_p (CDI_DOMINATORS,
2306 gimple_bb (c->cand_stmt),
2307 incr_vec[i].init_bb))
2309 incr_vec[i].initializer = NULL_TREE;
2310 incr_vec[i].init_bb = NULL;
2313 break;
2317 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2319 /* The first time we see an increment, create the entry for it.
2320 If this is the root candidate which doesn't have a basis, set
2321 the count to zero. We're only processing it so it can possibly
2322 provide an initializer for other candidates. */
2323 incr_vec[incr_vec_len].incr = increment;
2324 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2325 incr_vec[incr_vec_len].cost = COST_INFINITE;
2327 /* Optimistically record the first occurrence of this increment
2328 as providing an initializer (if it does); we will revise this
2329 opinion later if it doesn't dominate all other occurrences.
2330 Exception: increments of -1, 0, 1 never need initializers;
2331 and phi adjustments don't ever provide initializers. */
2332 if (c->kind == CAND_ADD
2333 && !is_phi_adjust
2334 && c->index == increment
2335 && (increment.sgt (double_int_one)
2336 || increment.slt (double_int_minus_one))
2337 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2338 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2340 tree t0 = NULL_TREE;
2341 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2342 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2343 if (operand_equal_p (rhs1, c->base_expr, 0))
2344 t0 = rhs2;
2345 else if (operand_equal_p (rhs2, c->base_expr, 0))
2346 t0 = rhs1;
2347 if (t0
2348 && SSA_NAME_DEF_STMT (t0)
2349 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2351 incr_vec[incr_vec_len].initializer = t0;
2352 incr_vec[incr_vec_len++].init_bb
2353 = gimple_bb (SSA_NAME_DEF_STMT (t0));
2355 else
2357 incr_vec[incr_vec_len].initializer = NULL_TREE;
2358 incr_vec[incr_vec_len++].init_bb = NULL;
2361 else
2363 incr_vec[incr_vec_len].initializer = NULL_TREE;
2364 incr_vec[incr_vec_len++].init_bb = NULL;
2369 /* Given phi statement PHI that hides a candidate from its BASIS, find
2370 the increments along each incoming arc (recursively handling additional
2371 phis that may be present) and record them. These increments are the
2372 difference in index between the index-adjusting statements and the
2373 index of the basis. */
2375 static void
2376 record_phi_increments (slsr_cand_t basis, gimple phi)
2378 unsigned i;
2379 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2381 for (i = 0; i < gimple_phi_num_args (phi); i++)
2383 tree arg = gimple_phi_arg_def (phi, i);
2385 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2387 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2389 if (gimple_code (arg_def) == GIMPLE_PHI)
2390 record_phi_increments (basis, arg_def);
2391 else
2393 slsr_cand_t arg_cand = base_cand_from_table (arg);
2394 double_int diff = arg_cand->index - basis->index;
2395 record_increment (arg_cand, diff, PHI_ADJUST);
2401 /* Determine how many times each unique increment occurs in the set
2402 of candidates rooted at C's parent, recording the data in the
2403 increment vector. For each unique increment I, if an initializer
2404 T_0 = stride * I is provided by a candidate that dominates all
2405 candidates with the same increment, also record T_0 for subsequent
2406 use. */
2408 static void
2409 record_increments (slsr_cand_t c)
2411 if (!cand_already_replaced (c))
2413 if (!phi_dependent_cand_p (c))
2414 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2415 else
2417 /* A candidate with a basis hidden by a phi will have one
2418 increment for its relationship to the index represented by
2419 the phi, and potentially additional increments along each
2420 incoming edge. For the root of the dependency tree (which
2421 has no basis), process just the initial index in case it has
2422 an initializer that can be used by subsequent candidates. */
2423 record_increment (c, c->index, NOT_PHI_ADJUST);
2425 if (c->basis)
2426 record_phi_increments (lookup_cand (c->basis),
2427 lookup_cand (c->def_phi)->cand_stmt);
2431 if (c->sibling)
2432 record_increments (lookup_cand (c->sibling));
2434 if (c->dependent)
2435 record_increments (lookup_cand (c->dependent));
2438 /* Add up and return the costs of introducing add statements that
2439 require the increment INCR on behalf of candidate C and phi
2440 statement PHI. Accumulate into *SAVINGS the potential savings
2441 from removing existing statements that feed PHI and have no other
2442 uses. */
2444 static int
2445 phi_incr_cost (slsr_cand_t c, double_int incr, gimple phi, int *savings)
2447 unsigned i;
2448 int cost = 0;
2449 slsr_cand_t basis = lookup_cand (c->basis);
2450 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2452 for (i = 0; i < gimple_phi_num_args (phi); i++)
2454 tree arg = gimple_phi_arg_def (phi, i);
2456 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2458 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2460 if (gimple_code (arg_def) == GIMPLE_PHI)
2462 int feeding_savings = 0;
2463 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2464 if (has_single_use (gimple_phi_result (arg_def)))
2465 *savings += feeding_savings;
2467 else
2469 slsr_cand_t arg_cand = base_cand_from_table (arg);
2470 double_int diff = arg_cand->index - basis->index;
2472 if (incr == diff)
2474 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2475 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2476 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2477 if (has_single_use (lhs))
2478 *savings += stmt_cost (arg_cand->cand_stmt, true);
2484 return cost;
2487 /* Return the first candidate in the tree rooted at C that has not
2488 already been replaced, favoring siblings over dependents. */
2490 static slsr_cand_t
2491 unreplaced_cand_in_tree (slsr_cand_t c)
2493 if (!cand_already_replaced (c))
2494 return c;
2496 if (c->sibling)
2498 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2499 if (sib)
2500 return sib;
2503 if (c->dependent)
2505 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2506 if (dep)
2507 return dep;
2510 return NULL;
2513 /* Return TRUE if the candidates in the tree rooted at C should be
2514 optimized for speed, else FALSE. We estimate this based on the block
2515 containing the most dominant candidate in the tree that has not yet
2516 been replaced. */
2518 static bool
2519 optimize_cands_for_speed_p (slsr_cand_t c)
2521 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2522 gcc_assert (c2);
2523 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2526 /* Add COST_IN to the lowest cost of any dependent path starting at
2527 candidate C or any of its siblings, counting only candidates along
2528 such paths with increment INCR. Assume that replacing a candidate
2529 reduces cost by REPL_SAVINGS. Also account for savings from any
2530 statements that would go dead. If COUNT_PHIS is true, include
2531 costs of introducing feeding statements for conditional candidates. */
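/* A rough sketch: for a dependent chain with no siblings in which two
   candidates carry increment INCR, the value returned is approximately
   COST_IN - 2 * REPL_SAVINGS, less any dead-code savings accumulated
   along the chain. */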
2533 static int
2534 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2535 double_int incr, bool count_phis)
2537 int local_cost, sib_cost, savings = 0;
2538 double_int cand_incr = cand_abs_increment (c);
2540 if (cand_already_replaced (c))
2541 local_cost = cost_in;
2542 else if (incr == cand_incr)
2543 local_cost = cost_in - repl_savings - c->dead_savings;
2544 else
2545 local_cost = cost_in - c->dead_savings;
2547 if (count_phis
2548 && phi_dependent_cand_p (c)
2549 && !cand_already_replaced (c))
2551 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2552 local_cost += phi_incr_cost (c, incr, phi, &savings);
2554 if (has_single_use (gimple_phi_result (phi)))
2555 local_cost -= savings;
2558 if (c->dependent)
2559 local_cost = lowest_cost_path (local_cost, repl_savings,
2560 lookup_cand (c->dependent), incr,
2561 count_phis);
2563 if (c->sibling)
2565 sib_cost = lowest_cost_path (cost_in, repl_savings,
2566 lookup_cand (c->sibling), incr,
2567 count_phis);
2568 local_cost = MIN (local_cost, sib_cost);
2571 return local_cost;
2574 /* Compute the total savings that would accrue from all replacements
2575 in the candidate tree rooted at C, counting only candidates with
2576 increment INCR. Assume that replacing a candidate reduces cost
2577 by REPL_SAVINGS. Also account for savings from statements that
2578 would go dead. */
2580 static int
2581 total_savings (int repl_savings, slsr_cand_t c, double_int incr,
2582 bool count_phis)
2584 int savings = 0;
2585 double_int cand_incr = cand_abs_increment (c);
2587 if (incr == cand_incr && !cand_already_replaced (c))
2588 savings += repl_savings + c->dead_savings;
2590 if (count_phis
2591 && phi_dependent_cand_p (c)
2592 && !cand_already_replaced (c))
2594 int phi_savings = 0;
2595 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2596 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2598 if (has_single_use (gimple_phi_result (phi)))
2599 savings += phi_savings;
2602 if (c->dependent)
2603 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2604 count_phis);
2606 if (c->sibling)
2607 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2608 count_phis);
2610 return savings;
2613 /* Use target-specific costs to determine and record which increments
2614 in the current candidate tree are profitable to replace, assuming
2615 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2616 the candidate tree.
2618 One slight limitation here is that we don't account for the possible
2619 introduction of casts in some cases. See replace_one_candidate for
2620 the cases where these are introduced. This should probably be cleaned
2621 up sometime. */
2623 static void
2624 analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
2626 unsigned i;
2628 for (i = 0; i < incr_vec_len; i++)
2630 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2632 /* If somehow this increment is bigger than a HWI, we won't
2633 be optimizing candidates that use it. And if the increment
2634 has a count of zero, nothing will be done with it. */
2635 if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
2636 incr_vec[i].cost = COST_INFINITE;
2638 /* Increments of 0, 1, and -1 are always profitable to replace,
2639 because they always replace a multiply or add with an add or
2640 copy, and may cause one or more existing instructions to go
2641 dead. Exception: -1 can't be assumed to be profitable for
2642 pointer addition. */
2643 else if (incr == 0
2644 || incr == 1
2645 || (incr == -1
2646 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2647 != POINTER_PLUS_EXPR)))
2648 incr_vec[i].cost = COST_NEUTRAL;
2650 /* FORNOW: If we need to add an initializer, give up if a cast from
2651 the candidate's type to its stride's type can lose precision.
2652 This could eventually be handled better by expressly retaining the
2653 result of a cast to a wider type in the stride. Example:
2655 short int _1;
2656 _2 = (int) _1;
2657 _3 = _2 * 10;
2658 _4 = x + _3; ADD: x + (10 * _1) : int
2659 _5 = _2 * 15;
2660 _6 = x + _5; ADD: x + (15 * _1) : int
2662 Right now replacing _6 would cause insertion of an initializer
2663 of the form "short int T = _1 * 5;" followed by a cast to
2664 int, which could overflow incorrectly. Had we recorded _2 or
2665 (int)_1 as the stride, this wouldn't happen. However, doing
2666 this breaks other opportunities, so this will require some
2667 care. */
2668 else if (!incr_vec[i].initializer
2669 && TREE_CODE (first_dep->stride) != INTEGER_CST
2670 && !legal_cast_p_1 (first_dep->stride,
2671 gimple_assign_lhs (first_dep->cand_stmt)))
2673 incr_vec[i].cost = COST_INFINITE;
2675 /* If we need to add an initializer, make sure we don't introduce
2676 a multiply by a pointer type, which can happen in certain cast
2677 scenarios. FIXME: When cleaning up these cast issues, we can
2678 afford to introduce the multiply provided we cast out to an
2679 unsigned int of appropriate size. */
2680 else if (!incr_vec[i].initializer
2681 && TREE_CODE (first_dep->stride) != INTEGER_CST
2682 && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))
2684 incr_vec[i].cost = COST_INFINITE;
2686 /* For any other increment, if this is a multiply candidate, we
2687 must introduce a temporary T and initialize it with
2688 T_0 = stride * increment. When optimizing for speed, walk the
2689 candidate tree to calculate the best cost reduction along any
2690 path; if it offsets the fixed cost of inserting the initializer,
2691 replacing the increment is profitable. When optimizing for
2692 size, instead calculate the total cost reduction from replacing
2693 all candidates with this increment. */
2694 else if (first_dep->kind == CAND_MULT)
2696 int cost = mult_by_coeff_cost (incr, mode, speed);
2697 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2698 if (speed)
2699 cost = lowest_cost_path (cost, repl_savings, first_dep,
2700 incr_vec[i].incr, COUNT_PHIS);
2701 else
2702 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
2703 COUNT_PHIS);
2705 incr_vec[i].cost = cost;
2708 /* If this is an add candidate, the initializer may already
2709 exist, so only calculate the cost of the initializer if it
2710 doesn't. We are replacing one add with another here, so the
2711 known replacement savings is zero. We will account for removal
2712 of dead instructions in lowest_cost_path or total_savings. */
2713 else
2715 int cost = 0;
2716 if (!incr_vec[i].initializer)
2717 cost = mult_by_coeff_cost (incr, mode, speed);
2719 if (speed)
2720 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
2721 DONT_COUNT_PHIS);
2722 else
2723 cost -= total_savings (0, first_dep, incr_vec[i].incr,
2724 DONT_COUNT_PHIS);
2726 incr_vec[i].cost = cost;
2731 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2732 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2733 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2734 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2735 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2737 static basic_block
2738 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2739 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2741 basic_block ncd;
2743 if (!bb1)
2745 *where = c2;
2746 return bb2;
2749 if (!bb2)
2751 *where = c1;
2752 return bb1;
2755 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2757 /* If both candidates are in the same block, the earlier
2758 candidate wins. */
2759 if (bb1 == ncd && bb2 == ncd)
2761 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2762 *where = c2;
2763 else
2764 *where = c1;
2767 /* Otherwise, if one of them produced a candidate in the
2768 dominator, that one wins. */
2769 else if (bb1 == ncd)
2770 *where = c1;
2772 else if (bb2 == ncd)
2773 *where = c2;
2775 /* If neither matches the dominator, neither wins. */
2776 else
2777 *where = NULL;
2779 return ncd;
2782 /* Consider all candidates that feed PHI. Find the nearest common
2783 dominator of those candidates requiring the given increment INCR.
2784 Further find and return the nearest common dominator of this result
2785 with block NCD. If the returned block contains one or more of the
2786 candidates, return the earliest candidate in the block in *WHERE. */
2788 static basic_block
2789 ncd_with_phi (slsr_cand_t c, double_int incr, gimple phi,
2790 basic_block ncd, slsr_cand_t *where)
2792 unsigned i;
2793 slsr_cand_t basis = lookup_cand (c->basis);
2794 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2796 for (i = 0; i < gimple_phi_num_args (phi); i++)
2798 tree arg = gimple_phi_arg_def (phi, i);
2800 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2802 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2804 if (gimple_code (arg_def) == GIMPLE_PHI)
2805 ncd = ncd_with_phi (c, incr, arg_def, ncd, where);
2806 else
2808 slsr_cand_t arg_cand = base_cand_from_table (arg);
2809 double_int diff = arg_cand->index - basis->index;
2811 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
2812 ncd = ncd_for_two_cands (ncd, gimple_bb (arg_cand->cand_stmt),
2813 *where, arg_cand, where);
2818 return ncd;
2821 /* Consider the candidate C together with any candidates that feed
2822 C's phi dependence (if any). Find and return the nearest common
2823 dominator of those candidates requiring the given increment INCR.
2824 If the returned block contains one or more of the candidates,
2825 return the earliest candidate in the block in *WHERE. */
2827 static basic_block
2828 ncd_of_cand_and_phis (slsr_cand_t c, double_int incr, slsr_cand_t *where)
2830 basic_block ncd = NULL;
2832 if (cand_abs_increment (c) == incr)
2834 ncd = gimple_bb (c->cand_stmt);
2835 *where = c;
2838 if (phi_dependent_cand_p (c))
2839 ncd = ncd_with_phi (c, incr, lookup_cand (c->def_phi)->cand_stmt,
2840 ncd, where);
2842 return ncd;
2845 /* Consider all candidates in the tree rooted at C for which INCR
2846 represents the required increment of C relative to its basis.
2847 Find and return the basic block that most nearly dominates all
2848 such candidates. If the returned block contains one or more of
2849 the candidates, return the earliest candidate in the block in
2850 *WHERE. */
2852 static basic_block
2853 nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
2854 slsr_cand_t *where)
2856 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
2857 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
2859 /* First find the NCD of all siblings and dependents. */
2860 if (c->sibling)
2861 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
2862 incr, &sib_where);
2863 if (c->dependent)
2864 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
2865 incr, &dep_where);
2866 if (!sib_ncd && !dep_ncd)
2868 new_where = NULL;
2869 ncd = NULL;
2871 else if (sib_ncd && !dep_ncd)
2873 new_where = sib_where;
2874 ncd = sib_ncd;
2876 else if (dep_ncd && !sib_ncd)
2878 new_where = dep_where;
2879 ncd = dep_ncd;
2881 else
2882 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
2883 dep_where, &new_where);
2885 /* If the candidate's increment doesn't match the one we're interested
2886 in (nor do any increments for feeding defs of a phi-dependence),
2887 then the result depends only on siblings and dependents. */
2888 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
2890 if (!this_ncd || cand_already_replaced (c))
2892 *where = new_where;
2893 return ncd;
2896 /* Otherwise, compare this candidate with the result from all siblings
2897 and dependents. */
2898 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
2900 return ncd;
2903 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
2905 static inline bool
2906 profitable_increment_p (unsigned index)
2908 return (incr_vec[index].cost <= COST_NEUTRAL);
2911 /* For each profitable increment in the increment vector not equal to
2912 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
2913 dominator of all statements in the candidate chain rooted at C
2914 that require that increment, and insert an initializer
2915 T_0 = stride * increment at that location. Record T_0 with the
2916 increment record. */
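/* For instance (names illustrative): for a profitable increment of 5
   over stride _s, a statement slsr_1 = _s * 5 is built and inserted
   before the earliest candidate in the chosen block, or at the end of
   that block (before any control statement) when it contains no such
   candidate. */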
2918 static void
2919 insert_initializers (slsr_cand_t c)
2921 unsigned i;
2923 for (i = 0; i < incr_vec_len; i++)
2925 basic_block bb;
2926 slsr_cand_t where = NULL;
2927 gimple init_stmt;
2928 tree stride_type, new_name, incr_tree;
2929 double_int incr = incr_vec[i].incr;
2931 if (!profitable_increment_p (i)
2932 || incr.is_one ()
2933 || (incr.is_minus_one ()
2934 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
2935 || incr.is_zero ())
2936 continue;
2938 /* We may have already identified an existing initializer that
2939 will suffice. */
2940 if (incr_vec[i].initializer)
2942 if (dump_file && (dump_flags & TDF_DETAILS))
2944 fputs ("Using existing initializer: ", dump_file);
2945 print_gimple_stmt (dump_file,
2946 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
2947 0, 0);
2949 continue;
2952 /* Find the block that most closely dominates all candidates
2953 with this increment. If there is at least one candidate in
2954 that block, the earliest one will be returned in WHERE. */
2955 bb = nearest_common_dominator_for_cands (c, incr, &where);
2957 /* Create a new SSA name to hold the initializer's value. */
2958 stride_type = TREE_TYPE (c->stride);
2959 new_name = make_temp_ssa_name (stride_type, NULL, "slsr");
2960 incr_vec[i].initializer = new_name;
2962 /* Create the initializer and insert it in the latest possible
2963 dominating position. */
2964 incr_tree = double_int_to_tree (stride_type, incr);
2965 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
2966 c->stride, incr_tree);
2967 if (where)
2969 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
2970 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2971 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
2973 else
2975 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2976 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
2978 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2979 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2980 else
2981 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
2983 gimple_set_location (init_stmt, gimple_location (basis_stmt));
2986 if (dump_file && (dump_flags & TDF_DETAILS))
2988 fputs ("Inserting initializer: ", dump_file);
2989 print_gimple_stmt (dump_file, init_stmt, 0, 0);
2994 /* Return TRUE iff all required increments for candidates feeding PHI
2995 are profitable to replace on behalf of candidate C. */
2997 static bool
2998 all_phi_incrs_profitable (slsr_cand_t c, gimple phi)
3000 unsigned i;
3001 slsr_cand_t basis = lookup_cand (c->basis);
3002 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
3004 for (i = 0; i < gimple_phi_num_args (phi); i++)
3006 tree arg = gimple_phi_arg_def (phi, i);
3008 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3010 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3012 if (gimple_code (arg_def) == GIMPLE_PHI)
3014 if (!all_phi_incrs_profitable (c, arg_def))
3015 return false;
3017 else
3019 int j;
3020 slsr_cand_t arg_cand = base_cand_from_table (arg);
3021 double_int increment = arg_cand->index - basis->index;
3023 if (!address_arithmetic_p && increment.is_negative ())
3024 increment = -increment;
3026 j = incr_vec_index (increment);
3028 if (dump_file && (dump_flags & TDF_DETAILS))
3030 fprintf (dump_file, " Conditional candidate %d, phi: ",
3031 c->cand_num);
3032 print_gimple_stmt (dump_file, phi, 0, 0);
3033 fputs (" increment: ", dump_file);
3034 dump_double_int (dump_file, increment, false);
3035 if (j < 0)
3036 fprintf (dump_file,
3037 "\n Not replaced; incr_vec overflow.\n");
3038 else {
3039 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3040 if (profitable_increment_p (j))
3041 fputs (" Replacing...\n", dump_file);
3042 else
3043 fputs (" Not replaced.\n", dump_file);
3047 if (j < 0 || !profitable_increment_p (j))
3048 return false;
3053 return true;
3056 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3057 type TO_TYPE, and insert it in front of the statement represented
3058 by candidate C. A new SSA name is created to hold the result;
3059 return the new SSA name. */
3061 static tree
3062 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3064 tree cast_lhs;
3065 gimple cast_stmt;
3066 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3068 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3069 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
3070 from_expr, NULL_TREE);
3071 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3072 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3074 if (dump_file && (dump_flags & TDF_DETAILS))
3076 fputs (" Inserting: ", dump_file);
3077 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
3080 return cast_lhs;
3083 /* Replace the RHS of the statement represented by candidate C with
3084 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that doing so would not
3085 leave C unchanged or merely interchange its operands. The original
3086 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3087 If the replacement was made and we are doing a details dump,
3088 return the revised statement, else NULL. */
3090 static gimple
3091 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3092 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3093 slsr_cand_t c)
3095 if (new_code != old_code
3096 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3097 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3098 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3099 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3101 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3102 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3103 update_stmt (gsi_stmt (gsi));
3105 if (dump_file && (dump_flags & TDF_DETAILS))
3106 return gsi_stmt (gsi);
3109 else if (dump_file && (dump_flags & TDF_DETAILS))
3110 fputs (" (duplicate, not actually replacing)\n", dump_file);
3112 return NULL;
3115 /* Strength-reduce the statement represented by candidate C by replacing
3116 it with an equivalent addition or subtraction. I is the index into
3117 the increment vector identifying C's increment. A new SSA name is
3118 created if a cast needs to be introduced. BASIS_NAME
3119 is the rhs1 to use in creating the add/subtract. */
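/* A hypothetical example (names illustrative): if C's recorded increment
   has initializer t_0, C's statement is rewritten as
   x_5 = basis_name + t_0, or as a subtract when the recorded increment
   is the negation of C's actual increment. */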
3121 static void
3122 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3124 gimple stmt_to_print = NULL;
3125 tree orig_rhs1, orig_rhs2;
3126 tree rhs2;
3127 enum tree_code orig_code, repl_code;
3128 double_int cand_incr;
3130 orig_code = gimple_assign_rhs_code (c->cand_stmt);
3131 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3132 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3133 cand_incr = cand_increment (c);
3135 if (dump_file && (dump_flags & TDF_DETAILS))
3137 fputs ("Replacing: ", dump_file);
3138 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
3139 stmt_to_print = c->cand_stmt;
3142 if (address_arithmetic_p)
3143 repl_code = POINTER_PLUS_EXPR;
3144 else
3145 repl_code = PLUS_EXPR;
3147 /* If the increment has an initializer T_0, replace the candidate
3148 statement with an add of the basis name and the initializer. */
3149 if (incr_vec[i].initializer)
3151 tree init_type = TREE_TYPE (incr_vec[i].initializer);
3152 tree orig_type = TREE_TYPE (orig_rhs2);
3154 if (types_compatible_p (orig_type, init_type))
3155 rhs2 = incr_vec[i].initializer;
3156 else
3157 rhs2 = introduce_cast_before_cand (c, orig_type,
3158 incr_vec[i].initializer);
3160 if (incr_vec[i].incr != cand_incr)
3162 gcc_assert (repl_code == PLUS_EXPR);
3163 repl_code = MINUS_EXPR;
3166 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3167 orig_code, orig_rhs1, orig_rhs2,
3171 /* Otherwise, the increment is one of -1, 0, and 1. Replace
3172 with a subtract of the stride from the basis name, a copy
3173 from the basis name, or an add of the stride to the basis
3174 name, respectively. It may be necessary to introduce a
3175 cast (or reuse an existing cast). */
3176 else if (cand_incr.is_one ())
3178 tree stride_type = TREE_TYPE (c->stride);
3179 tree orig_type = TREE_TYPE (orig_rhs2);
3181 if (types_compatible_p (orig_type, stride_type))
3182 rhs2 = c->stride;
3183 else
3184 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3186 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3187 orig_code, orig_rhs1, orig_rhs2,
3191 else if (cand_incr.is_minus_one ())
3193 tree stride_type = TREE_TYPE (c->stride);
3194 tree orig_type = TREE_TYPE (orig_rhs2);
3195 gcc_assert (repl_code != POINTER_PLUS_EXPR);
3197 if (types_compatible_p (orig_type, stride_type))
3198 rhs2 = c->stride;
3199 else
3200 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3202 if (orig_code != MINUS_EXPR
3203 || !operand_equal_p (basis_name, orig_rhs1, 0)
3204 || !operand_equal_p (rhs2, orig_rhs2, 0))
3206 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3207 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3208 update_stmt (gsi_stmt (gsi));
3210 if (dump_file && (dump_flags & TDF_DETAILS))
3211 stmt_to_print = gsi_stmt (gsi);
3213 else if (dump_file && (dump_flags & TDF_DETAILS))
3214 fputs (" (duplicate, not actually replacing)\n", dump_file);
3217 else if (cand_incr.is_zero ())
3219 tree lhs = gimple_assign_lhs (c->cand_stmt);
3220 tree lhs_type = TREE_TYPE (lhs);
3221 tree basis_type = TREE_TYPE (basis_name);
3223 if (types_compatible_p (lhs_type, basis_type))
3225 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
3226 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3227 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3228 gsi_replace (&gsi, copy_stmt, false);
3230 if (dump_file && (dump_flags & TDF_DETAILS))
3231 stmt_to_print = copy_stmt;
3233 else
3235 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3236 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
3237 basis_name,
3238 NULL_TREE);
3239 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3240 gsi_replace (&gsi, cast_stmt, false);
3242 if (dump_file && (dump_flags & TDF_DETAILS))
3243 stmt_to_print = cast_stmt;
3246 else
3247 gcc_unreachable ();
3249 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3251 fputs ("With: ", dump_file);
3252 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
3253 fputs ("\n", dump_file);
3257 /* For each candidate in the tree rooted at C, replace its statement
3258 with a strength-reduced form if its increment has been shown to be profitable. */
3260 static void
3261 replace_profitable_candidates (slsr_cand_t c)
3263 if (!cand_already_replaced (c))
3265 double_int increment = cand_abs_increment (c);
3266 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3267 int i;
3269 i = incr_vec_index (increment);
3271 /* Only process profitable increments. Nothing useful can be done
3272 to a cast or copy. */
3273 if (i >= 0
3274 && profitable_increment_p (i)
3275 && orig_code != MODIFY_EXPR
3276 && orig_code != NOP_EXPR)
3278 if (phi_dependent_cand_p (c))
3280 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
3282 if (all_phi_incrs_profitable (c, phi))
3284 /* Look up the LHS SSA name from C's basis. This will be
3285 the RHS1 of the adds we will introduce to create new
3286 phi arguments. */
3287 slsr_cand_t basis = lookup_cand (c->basis);
3288 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3290 /* Create a new phi statement that will represent C's true
3291 basis after the transformation is complete. */
3292 location_t loc = gimple_location (c->cand_stmt);
3293 tree name = create_phi_basis (c, phi, basis_name,
3294 loc, UNKNOWN_STRIDE);
3296 /* Replace C with an add of the new basis phi and the
3297 increment. */
3298 replace_one_candidate (c, i, name);
3301 else
3303 slsr_cand_t basis = lookup_cand (c->basis);
3304 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3305 replace_one_candidate (c, i, basis_name);
3310 if (c->sibling)
3311 replace_profitable_candidates (lookup_cand (c->sibling));
3313 if (c->dependent)
3314 replace_profitable_candidates (lookup_cand (c->dependent));
3317 /* Analyze costs of related candidates in the candidate vector,
3318 and make beneficial replacements. */
3320 static void
3321 analyze_candidates_and_replace (void)
3323 unsigned i;
3324 slsr_cand_t c;
3326 /* Each candidate that has a null basis and a non-null
3327 dependent is the root of a tree of related statements.
3328 Analyze each tree to determine a subset of those
3329 statements that can be replaced with maximum benefit. */
3330 FOR_EACH_VEC_ELT (cand_vec, i, c)
3332 slsr_cand_t first_dep;
3334 if (c->basis != 0 || c->dependent == 0)
3335 continue;
3337 if (dump_file && (dump_flags & TDF_DETAILS))
3338 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3339 c->cand_num);
3341 first_dep = lookup_cand (c->dependent);
3343 /* If this is a chain of CAND_REFs, unconditionally replace
3344 each of them with a strength-reduced data reference. */
3345 if (c->kind == CAND_REF)
3346 replace_refs (c);
3348 /* If the common stride of all related candidates is a known
3349 constant, each candidate without a phi-dependence can be
3350 profitably replaced. Each replaces a multiply by a single
3351 add, with the possibility that a feeding add also goes dead.
3352 A candidate with a phi-dependence is replaced only if the
3353 compensation code it requires is offset by the strength
3354 reduction savings. */
3355 else if (TREE_CODE (c->stride) == INTEGER_CST)
3356 replace_uncond_cands_and_profitable_phis (first_dep);
3358 /* When the stride is an SSA name, it may still be profitable
3359 to replace some or all of the dependent candidates, depending
3360 on whether the introduced increments can be reused, or are
3361 less expensive to calculate than the replaced statements. */
3362 else
3364 enum machine_mode mode;
3365 bool speed;
3367 /* Determine whether we'll be generating pointer arithmetic
3368 when replacing candidates. */
3369 address_arithmetic_p = (c->kind == CAND_ADD
3370 && POINTER_TYPE_P (c->cand_type));
3372 /* If all candidates have already been replaced under other
3373 interpretations, nothing remains to be done. */
3374 if (!count_candidates (c))
3375 continue;
3377 /* Construct an array of increments for this candidate chain. */
3378 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3379 incr_vec_len = 0;
3380 record_increments (c);
3382 /* Determine which increments are profitable to replace. */
3383 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3384 speed = optimize_cands_for_speed_p (c);
3385 analyze_increments (first_dep, mode, speed);
3387 /* Insert initializers of the form T_0 = stride * increment
3388 for use in profitable replacements. */
3389 insert_initializers (first_dep);
3390 dump_incr_vec ();
3392 /* Perform the replacements. */
3393 replace_profitable_candidates (first_dep);
3394 free (incr_vec);
3399 static unsigned
3400 execute_strength_reduction (void)
3402 struct dom_walk_data walk_data;
3404 /* Create the obstack where candidates will reside. */
3405 gcc_obstack_init (&cand_obstack);
3407 /* Allocate the candidate vector. */
3408 cand_vec.create (128);
3410 /* Allocate the mapping from statements to candidate indices. */
3411 stmt_cand_map = pointer_map_create ();
3413 /* Create the obstack where candidate chains will reside. */
3414 gcc_obstack_init (&chain_obstack);
3416 /* Allocate the mapping from base expressions to candidate chains. */
3417 base_cand_map.create (500);
3419 /* Initialize the loop optimizer. We need to detect flow across
3420 back edges, and this gives us dominator information as well. */
3421 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3423 /* Set up callbacks for the generic dominator tree walker. */
3424 walk_data.dom_direction = CDI_DOMINATORS;
3425 walk_data.initialize_block_local_data = NULL;
3426 walk_data.before_dom_children = find_candidates_in_block;
3427 walk_data.after_dom_children = NULL;
3428 walk_data.global_data = NULL;
3429 walk_data.block_local_data_size = 0;
3430 init_walk_dominator_tree (&walk_data);
3432 /* Walk the CFG in predominator order looking for strength reduction
3433 candidates. */
3434 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
3436 if (dump_file && (dump_flags & TDF_DETAILS))
3438 dump_cand_vec ();
3439 dump_cand_chains ();
3442 /* Analyze costs and make appropriate replacements. */
3443 analyze_candidates_and_replace ();
3445 /* Free resources. */
3446 fini_walk_dominator_tree (&walk_data);
3447 loop_optimizer_finalize ();
3448 base_cand_map.dispose ();
3449 obstack_free (&chain_obstack, NULL);
3450 pointer_map_destroy (stmt_cand_map);
3451 cand_vec.release ();
3452 obstack_free (&cand_obstack, NULL);
3454 return 0;
3457 static bool
3458 gate_strength_reduction (void)
3460 return flag_tree_slsr;
3463 struct gimple_opt_pass pass_strength_reduction =
3466 GIMPLE_PASS,
3467 "slsr", /* name */
3468 OPTGROUP_NONE, /* optinfo_flags */
3469 gate_strength_reduction, /* gate */
3470 execute_strength_reduction, /* execute */
3471 NULL, /* sub */
3472 NULL, /* next */
3473 0, /* static_pass_number */
3474 TV_GIMPLE_SLSR, /* tv_id */
3475 PROP_cfg | PROP_ssa, /* properties_required */
3476 0, /* properties_provided */
3477 0, /* properties_destroyed */
3478 0, /* todo_flags_start */
3479 TODO_verify_ssa /* todo_flags_finish */