gcc/tree-ssa-reassoc.cc
1 /* Reassociation for trees.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
62 It consists of five steps:
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
72 3. Optimization of the operand lists, eliminating things like a +
73 -a, a & a, etc.
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
82 5. Repropagate negates, as nothing else will clean it up ATM.
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
103 a (1), b (1), c (1), d (2), e (2)
106 We start with our merge worklist empty, and the ops list with all of
107 those on it.
109 You want to first merge all leaves of the same rank, as much as
110 possible.
112 So first build a binary op of
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
124 Then build a binary op of d + e
125 mergetmp2 = d + e
127 and put mergetmp2 on the merge worklist.
129 so merge worklist = {mergetmp, c, mergetmp2}
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
134 So we have
136 build binary op
137 mergetmp3 = mergetmp + c
139 worklist = {mergetmp2, mergetmp3}
141 mergetmp4 = mergetmp2 + mergetmp3
143 worklist = {mergetmp4}
145 because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
154 So why don't we do this?
156 Because it's expensive, and rarely will help. Most trees we are
157 reassociating have 3 or fewer ops. If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
162 mergetmp = op1 + op2
163 newstmt = mergetmp + op3
165 instead of
166 mergetmp = op2 + op3
167 newstmt = mergetmp + op1
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
171 can do.
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
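/* As a concrete illustration of steps 1 and 5: a chain such as
t = a - b - c is broken up into t = a + (-b) + (-c) so that the whole
chain is exposed to the PLUS_EXPR machinery described above, and
repropagate_negates later turns the surviving negates back into
subtracts where that is cheaper. */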
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p;
183 /* Enable biasing ranks of loop accumulators. We don't want this before
184 vectorization, since it interferes with reduction chains. */
185 static bool reassoc_bias_loop_carried_phi_ranks_p;
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
199 static object_allocator<operand_entry> operand_entry_pool
200 ("operand entry pool");
202 /* This is used to assign a unique ID to each struct operand_entry
203 so that qsort results are identical on different hosts. */
204 static unsigned int next_operand_entry_id;
206 /* Starting rank number for a given basic block, so that we can rank
207 operations using unmovable instructions in that BB based on the bb
208 depth. */
209 static int64_t *bb_rank;
211 /* Operand->rank hashtable. */
212 static hash_map<tree, int64_t> *operand_rank;
214 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
215 biased. */
216 static auto_bitmap biased_names;
218 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
219 all basic blocks, the CFG should be adjusted - the basic block is
220 split right after that SSA_NAME's definition statement and before
221 its only use, which must be a bit ior. */
222 static vec<tree> reassoc_branch_fixups;
224 /* Forward decls. */
225 static int64_t get_rank (tree);
226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
229 possibly added by gsi_remove. */
231 static bool
232 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 gimple *stmt = gsi_stmt (*gsi);
236 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
237 return gsi_remove (gsi, true);
239 gimple_stmt_iterator prev = *gsi;
240 gsi_prev (&prev);
241 unsigned uid = gimple_uid (stmt);
242 basic_block bb = gimple_bb (stmt);
243 bool ret = gsi_remove (gsi, true);
244 if (!gsi_end_p (prev))
245 gsi_next (&prev);
246 else
247 prev = gsi_start_bb (bb);
248 gimple *end_stmt = gsi_stmt (*gsi);
249 while ((stmt = gsi_stmt (prev)) != end_stmt)
251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
252 gimple_set_uid (stmt, uid);
253 gsi_next (&prev);
255 return ret;
258 /* Bias amount for loop-carried phis. We want this to be larger than
259 the depth of any reassociation tree we can see, but not larger than
260 the rank difference between two blocks. */
261 #define PHI_LOOP_BIAS (1 << 15)
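/* Note: block ranks are assigned in steps of 1 << 16 (see the rank setup
in init_reassoc and the >> 16 in sort_by_operand_rank), so a bias of
1 << 15 can never lift a phi's rank past that of the following block. */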
263 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
264 operands to the STMT's left-hand side. The goal is to preserve bias in code
265 like this:
267 x_1 = phi(x_0, x_2)
268 a = x_1 | 1
269 b = a ^ 2
270 .MEM = b
271 c = b + d
272 x_2 = c + e
274 That is, we need to preserve bias along single-use chains originating from
275 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
276 uses, because only they participate in rank propagation. */
277 static bool
278 propagate_bias_p (gimple *stmt)
280 use_operand_p use;
281 imm_use_iterator use_iter;
282 gimple *single_use_stmt = NULL;
284 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
285 return false;
287 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 gimple *current_use_stmt = USE_STMT (use);
291 if (is_gimple_assign (current_use_stmt)
292 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
295 return false;
296 single_use_stmt = current_use_stmt;
300 if (single_use_stmt == NULL)
301 return false;
303 if (gimple_bb (stmt)->loop_father
304 != gimple_bb (single_use_stmt)->loop_father)
305 return false;
307 return true;
310 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
311 an innermost loop, and the phi has only a single use which is inside
312 the loop, then the rank is the block rank of the loop latch plus an
313 extra bias for the loop-carried dependence. This causes expressions
314 calculated into an accumulator variable to be independent for each
315 iteration of the loop. If STMT is some other phi, the rank is the
316 block rank of its containing block. */
317 static int64_t
318 phi_rank (gimple *stmt)
320 basic_block bb = gimple_bb (stmt);
321 class loop *father = bb->loop_father;
322 tree res;
323 unsigned i;
324 use_operand_p use;
325 gimple *use_stmt;
327 if (!reassoc_bias_loop_carried_phi_ranks_p)
328 return bb_rank[bb->index];
330 /* We only care about real loops (those with a latch). */
331 if (!father->latch)
332 return bb_rank[bb->index];
334 /* Interesting phis must be in headers of innermost loops. */
335 if (bb != father->header
336 || father->inner)
337 return bb_rank[bb->index];
339 /* Ignore virtual SSA_NAMEs. */
340 res = gimple_phi_result (stmt);
341 if (virtual_operand_p (res))
342 return bb_rank[bb->index];
344 /* The phi definition must have a single use, and that use must be
345 within the loop. Otherwise this isn't an accumulator pattern. */
346 if (!single_imm_use (res, &use, &use_stmt)
347 || gimple_bb (use_stmt)->loop_father != father)
348 return bb_rank[bb->index];
350 /* Look for phi arguments from within the loop. If found, bias this phi. */
351 for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 tree arg = gimple_phi_arg_def (stmt, i);
354 if (TREE_CODE (arg) == SSA_NAME
355 && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
358 if (gimple_bb (def_stmt)->loop_father == father)
359 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
363 /* Must be an uninteresting phi. */
364 return bb_rank[bb->index];
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static int64_t
372 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 int64_t op_rank;
376 op_rank = get_rank (op);
378 /* Check whether op is biased after the get_rank () call, since it might have
379 updated biased_names. */
380 if (TREE_CODE (op) == SSA_NAME
381 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 if (maybe_biased_p == NULL)
384 return rank;
385 *maybe_biased_p = true;
388 return MAX (rank, op_rank);
391 /* Look up the operand rank structure for expression E. */
393 static inline int64_t
394 find_operand_rank (tree e)
396 int64_t *slot = operand_rank->get (e);
397 return slot ? *slot : -1;
400 /* Insert {E,RANK} into the operand rank hashtable. */
402 static inline void
403 insert_operand_rank (tree e, int64_t rank)
405 gcc_assert (rank > 0);
406 gcc_assert (!operand_rank->put (e, rank));
409 /* Given an expression E, return the rank of the expression. */
411 static int64_t
412 get_rank (tree e)
414 /* SSA_NAME's have the rank of the expression they are the result of.
416 For globals and uninitialized values, the rank is 0.
417 For function arguments, use the pre-setup rank.
418 For PHI nodes, stores, asm statements, etc, we use the rank of
419 the BB.
420 For simple operations, the rank is the maximum rank of any of
421 its operands, or the bb_rank, whichever is less.
422 I make no claims that this is optimal, however, it gives good
423 results. */
425 /* We make an exception to the normal ranking system to break
426 dependences of accumulator variables in loops. Suppose we
427 have a simple one-block loop containing:
429 x_1 = phi(x_0, x_2)
430 b = a + x_1
431 c = b + d
432 x_2 = c + e
434 As shown, each iteration of the calculation into x is fully
435 dependent upon the iteration before it. We would prefer to
436 see this in the form:
438 x_1 = phi(x_0, x_2)
439 b = a + d
440 c = b + e
441 x_2 = c + x_1
443 If the loop is unrolled, the calculations of b and c from
444 different iterations can be interleaved.
446 To obtain this result during reassociation, we bias the rank
447 of the phi definition x_1 upward, when it is recognized as an
448 accumulator pattern. The artificial rank causes it to be
449 added last, providing the desired independence. */
451 if (TREE_CODE (e) == SSA_NAME)
453 ssa_op_iter iter;
454 gimple *stmt;
455 int64_t rank;
456 tree op;
458 /* If we already have a rank for this expression, use that. */
459 rank = find_operand_rank (e);
460 if (rank != -1)
461 return rank;
463 stmt = SSA_NAME_DEF_STMT (e);
464 if (gimple_code (stmt) == GIMPLE_PHI)
466 rank = phi_rank (stmt);
467 if (rank != bb_rank[gimple_bb (stmt)->index])
468 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
471 else if (!is_gimple_assign (stmt))
472 rank = bb_rank[gimple_bb (stmt)->index];
474 else
476 bool biased_p = false;
477 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
479 /* Otherwise, find the maximum rank for the operands. As an
480 exception, remove the bias from loop-carried phis when propagating
481 the rank so that dependent operations are not also biased. */
482 /* Simply walk over all SSA uses - this takes advantage of the
483 fact that non-SSA operands satisfy is_gimple_min_invariant and
484 thus have rank 0. */
485 rank = 0;
486 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
487 rank = propagate_rank (rank, op, maybe_biased_p);
489 rank += 1;
490 if (biased_p)
491 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
494 if (dump_file && (dump_flags & TDF_DETAILS))
496 fprintf (dump_file, "Rank for ");
497 print_generic_expr (dump_file, e);
498 fprintf (dump_file, " is %" PRId64 "\n", rank);
501 /* Note the rank in the hashtable so we don't recompute it. */
502 insert_operand_rank (e, rank);
503 return rank;
506 /* Constants, globals, etc., are rank 0 */
507 return 0;
511 /* We want integer ones to end up last no matter what, since they are
512 the ones we can do the most with. */
513 #define INTEGER_CONST_TYPE 1 << 4
514 #define FLOAT_ONE_CONST_TYPE 1 << 3
515 #define FLOAT_CONST_TYPE 1 << 2
516 #define OTHER_CONST_TYPE 1 << 1
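/* These are sort keys rather than independent flags: among rank-0
constants, sort_by_operand_rank orders entries by ascending
constant_type, so the larger values above end up later in the
operand vector (hence integer constants last). */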
518 /* Classify an invariant tree into integer, float, or other, so that
519 we can sort them to be near other constants of the same type. */
520 static inline int
521 constant_type (tree t)
523 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
524 return INTEGER_CONST_TYPE;
525 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
527 /* Sort -1.0 and 1.0 constants last, while in some cases
528 const_binop can't optimize some inexact operations, multiplication
529 by -1.0 or 1.0 can be always merged with others. */
530 if (real_onep (t) || real_minus_onep (t))
531 return FLOAT_ONE_CONST_TYPE;
532 return FLOAT_CONST_TYPE;
534 else
535 return OTHER_CONST_TYPE;
538 /* qsort comparison function to sort operand entries PA and PB by rank
539 so that the sorted array is ordered by rank in decreasing order. */
540 static int
541 sort_by_operand_rank (const void *pa, const void *pb)
543 const operand_entry *oea = *(const operand_entry *const *)pa;
544 const operand_entry *oeb = *(const operand_entry *const *)pb;
546 if (oeb->rank != oea->rank)
547 return oeb->rank > oea->rank ? 1 : -1;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oea->rank == 0)
554 if (constant_type (oeb->op) != constant_type (oea->op))
555 return constant_type (oea->op) - constant_type (oeb->op);
556 else
557 /* To make sorting result stable, we use unique IDs to determine
558 order. */
559 return oeb->id > oea->id ? 1 : -1;
562 if (TREE_CODE (oea->op) != SSA_NAME)
564 if (TREE_CODE (oeb->op) != SSA_NAME)
565 return oeb->id > oea->id ? 1 : -1;
566 else
567 return 1;
569 else if (TREE_CODE (oeb->op) != SSA_NAME)
570 return -1;
572 /* Lastly, make sure the versions that are the same go next to each
573 other. */
574 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
576 /* SSA_NAME_VERSION is assigned pretty randomly, since we reuse
577 versions of removed SSA_NAMEs, so if possible, prefer to sort
578 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
579 See PR60418. */
580 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
581 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
582 basic_block bba = gimple_bb (stmta);
583 basic_block bbb = gimple_bb (stmtb);
584 if (bbb != bba)
586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
587 but the other might not. */
588 if (!bba)
589 return 1;
590 if (!bbb)
591 return -1;
592 /* If neither is, compare bb_rank. */
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
597 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
598 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
599 if (da != db)
600 return da ? 1 : -1;
602 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
605 return oeb->id > oea->id ? 1 : -1;
608 /* Add an operand entry to *OPS for the tree operand OP. */
610 static void
611 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
613 operand_entry *oe = operand_entry_pool.allocate ();
615 oe->op = op;
616 oe->rank = get_rank (op);
617 oe->id = next_operand_entry_id++;
618 oe->count = 1;
619 oe->stmt_to_insert = stmt_to_insert;
620 ops->safe_push (oe);
623 /* Add an operand entry to *OPS for the tree operand OP with repeat
624 count REPEAT. */
626 static void
627 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
628 HOST_WIDE_INT repeat)
630 operand_entry *oe = operand_entry_pool.allocate ();
632 oe->op = op;
633 oe->rank = get_rank (op);
634 oe->id = next_operand_entry_id++;
635 oe->count = repeat;
636 oe->stmt_to_insert = NULL;
637 ops->safe_push (oe);
639 reassociate_stats.pows_encountered++;
642 /* Returns true if we can associate the SSA def OP. */
644 static bool
645 can_reassociate_op_p (tree op)
647 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
648 return false;
649 /* Make sure asm goto outputs do not participate in reassociation since
650 we have no way to find an insertion place after asm goto. */
651 if (TREE_CODE (op) == SSA_NAME
652 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
653 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
654 return false;
655 return true;
658 /* Returns true if we can reassociate operations of TYPE.
659 That is for integral types with wrapping overflow, non-saturating
660 fixed-point types, and floating point types when associative-math is enabled. */
662 static bool
663 can_reassociate_type_p (tree type)
665 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
666 || NON_SAT_FIXED_POINT_TYPE_P (type)
667 || (flag_associative_math && FLOAT_TYPE_P (type)))
668 return true;
669 return false;
672 /* Return true if STMT is a reassociable operation containing a binary
673 operation with tree code CODE, and is inside LOOP. */
675 static bool
676 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
678 basic_block bb = gimple_bb (stmt);
680 if (gimple_bb (stmt) == NULL)
681 return false;
683 if (!flow_bb_inside_loop_p (loop, bb))
684 return false;
686 if (is_gimple_assign (stmt)
687 && gimple_assign_rhs_code (stmt) == code
688 && has_single_use (gimple_assign_lhs (stmt)))
690 tree rhs1 = gimple_assign_rhs1 (stmt);
691 tree rhs2 = gimple_assign_rhs2 (stmt);
692 if (!can_reassociate_op_p (rhs1)
693 || (rhs2 && !can_reassociate_op_p (rhs2)))
694 return false;
695 return true;
698 return false;
702 /* Return true if STMT is a nop-conversion. */
704 static bool
705 gimple_nop_conversion_p (gimple *stmt)
707 if (gassign *ass = dyn_cast <gassign *> (stmt))
709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
711 TREE_TYPE (gimple_assign_rhs1 (ass))))
712 return true;
714 return false;
717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
718 operand of the negate operation. Otherwise, return NULL. */
720 static tree
721 get_unary_op (tree name, enum tree_code opcode)
723 gimple *stmt = SSA_NAME_DEF_STMT (name);
725 /* Look through nop conversions (sign changes). */
726 if (gimple_nop_conversion_p (stmt)
727 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
728 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
730 if (!is_gimple_assign (stmt))
731 return NULL_TREE;
733 if (gimple_assign_rhs_code (stmt) == opcode)
734 return gimple_assign_rhs1 (stmt);
735 return NULL_TREE;
738 /* Return true if OP1 and OP2 have the same value if casted to either type. */
740 static bool
741 ops_equal_values_p (tree op1, tree op2)
743 if (op1 == op2)
744 return true;
746 tree orig_op1 = op1;
747 if (TREE_CODE (op1) == SSA_NAME)
749 gimple *stmt = SSA_NAME_DEF_STMT (op1);
750 if (gimple_nop_conversion_p (stmt))
752 op1 = gimple_assign_rhs1 (stmt);
753 if (op1 == op2)
754 return true;
758 if (TREE_CODE (op2) == SSA_NAME)
760 gimple *stmt = SSA_NAME_DEF_STMT (op2);
761 if (gimple_nop_conversion_p (stmt))
763 op2 = gimple_assign_rhs1 (stmt);
764 if (op1 == op2
765 || orig_op1 == op2)
766 return true;
770 return false;
774 /* If CURR and LAST are a pair of ops that OPCODE allows us to
775 eliminate through equivalences, do so, remove them from OPS, and
776 return true. Otherwise, return false. */
778 static bool
779 eliminate_duplicate_pair (enum tree_code opcode,
780 vec<operand_entry *> *ops,
781 bool *all_done,
782 unsigned int i,
783 operand_entry *curr,
784 operand_entry *last)
787 /* If we have two of the same op, and the opcode is & |, min, or max,
788 we can eliminate one of them.
789 If we have two of the same op, and the opcode is ^, we can
790 eliminate both of them. */
792 if (last && last->op == curr->op)
794 switch (opcode)
796 case MAX_EXPR:
797 case MIN_EXPR:
798 case BIT_IOR_EXPR:
799 case BIT_AND_EXPR:
800 if (dump_file && (dump_flags & TDF_DETAILS))
802 fprintf (dump_file, "Equivalence: ");
803 print_generic_expr (dump_file, curr->op);
804 fprintf (dump_file, " [&|minmax] ");
805 print_generic_expr (dump_file, last->op);
806 fprintf (dump_file, " -> ");
807 print_generic_stmt (dump_file, last->op);
810 ops->ordered_remove (i);
811 reassociate_stats.ops_eliminated ++;
813 return true;
815 case BIT_XOR_EXPR:
816 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "Equivalence: ");
819 print_generic_expr (dump_file, curr->op);
820 fprintf (dump_file, " ^ ");
821 print_generic_expr (dump_file, last->op);
822 fprintf (dump_file, " -> nothing\n");
825 reassociate_stats.ops_eliminated += 2;
827 if (ops->length () == 2)
829 ops->truncate (0);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
831 *all_done = true;
833 else
835 ops->ordered_remove (i-1);
836 ops->ordered_remove (i-1);
839 return true;
841 default:
842 break;
845 return false;
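/* SSA names of NEGATE_EXPRs that appeared as operands of PLUS_EXPR
chains; they are recorded here and inspected later by
repropagate_negates. */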
848 static vec<tree> plus_negates;
850 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
851 expression, look in OPS for a corresponding positive operation to cancel
852 it out. If we find one, remove the other from OPS, replace
853 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
854 return false. */
856 static bool
857 eliminate_plus_minus_pair (enum tree_code opcode,
858 vec<operand_entry *> *ops,
859 unsigned int currindex,
860 operand_entry *curr)
862 tree negateop;
863 tree notop;
864 unsigned int i;
865 operand_entry *oe;
867 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
868 return false;
870 negateop = get_unary_op (curr->op, NEGATE_EXPR);
871 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
872 if (negateop == NULL_TREE && notop == NULL_TREE)
873 return false;
875 /* Any non-negated version will have a rank that is one less than
876 the current rank. So once we hit those ranks, if we don't find
877 one, we can stop. */
879 for (i = currindex + 1;
880 ops->iterate (i, &oe)
881 && oe->rank >= curr->rank - 1 ;
882 i++)
884 if (negateop
885 && ops_equal_values_p (oe->op, negateop))
887 if (dump_file && (dump_flags & TDF_DETAILS))
889 fprintf (dump_file, "Equivalence: ");
890 print_generic_expr (dump_file, negateop);
891 fprintf (dump_file, " + -");
892 print_generic_expr (dump_file, oe->op);
893 fprintf (dump_file, " -> 0\n");
896 ops->ordered_remove (i);
897 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
898 ops->ordered_remove (currindex);
899 reassociate_stats.ops_eliminated ++;
901 return true;
903 else if (notop
904 && ops_equal_values_p (oe->op, notop))
906 tree op_type = TREE_TYPE (oe->op);
908 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Equivalence: ");
911 print_generic_expr (dump_file, notop);
912 fprintf (dump_file, " + ~");
913 print_generic_expr (dump_file, oe->op);
914 fprintf (dump_file, " -> -1\n");
917 ops->ordered_remove (i);
918 add_to_ops_vec (ops, build_all_ones_cst (op_type));
919 ops->ordered_remove (currindex);
920 reassociate_stats.ops_eliminated ++;
922 return true;
926 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
927 save it for later inspection in repropagate_negates(). */
928 if (negateop != NULL_TREE
929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
930 plus_negates.safe_push (curr->op);
932 return false;
935 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
936 bitwise not expression, look in OPS for a corresponding operand to
937 cancel it out. If we find one, collapse OPS to a single operand that
938 is 0 for BIT_AND_EXPR or -1 for BIT_IOR_EXPR, and return true.
939 Otherwise, return false. */
941 static bool
942 eliminate_not_pairs (enum tree_code opcode,
943 vec<operand_entry *> *ops,
944 unsigned int currindex,
945 operand_entry *curr)
947 tree notop;
948 unsigned int i;
949 operand_entry *oe;
951 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
952 || TREE_CODE (curr->op) != SSA_NAME)
953 return false;
955 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
956 if (notop == NULL_TREE)
957 return false;
959 /* Any non-not version will have a rank that is one less than
960 the current rank. So once we hit those ranks, if we don't find
961 one, we can stop. */
963 for (i = currindex + 1;
964 ops->iterate (i, &oe)
965 && oe->rank >= curr->rank - 1;
966 i++)
968 if (oe->op == notop)
970 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Equivalence: ");
973 print_generic_expr (dump_file, notop);
974 if (opcode == BIT_AND_EXPR)
975 fprintf (dump_file, " & ~");
976 else if (opcode == BIT_IOR_EXPR)
977 fprintf (dump_file, " | ~");
978 print_generic_expr (dump_file, oe->op);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " -> 0\n");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " -> -1\n");
985 if (opcode == BIT_AND_EXPR)
986 oe->op = build_zero_cst (TREE_TYPE (oe->op));
987 else if (opcode == BIT_IOR_EXPR)
988 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
990 reassociate_stats.ops_eliminated += ops->length () - 1;
991 ops->truncate (0);
992 ops->quick_push (oe);
993 return true;
997 return false;
1000 /* Use constant value that may be present in OPS to try to eliminate
1001 operands. Note that this function is only really used when we've
1002 eliminated ops for other reasons, or merged constants. Across
1003 single statements, fold already does all of this, plus more. There
1004 is little point in duplicating logic, so I've only included the
1005 identities that I could ever construct testcases to trigger. */
1007 static void
1008 eliminate_using_constants (enum tree_code opcode,
1009 vec<operand_entry *> *ops)
1011 operand_entry *oelast = ops->last ();
1012 tree type = TREE_TYPE (oelast->op);
1014 if (oelast->rank == 0
1015 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1017 switch (opcode)
1019 case BIT_AND_EXPR:
1020 if (integer_zerop (oelast->op))
1022 if (ops->length () != 1)
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Found & 0, removing all other ops\n");
1027 reassociate_stats.ops_eliminated += ops->length () - 1;
1029 ops->truncate (0);
1030 ops->quick_push (oelast);
1031 return;
1034 else if (integer_all_onesp (oelast->op))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found & -1, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1044 break;
1045 case BIT_IOR_EXPR:
1046 if (integer_all_onesp (oelast->op))
1048 if (ops->length () != 1)
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 fprintf (dump_file, "Found | -1, removing all other ops\n");
1053 reassociate_stats.ops_eliminated += ops->length () - 1;
1055 ops->truncate (0);
1056 ops->quick_push (oelast);
1057 return;
1060 else if (integer_zerop (oelast->op))
1062 if (ops->length () != 1)
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Found | 0, removing\n");
1066 ops->pop ();
1067 reassociate_stats.ops_eliminated++;
1070 break;
1071 case MULT_EXPR:
1072 if (integer_zerop (oelast->op)
1073 || (FLOAT_TYPE_P (type)
1074 && !HONOR_NANS (type)
1075 && !HONOR_SIGNED_ZEROS (type)
1076 && real_zerop (oelast->op)))
1078 if (ops->length () != 1)
1080 if (dump_file && (dump_flags & TDF_DETAILS))
1081 fprintf (dump_file, "Found * 0, removing all other ops\n");
1083 reassociate_stats.ops_eliminated += ops->length () - 1;
1084 ops->truncate (0);
1085 ops->quick_push (oelast);
1086 return;
1089 else if (integer_onep (oelast->op)
1090 || (FLOAT_TYPE_P (type)
1091 && !HONOR_SNANS (type)
1092 && real_onep (oelast->op)))
1094 if (ops->length () != 1)
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, "Found * 1, removing\n");
1098 ops->pop ();
1099 reassociate_stats.ops_eliminated++;
1100 return;
1103 break;
1104 case BIT_XOR_EXPR:
1105 case PLUS_EXPR:
1106 case MINUS_EXPR:
1107 if (integer_zerop (oelast->op)
1108 || (FLOAT_TYPE_P (type)
1109 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1110 && fold_real_zero_addition_p (type, 0, oelast->op,
1111 opcode == MINUS_EXPR)))
1113 if (ops->length () != 1)
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 fprintf (dump_file, "Found [|^+] 0, removing\n");
1117 ops->pop ();
1118 reassociate_stats.ops_eliminated++;
1119 return;
1122 break;
1123 default:
1124 break;
1130 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1131 bool, bool);
1133 /* Structure for tracking and counting operands. */
1134 struct oecount {
1135 unsigned int cnt;
1136 unsigned int id;
1137 enum tree_code oecode;
1138 tree op;
1142 /* The heap for the oecount hashtable and the sorted list of operands. */
1143 static vec<oecount> cvec;
1146 /* Oecount hashtable helpers. */
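/* Slots hold indices into CVEC biased by 42, so that the values 0 and 1
stay free for the empty and deleted markers of int_hash <int, 0, 1>. */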
1148 struct oecount_hasher : int_hash <int, 0, 1>
1150 static inline hashval_t hash (int);
1151 static inline bool equal (int, int);
1154 /* Hash function for oecount. */
1156 inline hashval_t
1157 oecount_hasher::hash (int p)
1159 const oecount *c = &cvec[p - 42];
1160 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1163 /* Comparison function for oecount. */
1165 inline bool
1166 oecount_hasher::equal (int p1, int p2)
1168 const oecount *c1 = &cvec[p1 - 42];
1169 const oecount *c2 = &cvec[p2 - 42];
1170 return c1->oecode == c2->oecode && c1->op == c2->op;
1173 /* Comparison function for qsort sorting oecount elements by count. */
1175 static int
1176 oecount_cmp (const void *p1, const void *p2)
1178 const oecount *c1 = (const oecount *)p1;
1179 const oecount *c2 = (const oecount *)p2;
1180 if (c1->cnt != c2->cnt)
1181 return c1->cnt > c2->cnt ? 1 : -1;
1182 else
1183 /* If counts are identical, use unique IDs to stabilize qsort. */
1184 return c1->id > c2->id ? 1 : -1;
1187 /* Return TRUE iff STMT represents a builtin call that raises OP
1188 to some exponent. */
1190 static bool
1191 stmt_is_power_of_op (gimple *stmt, tree op)
1193 if (!is_gimple_call (stmt))
1194 return false;
1196 switch (gimple_call_combined_fn (stmt))
1198 CASE_CFN_POW:
1199 CASE_CFN_POWI:
1200 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1202 default:
1203 return false;
1207 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1208 in place and return the result. Assumes that stmt_is_power_of_op
1209 was previously called for STMT and returned TRUE. */
1211 static HOST_WIDE_INT
1212 decrement_power (gimple *stmt)
1214 REAL_VALUE_TYPE c, cint;
1215 HOST_WIDE_INT power;
1216 tree arg1;
1218 switch (gimple_call_combined_fn (stmt))
1220 CASE_CFN_POW:
1221 arg1 = gimple_call_arg (stmt, 1);
1222 c = TREE_REAL_CST (arg1);
1223 power = real_to_integer (&c) - 1;
1224 real_from_integer (&cint, VOIDmode, power, SIGNED);
1225 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1226 return power;
1228 CASE_CFN_POWI:
1229 arg1 = gimple_call_arg (stmt, 1);
1230 power = TREE_INT_CST_LOW (arg1) - 1;
1231 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1232 return power;
1234 default:
1235 gcc_unreachable ();
1239 /* Give STMT a freshly created SSA name as its LHS, replace all uses
1240 of the old LHS with the new one, and return the new SSA name. */
1242 static tree
1243 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1245 gimple *use_stmt;
1246 use_operand_p use;
1247 imm_use_iterator iter;
1248 tree new_lhs, new_debug_lhs = NULL_TREE;
1249 tree lhs = gimple_get_lhs (stmt);
1251 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1252 gimple_set_lhs (stmt, new_lhs);
1254 /* Also need to update GIMPLE_DEBUGs. */
1255 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1257 tree repl = new_lhs;
1258 if (is_gimple_debug (use_stmt))
1260 if (new_debug_lhs == NULL_TREE)
1262 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1263 gdebug *def_temp
1264 = gimple_build_debug_bind (new_debug_lhs,
1265 build2 (opcode, TREE_TYPE (lhs),
1266 new_lhs, op),
1267 stmt);
1268 gimple_set_uid (def_temp, gimple_uid (stmt));
1269 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1270 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1272 repl = new_debug_lhs;
1274 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1275 SET_USE (use, repl);
1276 update_stmt (use_stmt);
1278 return new_lhs;
1281 /* Replace all SSA names defined in STMTS_TO_FIX, and replace their
1282 uses with new SSA names. Also do this for the stmt that defines DEF
1283 if *DEF is not OP. */
1285 static void
1286 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1287 vec<gimple *> &stmts_to_fix)
1289 unsigned i;
1290 gimple *stmt;
1292 if (*def != op
1293 && TREE_CODE (*def) == SSA_NAME
1294 && (stmt = SSA_NAME_DEF_STMT (*def))
1295 && gimple_code (stmt) != GIMPLE_NOP)
1296 *def = make_new_ssa_for_def (stmt, opcode, op);
1298 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1299 make_new_ssa_for_def (stmt, opcode, op);
1302 /* Find the single immediate use of STMT's LHS, and replace it
1303 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1304 replace *DEF with OP as well. */
1306 static void
1307 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1309 tree lhs;
1310 gimple *use_stmt;
1311 use_operand_p use;
1312 gimple_stmt_iterator gsi;
1314 if (is_gimple_call (stmt))
1315 lhs = gimple_call_lhs (stmt);
1316 else
1317 lhs = gimple_assign_lhs (stmt);
1319 gcc_assert (has_single_use (lhs));
1320 single_imm_use (lhs, &use, &use_stmt);
1321 if (lhs == *def)
1322 *def = op;
1323 SET_USE (use, op);
1324 if (TREE_CODE (op) != SSA_NAME)
1325 update_stmt (use_stmt);
1326 gsi = gsi_for_stmt (stmt);
1327 unlink_stmt_vdef (stmt);
1328 reassoc_remove_stmt (&gsi);
1329 release_defs (stmt);
1332 /* Walks the linear chain with result *DEF searching for an operation
1333 with operand OP and code OPCODE, removing that from the chain. *DEF
1334 is updated if there is only one operand but no operation left. */
1336 static void
1337 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1339 tree orig_def = *def;
1340 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1341 /* PR72835 - Record the stmt chain that has to be updated such that
1342 we don't use the same LHS when the values computed are different. */
1343 auto_vec<gimple *, 64> stmts_to_fix;
1347 tree name;
1349 if (opcode == MULT_EXPR)
1351 if (stmt_is_power_of_op (stmt, op))
1353 if (decrement_power (stmt) == 1)
1355 if (stmts_to_fix.length () > 0)
1356 stmts_to_fix.pop ();
1357 propagate_op_to_single_use (op, stmt, def);
1359 break;
1361 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1363 if (gimple_assign_rhs1 (stmt) == op)
1365 tree cst = build_minus_one_cst (TREE_TYPE (op));
1366 if (stmts_to_fix.length () > 0)
1367 stmts_to_fix.pop ();
1368 propagate_op_to_single_use (cst, stmt, def);
1369 break;
1371 else if (integer_minus_onep (op)
1372 || real_minus_onep (op))
1374 gimple_assign_set_rhs_code
1375 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1376 break;
1381 name = gimple_assign_rhs1 (stmt);
1383 /* If this is the operation we look for and one of the operands
1384 is ours simply propagate the other operand into the stmts
1385 single use. */
1386 if (gimple_assign_rhs_code (stmt) == opcode
1387 && (name == op
1388 || gimple_assign_rhs2 (stmt) == op))
1390 if (name == op)
1391 name = gimple_assign_rhs2 (stmt);
1392 if (stmts_to_fix.length () > 0)
1393 stmts_to_fix.pop ();
1394 propagate_op_to_single_use (name, stmt, def);
1395 break;
1398 /* We might have a multiply of two __builtin_pow* calls, and
1399 the operand might be hiding in the rightmost one. Likewise
1400 this can happen for a negate. */
1401 if (opcode == MULT_EXPR
1402 && gimple_assign_rhs_code (stmt) == opcode
1403 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1404 && has_single_use (gimple_assign_rhs2 (stmt)))
1406 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1407 if (stmt_is_power_of_op (stmt2, op))
1409 if (decrement_power (stmt2) == 1)
1410 propagate_op_to_single_use (op, stmt2, def);
1411 else
1412 stmts_to_fix.safe_push (stmt2);
1413 break;
1415 else if (is_gimple_assign (stmt2)
1416 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1418 if (gimple_assign_rhs1 (stmt2) == op)
1420 tree cst = build_minus_one_cst (TREE_TYPE (op));
1421 propagate_op_to_single_use (cst, stmt2, def);
1422 break;
1424 else if (integer_minus_onep (op)
1425 || real_minus_onep (op))
1427 stmts_to_fix.safe_push (stmt2);
1428 gimple_assign_set_rhs_code
1429 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1430 break;
1435 /* Continue walking the chain. */
1436 gcc_assert (name != op
1437 && TREE_CODE (name) == SSA_NAME);
1438 stmt = SSA_NAME_DEF_STMT (name);
1439 stmts_to_fix.safe_push (stmt);
1441 while (1);
1443 if (stmts_to_fix.length () > 0 || *def == orig_def)
1444 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1447 /* Returns true if statement S1 dominates statement S2. Like
1448 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1450 static bool
1451 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1453 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1455 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1456 SSA_NAME. Assume it lives at the beginning of function and
1457 thus dominates everything. */
1458 if (!bb1 || s1 == s2)
1459 return true;
1461 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1462 if (!bb2)
1463 return false;
1465 if (bb1 == bb2)
1467 /* PHIs in the same basic block are assumed to be
1468 executed all in parallel, if only one stmt is a PHI,
1469 it dominates the other stmt in the same basic block. */
1470 if (gimple_code (s1) == GIMPLE_PHI)
1471 return true;
1473 if (gimple_code (s2) == GIMPLE_PHI)
1474 return false;
1476 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1478 if (gimple_uid (s1) < gimple_uid (s2))
1479 return true;
1481 if (gimple_uid (s1) > gimple_uid (s2))
1482 return false;
1484 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1485 unsigned int uid = gimple_uid (s1);
1486 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1488 gimple *s = gsi_stmt (gsi);
1489 if (gimple_uid (s) != uid)
1490 break;
1491 if (s == s2)
1492 return true;
1495 return false;
1498 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1501 /* Insert STMT after INSERT_POINT. */
1503 static void
1504 insert_stmt_after (gimple *stmt, gimple *insert_point)
1506 gimple_stmt_iterator gsi;
1507 basic_block bb;
1509 if (gimple_code (insert_point) == GIMPLE_PHI)
1510 bb = gimple_bb (insert_point);
1511 else if (!stmt_ends_bb_p (insert_point))
1513 gsi = gsi_for_stmt (insert_point);
1514 gimple_set_uid (stmt, gimple_uid (insert_point));
1515 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1516 return;
1518 else if (gimple_code (insert_point) == GIMPLE_ASM
1519 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1520 /* We have no idea where to insert - it depends on where the
1521 uses will be placed. */
1522 gcc_unreachable ();
1523 else
1524 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1525 thus if it must end a basic block, it should be a call that can
1526 throw, or some assignment that can throw. If it throws, the LHS
1527 of it will not be initialized though, so only valid places using
1528 the SSA_NAME should be dominated by the fallthru edge. */
1529 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1530 gsi = gsi_after_labels (bb);
1531 if (gsi_end_p (gsi))
1533 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1534 gimple_set_uid (stmt,
1535 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1537 else
1538 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1539 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1542 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1543 the result. Places the statement after the definition of either
1544 OP1 or OP2. Returns the new statement. */
1546 static gimple *
1547 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1549 gimple *op1def = NULL, *op2def = NULL;
1550 gimple_stmt_iterator gsi;
1551 tree op;
1552 gassign *sum;
1554 /* Create the addition statement. */
1555 op = make_ssa_name (type);
1556 sum = gimple_build_assign (op, opcode, op1, op2);
1558 /* Find an insertion place and insert. */
1559 if (TREE_CODE (op1) == SSA_NAME)
1560 op1def = SSA_NAME_DEF_STMT (op1);
1561 if (TREE_CODE (op2) == SSA_NAME)
1562 op2def = SSA_NAME_DEF_STMT (op2);
1563 if ((!op1def || gimple_nop_p (op1def))
1564 && (!op2def || gimple_nop_p (op2def)))
1566 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1567 if (!gsi_end_p (gsi)
1568 && is_gimple_call (gsi_stmt (gsi))
1569 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1571 /* Don't add statements before a returns_twice call at the start
1572 of a function. */
1573 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1574 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1576 if (gsi_end_p (gsi))
1578 gimple_stmt_iterator gsi2
1579 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1580 gimple_set_uid (sum,
1581 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1583 else
1584 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1585 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1587 else
1589 gimple *insert_point;
1590 if ((!op1def || gimple_nop_p (op1def))
1591 || (op2def && !gimple_nop_p (op2def)
1592 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1593 insert_point = op2def;
1594 else
1595 insert_point = op1def;
1596 insert_stmt_after (sum, insert_point);
1598 update_stmt (sum);
1600 return sum;
1603 /* Perform un-distribution of divisions and multiplications.
1604 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1605 to (A + B) / X for real X.
1607 The algorithm is organized as follows.
1609 - First we walk the addition chain *OPS looking for summands that
1610 are defined by a multiplication or a real division. This results
1611 in the candidates bitmap with relevant indices into *OPS.
1613 - Second we build the chains of multiplications or divisions for
1614 these candidates, counting the number of occurrences of (operand, code)
1615 pairs in all of the candidates chains.
1617 - Third we sort the (operand, code) pairs by number of occurrence and
1618 process them starting with the pair with the most uses.
1620 * For each such pair we walk the candidates again to build a
1621 second candidate bitmap noting all multiplication/division chains
1622 that have at least one occurrence of (operand, code).
1624 * We build an alternate addition chain only covering these
1625 candidates with one (operand, code) operation removed from their
1626 multiplication/division chain.
1628 * The first candidate gets replaced by the alternate addition chain
1629 multiplied/divided by the operand.
1631 * All candidate chains get disabled for further processing and
1632 processing of (operand, code) pairs continues.
1634 The alternate addition chains built are re-processed by the main
1635 reassociation algorithm which allows optimizing a * x * y + b * y * x
1636 to (a + b) * x * y in one invocation of the reassociation pass. */
1638 static bool
1639 undistribute_ops_list (enum tree_code opcode,
1640 vec<operand_entry *> *ops, class loop *loop)
1642 unsigned int length = ops->length ();
1643 operand_entry *oe1;
1644 unsigned i, j;
1645 unsigned nr_candidates, nr_candidates2;
1646 sbitmap_iterator sbi0;
1647 vec<operand_entry *> *subops;
1648 bool changed = false;
1649 unsigned int next_oecount_id = 0;
1651 if (length <= 1
1652 || opcode != PLUS_EXPR)
1653 return false;
1655 /* Build a list of candidates to process. */
1656 auto_sbitmap candidates (length);
1657 bitmap_clear (candidates);
1658 nr_candidates = 0;
1659 FOR_EACH_VEC_ELT (*ops, i, oe1)
1661 enum tree_code dcode;
1662 gimple *oe1def;
1664 if (TREE_CODE (oe1->op) != SSA_NAME)
1665 continue;
1666 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1667 if (!is_gimple_assign (oe1def))
1668 continue;
1669 dcode = gimple_assign_rhs_code (oe1def);
1670 if ((dcode != MULT_EXPR
1671 && dcode != RDIV_EXPR)
1672 || !is_reassociable_op (oe1def, dcode, loop))
1673 continue;
1675 bitmap_set_bit (candidates, i);
1676 nr_candidates++;
1679 if (nr_candidates < 2)
1680 return false;
1682 if (dump_file && (dump_flags & TDF_DETAILS))
1684 fprintf (dump_file, "searching for un-distribute opportunities ");
1685 print_generic_expr (dump_file,
1686 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1687 fprintf (dump_file, " %d\n", nr_candidates);
1690 /* Build linearized sub-operand lists and the counting table. */
1691 cvec.create (0);
1693 hash_table<oecount_hasher> ctable (15);
1695 /* ??? Macro arguments cannot have multi-argument template types in
1696 them. This typedef is needed to workaround that limitation. */
1697 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1698 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1699 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1701 gimple *oedef;
1702 enum tree_code oecode;
1703 unsigned j;
1705 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1706 oecode = gimple_assign_rhs_code (oedef);
1707 linearize_expr_tree (&subops[i], oedef,
1708 associative_tree_code (oecode), false);
1710 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1712 oecount c;
1713 int *slot;
1714 int idx;
1715 c.oecode = oecode;
1716 c.cnt = 1;
1717 c.id = next_oecount_id++;
1718 c.op = oe1->op;
1719 cvec.safe_push (c);
1720 idx = cvec.length () + 41;
1721 slot = ctable.find_slot (idx, INSERT);
1722 if (!*slot)
1724 *slot = idx;
1726 else
1728 cvec.pop ();
1729 cvec[*slot - 42].cnt++;
1734 /* Sort the counting table. */
1735 cvec.qsort (oecount_cmp);
1737 if (dump_file && (dump_flags & TDF_DETAILS))
1739 oecount *c;
1740 fprintf (dump_file, "Candidates:\n");
1741 FOR_EACH_VEC_ELT (cvec, j, c)
1743 fprintf (dump_file, " %u %s: ", c->cnt,
1744 c->oecode == MULT_EXPR
1745 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1746 print_generic_expr (dump_file, c->op);
1747 fprintf (dump_file, "\n");
1751 /* Process the (operand, code) pairs in order of most occurrence. */
1752 auto_sbitmap candidates2 (length);
1753 while (!cvec.is_empty ())
1755 oecount *c = &cvec.last ();
1756 if (c->cnt < 2)
1757 break;
1759 /* Now collect the operands in the outer chain that contain
1760 the common operand in their inner chain. */
1761 bitmap_clear (candidates2);
1762 nr_candidates2 = 0;
1763 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1765 gimple *oedef;
1766 enum tree_code oecode;
1767 unsigned j;
1768 tree op = (*ops)[i]->op;
1770 /* If we undistributed in this chain already this may be
1771 a constant. */
1772 if (TREE_CODE (op) != SSA_NAME)
1773 continue;
1775 oedef = SSA_NAME_DEF_STMT (op);
1776 oecode = gimple_assign_rhs_code (oedef);
1777 if (oecode != c->oecode)
1778 continue;
1780 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1782 if (oe1->op == c->op)
1784 bitmap_set_bit (candidates2, i);
1785 ++nr_candidates2;
1786 break;
1791 if (nr_candidates2 >= 2)
1793 operand_entry *oe1, *oe2;
1794 gimple *prod;
1795 int first = bitmap_first_set_bit (candidates2);
1797 /* Build the new addition chain. */
1798 oe1 = (*ops)[first];
1799 if (dump_file && (dump_flags & TDF_DETAILS))
1801 fprintf (dump_file, "Building (");
1802 print_generic_expr (dump_file, oe1->op);
1804 zero_one_operation (&oe1->op, c->oecode, c->op);
1805 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1807 gimple *sum;
1808 oe2 = (*ops)[i];
1809 if (dump_file && (dump_flags & TDF_DETAILS))
1811 fprintf (dump_file, " + ");
1812 print_generic_expr (dump_file, oe2->op);
1814 zero_one_operation (&oe2->op, c->oecode, c->op);
1815 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1816 oe1->op, oe2->op, opcode);
1817 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1818 oe2->rank = 0;
1819 oe1->op = gimple_get_lhs (sum);
1822 /* Apply the multiplication/division. */
1823 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1824 oe1->op, c->op, c->oecode);
1825 if (dump_file && (dump_flags & TDF_DETAILS))
1827 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1828 print_generic_expr (dump_file, c->op);
1829 fprintf (dump_file, "\n");
1832 /* Record it in the addition chain and disable further
1833 undistribution with this op. */
1834 oe1->op = gimple_assign_lhs (prod);
1835 oe1->rank = get_rank (oe1->op);
1836 subops[first].release ();
1838 changed = true;
1841 cvec.pop ();
1844 for (i = 0; i < ops->length (); ++i)
1845 subops[i].release ();
1846 free (subops);
1847 cvec.release ();
1849 return changed;
1852 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1853 first: element index for each relevant BIT_FIELD_REF.
1854 second: the index into the *OPS vector for each relevant BIT_FIELD_REF. */
1855 typedef std::pair<unsigned, unsigned> v_info_elem;
1856 struct v_info {
1857 tree vec_type;
1858 auto_vec<v_info_elem, 32> vec;
1860 typedef v_info *v_info_ptr;
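/* For example, for V1[0] + V1[1] + V2[0] + V2[1], the map built below
holds V1 -> {(0, i0), (1, i1)} and V2 -> {(0, i2), (1, i3)}, where each
iN is the index of the corresponding entry in the *OPS vector. */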
1862 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1863 static int
1864 sort_by_mach_mode (const void *p_i, const void *p_j)
1866 const tree tr1 = *((const tree *) p_i);
1867 const tree tr2 = *((const tree *) p_j);
1868 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1869 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1870 if (mode1 > mode2)
1871 return 1;
1872 else if (mode1 < mode2)
1873 return -1;
1874 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1875 return -1;
1876 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1877 return 1;
1878 return 0;
1881 /* Cleanup hash map for VECTOR information. */
1882 static void
1883 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1885 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1886 it != info_map.end (); ++it)
1888 v_info_ptr info = (*it).second;
1889 delete info;
1890 (*it).second = NULL;
1894 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1895 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1896 is transformed to
1897 Vs = (V1 + V2 + ... + Vn)
1898 Vs[0] + Vs[1] + ... + Vs[k]
1900 The basic steps are listed below:
1902 1) Check the addition chain *OPS, looking for summands that come from
1903 a VECTOR BIT_FIELD_REF on a VECTOR type. Put the information into
1904 v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1906 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1907 contiguous and cover the whole VECTOR perfectly, without any holes.
1908 Obtain one VECTOR list which contains the candidates to be transformed.
1910 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1911 group of candidates with the same mode, build the addition statements
1912 for them and generate BIT_FIELD_REFs accordingly.
1914 TODO:
1915 The current implementation requires that each whole VECTOR be fully
1916 covered, but it could be extended to support partial coverage (adjacent
1917 elements that do not fill the whole vector); that would likely need a
1918 cost model to decide where to draw the boundary. */
1920 static bool
1921 undistribute_bitref_for_vector (enum tree_code opcode,
1922 vec<operand_entry *> *ops, struct loop *loop)
1924 if (ops->length () <= 1)
1925 return false;
1927 if (opcode != PLUS_EXPR
1928 && opcode != MULT_EXPR
1929 && opcode != BIT_XOR_EXPR
1930 && opcode != BIT_IOR_EXPR
1931 && opcode != BIT_AND_EXPR)
1932 return false;
1934 hash_map<tree, v_info_ptr> v_info_map;
1935 operand_entry *oe1;
1936 unsigned i;
1938 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1939 information into map. */
1940 FOR_EACH_VEC_ELT (*ops, i, oe1)
1942 enum tree_code dcode;
1943 gimple *oe1def;
1945 if (TREE_CODE (oe1->op) != SSA_NAME)
1946 continue;
1947 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1948 if (!is_gimple_assign (oe1def))
1949 continue;
1950 dcode = gimple_assign_rhs_code (oe1def);
1951 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1952 continue;
1954 tree rhs = gimple_assign_rhs1 (oe1def);
1955 tree vec = TREE_OPERAND (rhs, 0);
1956 tree vec_type = TREE_TYPE (vec);
1958 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1959 continue;
1961 /* Ignore it if target machine can't support this VECTOR type. */
1962 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1963 continue;
1965 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1966 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1967 continue;
1969 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1970 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1971 continue;
1973 /* The type of BIT_FIELD_REF might not be equal to the element type of
1974 the vector. We want to use a vector type with element type the
1975 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1976 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1978 machine_mode simd_mode;
1979 unsigned HOST_WIDE_INT size, nunits;
1980 unsigned HOST_WIDE_INT elem_size
1981 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1982 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1983 continue;
1984 if (size <= elem_size || (size % elem_size) != 0)
1985 continue;
1986 nunits = size / elem_size;
1987 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1988 nunits).exists (&simd_mode))
1989 continue;
1990 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1992 /* Ignore it if target machine can't support this VECTOR type. */
1993 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1994 continue;
1996 /* Check const vector type, constrain BIT_FIELD_REF offset and
1997 size. */
1998 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1999 continue;
2001 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2002 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2003 continue;
2006 tree elem_type = TREE_TYPE (vec_type);
2007 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2008 if (maybe_ne (bit_field_size (rhs), elem_size))
2009 continue;
2011 unsigned idx;
2012 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2013 continue;
2015 /* Ignore it if target machine can't support this type of VECTOR
2016 operation. */
2017 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2018 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2019 continue;
2021 bool existed;
2022 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2023 if (!existed)
2025 info = new v_info;
2026 info->vec_type = vec_type;
2028 else if (!types_compatible_p (vec_type, info->vec_type))
2029 continue;
2030 info->vec.safe_push (std::make_pair (idx, i));
2033 /* We need at least two VECTORs to combine. */
2034 if (v_info_map.elements () <= 1)
2036 cleanup_vinfo_map (v_info_map);
2037 return false;
2040 /* Verify all VECTOR candidates by checking two conditions:
2041 1) sorted offsets are adjacent, no holes.
2042 2) can fill the whole VECTOR perfectly.
2043 And add the valid candidates to a vector for further handling. */
2044 auto_vec<tree> valid_vecs (v_info_map.elements ());
2045 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2046 it != v_info_map.end (); ++it)
2048 tree cand_vec = (*it).first;
2049 v_info_ptr cand_info = (*it).second;
2050 unsigned int num_elems
2051 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2052 if (cand_info->vec.length () != num_elems)
2053 continue;
2054 sbitmap holes = sbitmap_alloc (num_elems);
2055 bitmap_ones (holes);
2056 bool valid = true;
2057 v_info_elem *curr;
2058 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2060 if (!bitmap_bit_p (holes, curr->first))
2062 valid = false;
2063 break;
2065 else
2066 bitmap_clear_bit (holes, curr->first);
2068 if (valid && bitmap_empty_p (holes))
2069 valid_vecs.quick_push (cand_vec);
2070 sbitmap_free (holes);
2073 /* We need at least two VECTORs to combine. */
2074 if (valid_vecs.length () <= 1)
2076 cleanup_vinfo_map (v_info_map);
2077 return false;
2080 valid_vecs.qsort (sort_by_mach_mode);
2081 /* Go through all candidates in machine mode order; combine each group of
2082 candidates with the same mode and skip modes with a single candidate. */
2083 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2085 tree tvec = valid_vecs[i];
2086 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2088 /* Skip modes with only a single candidate. */
2089 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2090 continue;
2092 unsigned int idx, j;
2093 gimple *sum = NULL;
2094 tree sum_vec = tvec;
2095 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2096 v_info_elem *elem;
2097 tree vec_type = info_ptr->vec_type;
2099 /* Build the sum for all candidates with same mode. */
2102 sum = build_and_add_sum (vec_type, sum_vec,
2103 valid_vecs[i + 1], opcode);
2104 if (!useless_type_conversion_p (vec_type,
2105 TREE_TYPE (valid_vecs[i + 1])))
2107 /* Update the operands only after build_and_add_sum,
2108 so that we don't have to repeat the placement algorithm
2109 of build_and_add_sum. */
2110 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2111 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2112 valid_vecs[i + 1]);
2113 tree lhs = make_ssa_name (vec_type);
2114 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2115 gimple_set_uid (g, gimple_uid (sum));
2116 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2117 gimple_assign_set_rhs2 (sum, lhs);
2118 if (sum_vec == tvec)
2120 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2121 lhs = make_ssa_name (vec_type);
2122 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2123 gimple_set_uid (g, gimple_uid (sum));
2124 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2125 gimple_assign_set_rhs1 (sum, lhs);
2127 update_stmt (sum);
2129 sum_vec = gimple_get_lhs (sum);
2130 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2131 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2132 /* Update those related ops of current candidate VECTOR. */
2133 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2135 idx = elem->second;
2136 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2138 /* Set this so that the op definition will get DCEd later. */
2138 gimple_set_visited (def, true);
2139 if (opcode == PLUS_EXPR
2140 || opcode == BIT_XOR_EXPR
2141 || opcode == BIT_IOR_EXPR)
2142 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2143 else if (opcode == MULT_EXPR)
2144 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2145 else
2147 gcc_assert (opcode == BIT_AND_EXPR);
2148 (*ops)[idx]->op
2149 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2151 (*ops)[idx]->rank = 0;
2153 if (dump_file && (dump_flags & TDF_DETAILS))
2155 fprintf (dump_file, "Generating addition -> ");
2156 print_gimple_stmt (dump_file, sum, 0);
2158 i++;
2160 while ((i < valid_vecs.length () - 1)
2161 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2163 /* Referring to the first valid VECTOR with this mode, generate the
2164 BIT_FIELD_REF statements accordingly. */
2165 info_ptr = *(v_info_map.get (tvec));
2166 gcc_assert (sum);
2167 tree elem_type = TREE_TYPE (vec_type);
2168 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2170 idx = elem->second;
2171 tree dst = make_ssa_name (elem_type);
2172 tree pos = bitsize_int (elem->first
2173 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2174 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2175 TYPE_SIZE (elem_type), pos);
2176 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2177 insert_stmt_after (gs, sum);
2178 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2180 /* Set this so that the op definition will get DCEd later. */
2180 gimple_set_visited (def, true);
2181 (*ops)[idx]->op = gimple_assign_lhs (gs);
2182 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2183 if (dump_file && (dump_flags & TDF_DETAILS))
2185 fprintf (dump_file, "Generating bit_field_ref -> ");
2186 print_gimple_stmt (dump_file, gs, 0);
2191 if (dump_file && (dump_flags & TDF_DETAILS))
2192 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2194 cleanup_vinfo_map (v_info_map);
2196 return true;
2199 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2200 expression, examine the other OPS to see if any of them are comparisons
2201 of the same values, which we may be able to combine or eliminate.
2202 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
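/* Editor-added illustration, not part of the original sources: for
   BIT_AND_EXPR the same machinery applies.  E.g. for signed integers a and b

     _1 = a < b;
     _2 = a != b;
     _3 = _1 & _2;

   maybe_fold_and_comparisons may fold the pair to a < b, so the entry for _2
   is removed from the operand list; if the fold yields a constant instead
   (e.g. (a < b) & (a > b) is always 0), the constant is appended to the end
   of the list rather than replacing the current entry.  */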
2204 static bool
2205 eliminate_redundant_comparison (enum tree_code opcode,
2206 vec<operand_entry *> *ops,
2207 unsigned int currindex,
2208 operand_entry *curr)
2210 tree op1, op2;
2211 enum tree_code lcode, rcode;
2212 gimple *def1, *def2;
2213 int i;
2214 operand_entry *oe;
2216 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2217 return false;
2219 /* Check that CURR is a comparison. */
2220 if (TREE_CODE (curr->op) != SSA_NAME)
2221 return false;
2222 def1 = SSA_NAME_DEF_STMT (curr->op);
2223 if (!is_gimple_assign (def1))
2224 return false;
2225 lcode = gimple_assign_rhs_code (def1);
2226 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2227 return false;
2228 op1 = gimple_assign_rhs1 (def1);
2229 op2 = gimple_assign_rhs2 (def1);
2231 /* Now look for a similar comparison in the remaining OPS. */
2232 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2234 tree t;
2236 if (TREE_CODE (oe->op) != SSA_NAME)
2237 continue;
2238 def2 = SSA_NAME_DEF_STMT (oe->op);
2239 if (!is_gimple_assign (def2))
2240 continue;
2241 rcode = gimple_assign_rhs_code (def2);
2242 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2243 continue;
2245 /* If we got here, we have a match. See if we can combine the
2246 two comparisons. */
2247 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2248 if (opcode == BIT_IOR_EXPR)
2249 t = maybe_fold_or_comparisons (type,
2250 lcode, op1, op2,
2251 rcode, gimple_assign_rhs1 (def2),
2252 gimple_assign_rhs2 (def2));
2253 else
2254 t = maybe_fold_and_comparisons (type,
2255 lcode, op1, op2,
2256 rcode, gimple_assign_rhs1 (def2),
2257 gimple_assign_rhs2 (def2));
2258 if (!t)
2259 continue;
2261 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2262 always give us a boolean_type_node value back. If the original
2263 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2264 we need to convert. */
2265 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2267 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2268 continue;
2269 t = fold_convert (TREE_TYPE (curr->op), t);
2272 if (TREE_CODE (t) != INTEGER_CST
2273 && !operand_equal_p (t, curr->op, 0))
2275 enum tree_code subcode;
2276 tree newop1, newop2;
2277 if (!COMPARISON_CLASS_P (t))
2278 continue;
2279 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2280 STRIP_USELESS_TYPE_CONVERSION (newop1);
2281 STRIP_USELESS_TYPE_CONVERSION (newop2);
2282 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2283 continue;
2284 if (lcode == TREE_CODE (t)
2285 && operand_equal_p (op1, newop1, 0)
2286 && operand_equal_p (op2, newop2, 0))
2287 t = curr->op;
2288 else if ((TREE_CODE (newop1) == SSA_NAME
2289 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2290 || (TREE_CODE (newop2) == SSA_NAME
2291 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2292 continue;
2295 if (dump_file && (dump_flags & TDF_DETAILS))
2297 fprintf (dump_file, "Equivalence: ");
2298 print_generic_expr (dump_file, curr->op);
2299 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2300 print_generic_expr (dump_file, oe->op);
2301 fprintf (dump_file, " -> ");
2302 print_generic_expr (dump_file, t);
2303 fprintf (dump_file, "\n");
2306 /* Now we can delete oe, as it has been subsumed by the new combined
2307 expression t. */
2308 ops->ordered_remove (i);
2309 reassociate_stats.ops_eliminated ++;
2311 /* If t is the same as curr->op, we're done. Otherwise we must
2312 replace curr->op with t. A special case is when we got a constant
2313 back, in which case we add it to the end instead of in place of
2314 the current entry. */
2315 if (TREE_CODE (t) == INTEGER_CST)
2317 ops->ordered_remove (currindex);
2318 add_to_ops_vec (ops, t);
2320 else if (!operand_equal_p (t, curr->op, 0))
2322 gimple *sum;
2323 enum tree_code subcode;
2324 tree newop1;
2325 tree newop2;
2326 gcc_assert (COMPARISON_CLASS_P (t));
2327 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2328 STRIP_USELESS_TYPE_CONVERSION (newop1);
2329 STRIP_USELESS_TYPE_CONVERSION (newop2);
2330 gcc_checking_assert (is_gimple_val (newop1)
2331 && is_gimple_val (newop2));
2332 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2333 curr->op = gimple_get_lhs (sum);
2335 return true;
2338 return false;
2342 /* Transform repeated addition of the same value into a multiplication
2343 by a constant. */
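/* Editor-added illustration, not part of the original sources: a sketch of
   the transform, assuming the operand list has been rank-sorted so that
   equal operands are adjacent.

     unsigned int
     triple (unsigned int x, unsigned int y)
     {
       return x + x + x + y;
     }

   The run of three x entries is removed and replaced by one new entry
   tmp = x * 3, giving the equivalent of x * 3 + y.  For floating-point
   operands this is only done under -funsafe-math-optimizations.  */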
2344 static bool
2345 transform_add_to_multiply (vec<operand_entry *> *ops)
2347 operand_entry *oe;
2348 tree op = NULL_TREE;
2349 int j;
2350 int i, start = -1, end = 0, count = 0;
2351 auto_vec<std::pair <int, int> > indxs;
2352 bool changed = false;
2354 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2355 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2356 || !flag_unsafe_math_optimizations))
2357 return false;
2359 /* Look for repeated operands. */
2360 FOR_EACH_VEC_ELT (*ops, i, oe)
2362 if (start == -1)
2364 count = 1;
2365 op = oe->op;
2366 start = i;
2368 else if (operand_equal_p (oe->op, op, 0))
2370 count++;
2371 end = i;
2373 else
2375 if (count > 1)
2376 indxs.safe_push (std::make_pair (start, end));
2377 count = 1;
2378 op = oe->op;
2379 start = i;
2383 if (count > 1)
2384 indxs.safe_push (std::make_pair (start, end));
2386 for (j = indxs.length () - 1; j >= 0; --j)
2388 /* Convert repeated operand addition to multiplication. */
2389 start = indxs[j].first;
2390 end = indxs[j].second;
2391 op = (*ops)[start]->op;
2392 count = end - start + 1;
2393 for (i = end; i >= start; --i)
2394 ops->unordered_remove (i);
2395 tree tmp = make_ssa_name (TREE_TYPE (op));
2396 tree cst = build_int_cst (integer_type_node, count);
2397 gassign *mul_stmt
2398 = gimple_build_assign (tmp, MULT_EXPR,
2399 op, fold_convert (TREE_TYPE (op), cst));
2400 gimple_set_visited (mul_stmt, true);
2401 add_to_ops_vec (ops, tmp, mul_stmt);
2402 changed = true;
2405 return changed;
2409 /* Perform various identities and other optimizations on the list of
2410 operand entries, stored in OPS. The tree code for the binary
2411 operation between all the operands is OPCODE. */
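/* Editor-added illustration, not part of the original sources: for a
   linearized chain such as

     unsigned int
     merge_consts (unsigned int a, unsigned int b)
     {
       return (a + 3) + (b + 5);
     }

   the operand list is roughly { a, b, 3, 5 } with the constants last; the
   two trailing constants are folded into 8, and the eliminate_* helpers
   below then look for identities such as x + -x, x & x or a | ~a.  */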
2413 static void
2414 optimize_ops_list (enum tree_code opcode,
2415 vec<operand_entry *> *ops)
2417 unsigned int length = ops->length ();
2418 unsigned int i;
2419 operand_entry *oe;
2420 operand_entry *oelast = NULL;
2421 bool iterate = false;
2423 if (length == 1)
2424 return;
2426 oelast = ops->last ();
2428 /* If the last two are constants, pop the constants off, merge them
2429 and try the next two. */
2430 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2432 operand_entry *oelm1 = (*ops)[length - 2];
2434 if (oelm1->rank == 0
2435 && is_gimple_min_invariant (oelm1->op)
2436 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2437 TREE_TYPE (oelast->op)))
2439 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2440 oelm1->op, oelast->op);
2442 if (folded && is_gimple_min_invariant (folded))
2444 if (dump_file && (dump_flags & TDF_DETAILS))
2445 fprintf (dump_file, "Merging constants\n");
2447 ops->pop ();
2448 ops->pop ();
2450 add_to_ops_vec (ops, folded);
2451 reassociate_stats.constants_eliminated++;
2453 optimize_ops_list (opcode, ops);
2454 return;
2459 eliminate_using_constants (opcode, ops);
2460 oelast = NULL;
2462 for (i = 0; ops->iterate (i, &oe);)
2464 bool done = false;
2466 if (eliminate_not_pairs (opcode, ops, i, oe))
2467 return;
2468 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2469 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2470 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2472 if (done)
2473 return;
2474 iterate = true;
2475 oelast = NULL;
2476 continue;
2478 oelast = oe;
2479 i++;
2482 if (iterate)
2483 optimize_ops_list (opcode, ops);
2486 /* The following functions are subroutines to optimize_range_tests and allow
2487 it to try to change a logical combination of comparisons into a range
2488 test.
2490 For example, both
2491 X == 2 || X == 5 || X == 3 || X == 4
2492 and
2493 X >= 2 && X <= 5
2494 are converted to
2495 (unsigned) (X - 2) <= 3
2497 For more information see comments above fold_test_range in fold-const.cc,
2498 this implementation is for GIMPLE. */
2502 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2504 void
2505 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2507 if (!skip_exp)
2508 print_generic_expr (file, r->exp);
2509 fprintf (file, " %c[", r->in_p ? '+' : '-');
2510 print_generic_expr (file, r->low);
2511 fputs (", ", file);
2512 print_generic_expr (file, r->high);
2513 fputc (']', file);
2516 /* Dump the range entry R to STDERR. */
2518 DEBUG_FUNCTION void
2519 debug_range_entry (struct range_entry *r)
2521 dump_range_entry (stderr, r, false);
2522 fputc ('\n', stderr);
2525 /* This is similar to make_range in fold-const.cc, but on top of
2526 GIMPLE instead of trees. If EXP is non-NULL, it should be
2527 an SSA_NAME and STMT argument is ignored, otherwise STMT
2528 argument should be a GIMPLE_COND. */
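/* Editor-added illustration, not part of the original sources: for GIMPLE
   roughly like

     _1 = x_2(D) + 4294967294;   // 32-bit unsigned x - 2
     _3 = _1 <= 3;

   calling init_range_entry on _3 walks the defining statements and yields
   approximately r->exp = x_2(D), r->in_p = true, r->low = 2, r->high = 5,
   i.e. the +[2, 5] range behind the "(unsigned) (X - 2) <= 3" example
   earlier in this file.  */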
2530 void
2531 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2533 int in_p;
2534 tree low, high;
2535 bool is_bool, strict_overflow_p;
2537 r->exp = NULL_TREE;
2538 r->in_p = false;
2539 r->strict_overflow_p = false;
2540 r->low = NULL_TREE;
2541 r->high = NULL_TREE;
2542 if (exp != NULL_TREE
2543 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2544 return;
2546 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2547 and see if we can refine the range. Some of the cases below may not
2548 happen, but it doesn't seem worth worrying about this. We "continue"
2549 the outer loop when we've changed something; otherwise we "break"
2550 the switch, which will "break" the while. */
2551 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2552 high = low;
2553 in_p = 0;
2554 strict_overflow_p = false;
2555 is_bool = false;
2556 if (exp == NULL_TREE)
2557 is_bool = true;
2558 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2560 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2561 is_bool = true;
2562 else
2563 return;
2565 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2566 is_bool = true;
2568 while (1)
2570 enum tree_code code;
2571 tree arg0, arg1, exp_type;
2572 tree nexp;
2573 location_t loc;
2575 if (exp != NULL_TREE)
2577 if (TREE_CODE (exp) != SSA_NAME
2578 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2579 break;
2581 stmt = SSA_NAME_DEF_STMT (exp);
2582 if (!is_gimple_assign (stmt))
2583 break;
2585 code = gimple_assign_rhs_code (stmt);
2586 arg0 = gimple_assign_rhs1 (stmt);
2587 arg1 = gimple_assign_rhs2 (stmt);
2588 exp_type = TREE_TYPE (exp);
2590 else
2592 code = gimple_cond_code (stmt);
2593 arg0 = gimple_cond_lhs (stmt);
2594 arg1 = gimple_cond_rhs (stmt);
2595 exp_type = boolean_type_node;
2598 if (TREE_CODE (arg0) != SSA_NAME
2599 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2600 break;
2601 loc = gimple_location (stmt);
2602 switch (code)
2604 case BIT_NOT_EXPR:
2605 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2606 /* Ensure the range is either +[-,0], +[0,0],
2607 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2608 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2609 or similar expression of unconditional true or
2610 false, it should not be negated. */
2611 && ((high && integer_zerop (high))
2612 || (low && integer_onep (low))))
2614 in_p = !in_p;
2615 exp = arg0;
2616 continue;
2618 break;
2619 case SSA_NAME:
2620 exp = arg0;
2621 continue;
2622 CASE_CONVERT:
2623 if (is_bool)
2625 if ((TYPE_PRECISION (exp_type) == 1
2626 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2627 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2628 return;
2630 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2632 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2633 is_bool = true;
2634 else
2635 return;
2637 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2638 is_bool = true;
2639 goto do_default;
2640 case EQ_EXPR:
2641 case NE_EXPR:
2642 case LT_EXPR:
2643 case LE_EXPR:
2644 case GE_EXPR:
2645 case GT_EXPR:
2646 is_bool = true;
2647 /* FALLTHRU */
2648 default:
2649 if (!is_bool)
2650 return;
2651 do_default:
2652 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2653 &low, &high, &in_p,
2654 &strict_overflow_p);
2655 if (nexp != NULL_TREE)
2657 exp = nexp;
2658 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2659 continue;
2661 break;
2663 break;
2665 if (is_bool)
2667 r->exp = exp;
2668 r->in_p = in_p;
2669 r->low = low;
2670 r->high = high;
2671 r->strict_overflow_p = strict_overflow_p;
2675 /* Comparison function for qsort. Sort entries
2676 without SSA_NAME exp first, then with SSA_NAMEs sorted
2677 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2678 by increasing ->low and if ->low is the same, by increasing
2679 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2680 maximum. */
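/* Editor-added illustration, not part of the original sources: assuming
   SSA_NAME_VERSION (x_2) < SSA_NAME_VERSION (y_5), the entries

     { y_5 +[1, 1], x_2 +[10, 11], x_2 +[2, 2] }

   sort to

     { x_2 +[2, 2], x_2 +[10, 11], y_5 +[1, 1] }

   so that ranges on the same SSA_NAME become adjacent and ordered by their
   bounds, which is what the range-merging code below relies on.  */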
2682 static int
2683 range_entry_cmp (const void *a, const void *b)
2685 const struct range_entry *p = (const struct range_entry *) a;
2686 const struct range_entry *q = (const struct range_entry *) b;
2688 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2690 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2692 /* Group range_entries for the same SSA_NAME together. */
2693 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2694 return -1;
2695 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2696 return 1;
2697 /* If ->low is different, NULL low goes first, then by
2698 ascending low. */
2699 if (p->low != NULL_TREE)
2701 if (q->low != NULL_TREE)
2703 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2704 p->low, q->low);
2705 if (tem && integer_onep (tem))
2706 return -1;
2707 tem = fold_binary (GT_EXPR, boolean_type_node,
2708 p->low, q->low);
2709 if (tem && integer_onep (tem))
2710 return 1;
2712 else
2713 return 1;
2715 else if (q->low != NULL_TREE)
2716 return -1;
2717 /* If ->high is different, NULL high goes last, before that by
2718 ascending high. */
2719 if (p->high != NULL_TREE)
2721 if (q->high != NULL_TREE)
2723 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2724 p->high, q->high);
2725 if (tem && integer_onep (tem))
2726 return -1;
2727 tem = fold_binary (GT_EXPR, boolean_type_node,
2728 p->high, q->high);
2729 if (tem && integer_onep (tem))
2730 return 1;
2732 else
2733 return -1;
2735 else if (q->high != NULL_TREE)
2736 return 1;
2737 /* If both ranges are the same, sort below by ascending idx. */
2739 else
2740 return 1;
2742 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2743 return -1;
2745 if (p->idx < q->idx)
2746 return -1;
2747 else
2749 gcc_checking_assert (p->idx > q->idx);
2750 return 1;
2754 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2755 insert needed statements BEFORE or after GSI. */
2757 static tree
2758 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2760 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2761 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2762 if (TREE_CODE (ret) != SSA_NAME)
2764 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2765 if (before)
2766 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2767 else
2768 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2769 ret = gimple_assign_lhs (g);
2771 return ret;
2774 /* Helper routine of optimize_range_test.
2775 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2776 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2777 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2778 is NULL, OTHERRANGEP should not be NULL; OTHERRANGEP then points to
2779 an array of COUNT pointers to other ranges. Return
2780 true if the range merge has been successful.
2781 If OPCODE is ERROR_MARK, this is called from within
2782 maybe_optimize_range_tests and is performing inter-bb range optimization.
2783 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2784 oe->rank. */
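/* Editor-added illustration, not part of the original sources: when
   optimize_range_tests merges the X == 2 || X == 5 || X == 3 || X == 4
   example from above, this helper materializes one combined test that is
   roughly equivalent to (unsigned) (X - 2) <= 3, forces it into an SSA_NAME
   next to the statement that used the original test, stores it into the
   operand entry of RANGE, and marks the entries of the merged ranges as
   dead (error_mark_node, or the operation's neutral constant in the
   inter-bb ERROR_MARK case) so that they are removed later.  */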
2786 static bool
2787 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2788 struct range_entry **otherrangep,
2789 unsigned int count, enum tree_code opcode,
2790 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2791 bool in_p, tree low, tree high, bool strict_overflow_p)
2793 unsigned int idx = range->idx;
2794 struct range_entry *swap_with = NULL;
2795 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2796 if (opcode == ERROR_MARK)
2798 /* For inter-bb range test optimization, pick from the range tests
2799 the one which is tested in the earliest condition (one dominating
2800 the others), because otherwise there could be some UB (e.g. signed
2801 overflow) in following bbs that we'd expose which wasn't there in
2802 the original program. See PR104196. */
2803 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2804 basic_block range_bb = orig_range_bb;
2805 for (unsigned int i = 0; i < count; i++)
2807 struct range_entry *this_range;
2808 if (otherrange)
2809 this_range = otherrange + i;
2810 else
2811 this_range = otherrangep[i];
2812 operand_entry *oe = (*ops)[this_range->idx];
2813 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2814 if (range_bb != this_bb
2815 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2817 swap_with = this_range;
2818 range_bb = this_bb;
2819 idx = this_range->idx;
2822 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2823 only defined in later blocks. In this case we can't move the
2824 merged comparison earlier, so instead check if there are any stmts
2825 that might trigger signed integer overflow in between and rewrite
2826 them. But only after we check if the optimization is possible. */
2827 if (seq && swap_with)
2829 rewrite_bb_first = range_bb;
2830 rewrite_bb_last = orig_range_bb;
2831 idx = range->idx;
2832 swap_with = NULL;
2835 operand_entry *oe = (*ops)[idx];
2836 tree op = oe->op;
2837 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2838 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2839 location_t loc = gimple_location (stmt);
2840 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2841 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2842 in_p, low, high);
2843 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2844 gimple_stmt_iterator gsi;
2845 unsigned int i, uid;
2847 if (tem == NULL_TREE)
2848 return false;
2850 /* If op is default def SSA_NAME, there is no place to insert the
2851 new comparison. Give up, unless we can use OP itself as the
2852 range test. */
2853 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2855 if (op == range->exp
2856 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2857 || TREE_CODE (optype) == BOOLEAN_TYPE)
2858 && (op == tem
2859 || (TREE_CODE (tem) == EQ_EXPR
2860 && TREE_OPERAND (tem, 0) == op
2861 && integer_onep (TREE_OPERAND (tem, 1))))
2862 && opcode != BIT_IOR_EXPR
2863 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2865 stmt = NULL;
2866 tem = op;
2868 else
2869 return false;
2872 if (swap_with)
2873 std::swap (range->idx, swap_with->idx);
2875 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2876 warning_at (loc, OPT_Wstrict_overflow,
2877 "assuming signed overflow does not occur "
2878 "when simplifying range test");
2880 if (dump_file && (dump_flags & TDF_DETAILS))
2882 struct range_entry *r;
2883 fprintf (dump_file, "Optimizing range tests ");
2884 dump_range_entry (dump_file, range, false);
2885 for (i = 0; i < count; i++)
2887 if (otherrange)
2888 r = otherrange + i;
2889 else
2890 r = otherrangep[i];
2891 if (r->exp
2892 && r->exp != range->exp
2893 && TREE_CODE (r->exp) == SSA_NAME)
2895 fprintf (dump_file, " and ");
2896 dump_range_entry (dump_file, r, false);
2898 else
2900 fprintf (dump_file, " and");
2901 dump_range_entry (dump_file, r, true);
2904 fprintf (dump_file, "\n into ");
2905 print_generic_expr (dump_file, tem);
2906 fprintf (dump_file, "\n");
2909 /* In inter-bb range optimization mode, if we have a seq, we can't
2910 move the merged comparison to the earliest bb from the comparisons
2911 being replaced, so instead rewrite stmts that could trigger signed
2912 integer overflow. */
2913 for (basic_block bb = rewrite_bb_last;
2914 bb != rewrite_bb_first; bb = single_pred (bb))
2915 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2916 !gsi_end_p (gsi); gsi_next (&gsi))
2918 gimple *stmt = gsi_stmt (gsi);
2919 if (is_gimple_assign (stmt))
2920 if (tree lhs = gimple_assign_lhs (stmt))
2921 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2922 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2923 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2925 enum tree_code code = gimple_assign_rhs_code (stmt);
2926 if (arith_code_with_undefined_signed_overflow (code))
2928 gimple_stmt_iterator gsip = gsi;
2929 gimple_stmt_iterator gsin = gsi;
2930 gsi_prev (&gsip);
2931 gsi_next (&gsin);
2932 rewrite_to_defined_overflow (stmt, true);
2933 unsigned uid = gimple_uid (stmt);
2934 if (gsi_end_p (gsip))
2935 gsip = gsi_after_labels (bb);
2936 else
2937 gsi_next (&gsip);
2938 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2939 gsi_next (&gsip))
2940 gimple_set_uid (gsi_stmt (gsip), uid);
2945 if (opcode == BIT_IOR_EXPR
2946 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2947 tem = invert_truthvalue_loc (loc, tem);
2949 tem = fold_convert_loc (loc, optype, tem);
2950 if (stmt)
2952 gsi = gsi_for_stmt (stmt);
2953 uid = gimple_uid (stmt);
2955 else
2957 gsi = gsi_none ();
2958 uid = 0;
2960 if (stmt == NULL)
2961 gcc_checking_assert (tem == op);
2962 /* In rare cases range->exp can be equal to lhs of stmt.
2963 In that case we have to insert after the stmt rather than before
2964 it. If stmt is a PHI, insert it at the start of the basic block. */
2965 else if (op != range->exp)
2967 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2968 tem = force_into_ssa_name (&gsi, tem, true);
2969 gsi_prev (&gsi);
2971 else if (gimple_code (stmt) != GIMPLE_PHI)
2973 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2974 tem = force_into_ssa_name (&gsi, tem, false);
2976 else
2978 gsi = gsi_after_labels (gimple_bb (stmt));
2979 if (!gsi_end_p (gsi))
2980 uid = gimple_uid (gsi_stmt (gsi));
2981 else
2983 gsi = gsi_start_bb (gimple_bb (stmt));
2984 uid = 1;
2985 while (!gsi_end_p (gsi))
2987 uid = gimple_uid (gsi_stmt (gsi));
2988 gsi_next (&gsi);
2991 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2992 tem = force_into_ssa_name (&gsi, tem, true);
2993 if (gsi_end_p (gsi))
2994 gsi = gsi_last_bb (gimple_bb (stmt));
2995 else
2996 gsi_prev (&gsi);
2998 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2999 if (gimple_uid (gsi_stmt (gsi)))
3000 break;
3001 else
3002 gimple_set_uid (gsi_stmt (gsi), uid);
3004 oe->op = tem;
3005 range->exp = exp;
3006 range->low = low;
3007 range->high = high;
3008 range->in_p = in_p;
3009 range->strict_overflow_p = false;
3011 for (i = 0; i < count; i++)
3013 if (otherrange)
3014 range = otherrange + i;
3015 else
3016 range = otherrangep[i];
3017 oe = (*ops)[range->idx];
3018 /* Now change all the other range test immediate uses, so that
3019 those tests will be optimized away. */
3020 if (opcode == ERROR_MARK)
3022 if (oe->op)
3023 oe->op = build_int_cst (TREE_TYPE (oe->op),
3024 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3025 else
3026 oe->op = (oe->rank == BIT_IOR_EXPR
3027 ? boolean_false_node : boolean_true_node);
3029 else
3030 oe->op = error_mark_node;
3031 range->exp = NULL_TREE;
3032 range->low = NULL_TREE;
3033 range->high = NULL_TREE;
3035 return true;
3038 /* Optimize X == CST1 || X == CST2
3039 if popcount (CST1 ^ CST2) == 1 into
3040 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3041 Similarly for ranges. E.g.
3042 X != 2 && X != 3 && X != 10 && X != 11
3043 will be transformed by the previous optimization into
3044 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3045 and this loop can transform that into
3046 !(((X & ~8) - 2U) <= 1U). */
3048 static bool
3049 optimize_range_tests_xor (enum tree_code opcode, tree type,
3050 tree lowi, tree lowj, tree highi, tree highj,
3051 vec<operand_entry *> *ops,
3052 struct range_entry *rangei,
3053 struct range_entry *rangej)
3055 tree lowxor, highxor, tem, exp;
3056 /* Check lowi ^ lowj == highi ^ highj and
3057 popcount (lowi ^ lowj) == 1. */
3058 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3059 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3060 return false;
3061 if (!integer_pow2p (lowxor))
3062 return false;
3063 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3064 if (!tree_int_cst_equal (lowxor, highxor))
3065 return false;
3067 exp = rangei->exp;
3068 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3069 int prec = GET_MODE_PRECISION (mode);
3070 if (TYPE_PRECISION (type) < prec
3071 || (wi::to_wide (TYPE_MIN_VALUE (type))
3072 != wi::min_value (prec, TYPE_SIGN (type)))
3073 || (wi::to_wide (TYPE_MAX_VALUE (type))
3074 != wi::max_value (prec, TYPE_SIGN (type))))
3076 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3077 exp = fold_convert (type, exp);
3078 lowxor = fold_convert (type, lowxor);
3079 lowi = fold_convert (type, lowi);
3080 highi = fold_convert (type, highi);
3082 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3083 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3084 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3085 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3086 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3087 NULL, rangei->in_p, lowj, highj,
3088 rangei->strict_overflow_p
3089 || rangej->strict_overflow_p))
3090 return true;
3091 return false;
3094 /* Optimize X == CST1 || X == CST2
3095 if popcount (CST2 - CST1) == 1 into
3096 ((X - CST1) & ~(CST2 - CST1)) == 0.
3097 Similarly for ranges. E.g.
3098 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3099 || X == 75 || X == 45
3100 will be transformed by the previous optimization into
3101 (X - 43U) <= 3U || (X - 75U) <= 3U
3102 and this loop can transform that into
3103 ((X - 43U) & ~(75U - 43U)) <= 3U. */
3104 static bool
3105 optimize_range_tests_diff (enum tree_code opcode, tree type,
3106 tree lowi, tree lowj, tree highi, tree highj,
3107 vec<operand_entry *> *ops,
3108 struct range_entry *rangei,
3109 struct range_entry *rangej)
3111 tree tem1, tem2, mask;
3112 /* Check highi - lowi == highj - lowj. */
3113 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3114 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3115 return false;
3116 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3117 if (!tree_int_cst_equal (tem1, tem2))
3118 return false;
3119 /* Check popcount (lowj - lowi) == 1. */
3120 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3121 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3122 return false;
3123 if (!integer_pow2p (tem1))
3124 return false;
3126 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3127 int prec = GET_MODE_PRECISION (mode);
3128 if (TYPE_PRECISION (type) < prec
3129 || (wi::to_wide (TYPE_MIN_VALUE (type))
3130 != wi::min_value (prec, TYPE_SIGN (type)))
3131 || (wi::to_wide (TYPE_MAX_VALUE (type))
3132 != wi::max_value (prec, TYPE_SIGN (type))))
3133 type = build_nonstandard_integer_type (prec, 1);
3134 else
3135 type = unsigned_type_for (type);
3136 tem1 = fold_convert (type, tem1);
3137 tem2 = fold_convert (type, tem2);
3138 lowi = fold_convert (type, lowi);
3139 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3140 tem1 = fold_build2 (MINUS_EXPR, type,
3141 fold_convert (type, rangei->exp), lowi);
3142 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3143 lowj = build_int_cst (type, 0);
3144 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3145 NULL, rangei->in_p, lowj, tem2,
3146 rangei->strict_overflow_p
3147 || rangej->strict_overflow_p))
3148 return true;
3149 return false;
3152 /* Perform the checks common to optimize_range_tests_xor and
3153 optimize_range_tests_diff.
3154 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
3155 Else call optimize_range_tests_diff. */
3157 static bool
3158 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3159 bool optimize_xor, vec<operand_entry *> *ops,
3160 struct range_entry *ranges)
3162 int i, j;
3163 bool any_changes = false;
3164 for (i = first; i < length; i++)
3166 tree lowi, highi, lowj, highj, type, tem;
3168 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3169 continue;
3170 type = TREE_TYPE (ranges[i].exp);
3171 if (!INTEGRAL_TYPE_P (type))
3172 continue;
3173 lowi = ranges[i].low;
3174 if (lowi == NULL_TREE)
3175 lowi = TYPE_MIN_VALUE (type);
3176 highi = ranges[i].high;
3177 if (highi == NULL_TREE)
3178 continue;
3179 for (j = i + 1; j < length && j < i + 64; j++)
3181 bool changes;
3182 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3183 continue;
3184 lowj = ranges[j].low;
3185 if (lowj == NULL_TREE)
3186 continue;
3187 highj = ranges[j].high;
3188 if (highj == NULL_TREE)
3189 highj = TYPE_MAX_VALUE (type);
3190 /* Check lowj > highi. */
3191 tem = fold_binary (GT_EXPR, boolean_type_node,
3192 lowj, highi);
3193 if (tem == NULL_TREE || !integer_onep (tem))
3194 continue;
3195 if (optimize_xor)
3196 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3197 highi, highj, ops,
3198 ranges + i, ranges + j);
3199 else
3200 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3201 highi, highj, ops,
3202 ranges + i, ranges + j);
3203 if (changes)
3205 any_changes = true;
3206 break;
3210 return any_changes;
3213 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3214 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3215 EXP on success, NULL otherwise. */
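/* Editor-added illustration, not part of the original sources: with
   prec == 64, TOTALLOW == LOW == 43 and HIGH == 46 the helper returns EXP
   unchanged with *MASK == 0xf (bits 0-3 set); for a later range with
   LOW == 75 and HIGH == 78 against the same TOTALLOW the mask comes back
   shifted by 75 - 43, i.e. 0xf << 32.  The caller ORs the masks together,
   which is how constants like the 0x8F0000004FULL in the example above
   are built up.  */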
3217 static tree
3218 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3219 wide_int *mask, tree *totallowp)
3221 tree tem = int_const_binop (MINUS_EXPR, high, low);
3222 if (tem == NULL_TREE
3223 || TREE_CODE (tem) != INTEGER_CST
3224 || TREE_OVERFLOW (tem)
3225 || tree_int_cst_sgn (tem) == -1
3226 || compare_tree_int (tem, prec) != -1)
3227 return NULL_TREE;
3229 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3230 *mask = wi::shifted_mask (0, max, false, prec);
3231 if (TREE_CODE (exp) == BIT_AND_EXPR
3232 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3234 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3235 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3236 if (wi::popcount (msk) == 1
3237 && wi::ltu_p (msk, prec - max))
3239 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3240 max += msk.to_uhwi ();
3241 exp = TREE_OPERAND (exp, 0);
3242 if (integer_zerop (low)
3243 && TREE_CODE (exp) == PLUS_EXPR
3244 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3246 tree ret = TREE_OPERAND (exp, 0);
3247 STRIP_NOPS (ret);
3248 widest_int bias
3249 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3250 TYPE_PRECISION (TREE_TYPE (low))));
3251 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3252 if (totallowp)
3254 *totallowp = tbias;
3255 return ret;
3257 else if (!tree_int_cst_lt (totallow, tbias))
3258 return NULL_TREE;
3259 bias = wi::to_widest (tbias);
3260 bias -= wi::to_widest (totallow);
3261 if (bias >= 0 && bias < prec - max)
3263 *mask = wi::lshift (*mask, bias);
3264 return ret;
3269 if (totallowp)
3270 return exp;
3271 if (!tree_int_cst_lt (totallow, low))
3272 return exp;
3273 tem = int_const_binop (MINUS_EXPR, low, totallow);
3274 if (tem == NULL_TREE
3275 || TREE_CODE (tem) != INTEGER_CST
3276 || TREE_OVERFLOW (tem)
3277 || compare_tree_int (tem, prec - max) == 1)
3278 return NULL_TREE;
3280 *mask = wi::lshift (*mask, wi::to_widest (tem));
3281 return exp;
3284 /* Attempt to optimize small range tests using bit test.
3285 E.g.
3286 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3287 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3288 has been optimized by earlier optimizations into:
3289 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3290 As the whole 43 through 82 range spans fewer than 64 values,
3291 for 64-bit word targets optimize that into:
3292 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3294 static bool
3295 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3296 vec<operand_entry *> *ops,
3297 struct range_entry *ranges)
3299 int i, j;
3300 bool any_changes = false;
3301 int prec = GET_MODE_BITSIZE (word_mode);
3302 auto_vec<struct range_entry *, 64> candidates;
3304 for (i = first; i < length - 1; i++)
3306 tree lowi, highi, lowj, highj, type;
3308 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3309 continue;
3310 type = TREE_TYPE (ranges[i].exp);
3311 if (!INTEGRAL_TYPE_P (type))
3312 continue;
3313 lowi = ranges[i].low;
3314 if (lowi == NULL_TREE)
3315 lowi = TYPE_MIN_VALUE (type);
3316 highi = ranges[i].high;
3317 if (highi == NULL_TREE)
3318 continue;
3319 wide_int mask;
3320 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3321 highi, &mask, &lowi);
3322 if (exp == NULL_TREE)
3323 continue;
3324 bool strict_overflow_p = ranges[i].strict_overflow_p;
3325 candidates.truncate (0);
3326 int end = MIN (i + 64, length);
3327 for (j = i + 1; j < end; j++)
3329 tree exp2;
3330 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3331 continue;
3332 if (ranges[j].exp == exp)
3334 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3336 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3337 if (exp2 == exp)
3339 else if (TREE_CODE (exp2) == PLUS_EXPR)
3341 exp2 = TREE_OPERAND (exp2, 0);
3342 STRIP_NOPS (exp2);
3343 if (exp2 != exp)
3344 continue;
3346 else
3347 continue;
3349 else
3350 continue;
3351 lowj = ranges[j].low;
3352 if (lowj == NULL_TREE)
3353 continue;
3354 highj = ranges[j].high;
3355 if (highj == NULL_TREE)
3356 highj = TYPE_MAX_VALUE (type);
3357 wide_int mask2;
3358 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3359 highj, &mask2, NULL);
3360 if (exp2 != exp)
3361 continue;
3362 mask |= mask2;
3363 strict_overflow_p |= ranges[j].strict_overflow_p;
3364 candidates.safe_push (&ranges[j]);
3367 /* If every possible relative value of the expression is a valid shift
3368 amount, then we can merge the entry test into the bit test. In this
3369 case, if we would otherwise need 2 or more comparisons, then use
3370 the bit test; in the other cases, the threshold is 3 comparisons. */
3371 bool entry_test_needed;
3372 value_range r;
3373 if (TREE_CODE (exp) == SSA_NAME
3374 && get_range_query (cfun)->range_of_expr (r, exp)
3375 && !r.undefined_p ()
3376 && !r.varying_p ()
3377 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3379 wide_int min = r.lower_bound ();
3380 wide_int ilowi = wi::to_wide (lowi);
3381 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3383 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3384 mask = wi::lshift (mask, ilowi - min);
3386 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3388 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3389 mask = wi::lrshift (mask, min - ilowi);
3391 entry_test_needed = false;
3393 else
3394 entry_test_needed = true;
3395 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3397 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3398 wi::to_widest (lowi)
3399 + prec - 1 - wi::clz (mask));
3400 operand_entry *oe = (*ops)[ranges[i].idx];
3401 tree op = oe->op;
3402 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3403 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3404 (cfun, oe->id));
3405 location_t loc = gimple_location (stmt);
3406 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3408 /* See if it isn't cheaper to pretend the minimum value of the
3409 range is 0, if the maximum value is small enough.
3410 We can then avoid the subtraction of the minimum value, but the
3411 mask constant could perhaps be more expensive. */
3412 if (compare_tree_int (lowi, 0) > 0
3413 && compare_tree_int (high, prec) < 0)
3415 int cost_diff;
3416 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3417 rtx reg = gen_raw_REG (word_mode, 10000);
3418 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3419 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3420 GEN_INT (-m)),
3421 word_mode, speed_p);
3422 rtx r = immed_wide_int_const (mask, word_mode);
3423 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3424 word_mode, speed_p);
3425 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3426 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3427 word_mode, speed_p);
3428 if (cost_diff > 0)
3430 mask = wi::lshift (mask, m);
3431 lowi = build_zero_cst (TREE_TYPE (lowi));
3435 tree tem;
3436 if (entry_test_needed)
3438 tem = build_range_check (loc, optype, unshare_expr (exp),
3439 false, lowi, high);
3440 if (tem == NULL_TREE || is_gimple_val (tem))
3441 continue;
3443 else
3444 tem = NULL_TREE;
3445 tree etype = unsigned_type_for (TREE_TYPE (exp));
3446 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3447 fold_convert_loc (loc, etype, exp),
3448 fold_convert_loc (loc, etype, lowi));
3449 exp = fold_convert_loc (loc, integer_type_node, exp);
3450 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3451 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3452 build_int_cst (word_type, 1), exp);
3453 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3454 wide_int_to_tree (word_type, mask));
3455 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3456 build_zero_cst (word_type));
3457 if (is_gimple_val (exp))
3458 continue;
3460 /* The shift might have undefined behavior if TEM is true,
3461 but reassociate_bb isn't prepared to have basic blocks
3462 split when it is running. So, temporarily emit code
3463 with BIT_IOR_EXPR instead of &&, and fix it up in
3464 branch_fixup. */
3465 gimple_seq seq = NULL;
3466 if (tem)
3468 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3469 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3470 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3472 gimple_seq seq2;
3473 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3474 gimple_seq_add_seq_without_update (&seq, seq2);
3475 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3476 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3477 if (tem)
3479 gimple *g = gimple_build_assign (make_ssa_name (optype),
3480 BIT_IOR_EXPR, tem, exp);
3481 gimple_set_location (g, loc);
3482 gimple_seq_add_stmt_without_update (&seq, g);
3483 exp = gimple_assign_lhs (g);
3485 tree val = build_zero_cst (optype);
3486 if (update_range_test (&ranges[i], NULL, candidates.address (),
3487 candidates.length (), opcode, ops, exp,
3488 seq, false, val, val, strict_overflow_p))
3490 any_changes = true;
3491 if (tem)
3492 reassoc_branch_fixups.safe_push (tem);
3494 else
3495 gimple_seq_discard (seq);
3498 return any_changes;
3501 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3502 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3503 Also, handle x < C && y < C && z < C where C is power of two as
3504 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3505 as (x | y | z) < 0. */
3507 static bool
3508 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3509 vec<operand_entry *> *ops,
3510 struct range_entry *ranges)
3512 int i;
3513 unsigned int b;
3514 bool any_changes = false;
3515 auto_vec<int, 128> buckets;
3516 auto_vec<int, 32> chains;
3517 auto_vec<struct range_entry *, 32> candidates;
3519 for (i = first; i < length; i++)
3521 int idx;
3523 if (ranges[i].exp == NULL_TREE
3524 || TREE_CODE (ranges[i].exp) != SSA_NAME
3525 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3526 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3527 continue;
3529 if (ranges[i].low != NULL_TREE
3530 && ranges[i].high != NULL_TREE
3531 && ranges[i].in_p
3532 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3534 idx = !integer_zerop (ranges[i].low);
3535 if (idx && !integer_all_onesp (ranges[i].low))
3536 continue;
3538 else if (ranges[i].high != NULL_TREE
3539 && TREE_CODE (ranges[i].high) == INTEGER_CST
3540 && ranges[i].in_p)
3542 wide_int w = wi::to_wide (ranges[i].high);
3543 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3544 int l = wi::clz (w);
3545 idx = 2;
3546 if (l <= 0
3547 || l >= prec
3548 || w != wi::mask (prec - l, false, prec))
3549 continue;
3550 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3551 && ranges[i].low == NULL_TREE)
3552 || (ranges[i].low
3553 && integer_zerop (ranges[i].low))))
3554 continue;
3556 else if (ranges[i].high == NULL_TREE
3557 && ranges[i].low != NULL_TREE
3558 /* Perform this optimization only in the last
3559 reassoc pass, as it interferes with the reassociation
3560 itself and could also interfere with VRP etc., which might not
3561 be able to virtually undo the optimization. */
3562 && !reassoc_insert_powi_p
3563 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3564 && integer_zerop (ranges[i].low))
3565 idx = 3;
3566 else
3567 continue;
3569 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3570 if (buckets.length () <= b)
3571 buckets.safe_grow_cleared (b + 1, true);
3572 if (chains.length () <= (unsigned) i)
3573 chains.safe_grow (i + 1, true);
3574 chains[i] = buckets[b];
3575 buckets[b] = i + 1;
3578 FOR_EACH_VEC_ELT (buckets, b, i)
3579 if (i && chains[i - 1])
3581 int j, k = i;
3582 if ((b % 4) == 2)
3584 /* When ranges[X - 1].high + 1 is a power of two,
3585 we need to process the same bucket up to
3586 precision - 1 times, each time splitting the entries
3587 with the same high bound into one chain and the
3588 rest into another one to be processed later. */
3589 int this_prev = i;
3590 int other_prev = 0;
3591 for (j = chains[i - 1]; j; j = chains[j - 1])
3593 if (tree_int_cst_equal (ranges[i - 1].high,
3594 ranges[j - 1].high))
3596 chains[this_prev - 1] = j;
3597 this_prev = j;
3599 else if (other_prev == 0)
3601 buckets[b] = j;
3602 other_prev = j;
3604 else
3606 chains[other_prev - 1] = j;
3607 other_prev = j;
3610 chains[this_prev - 1] = 0;
3611 if (other_prev)
3612 chains[other_prev - 1] = 0;
3613 if (chains[i - 1] == 0)
3615 if (other_prev)
3616 b--;
3617 continue;
3620 for (j = chains[i - 1]; j; j = chains[j - 1])
3622 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3623 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3624 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3625 k = j;
3627 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3628 tree type2 = NULL_TREE;
3629 bool strict_overflow_p = false;
3630 candidates.truncate (0);
3631 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3632 type1 = pointer_sized_int_node;
3633 for (j = i; j; j = chains[j - 1])
3635 tree type = TREE_TYPE (ranges[j - 1].exp);
3636 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3637 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3638 type = pointer_sized_int_node;
3639 if ((b % 4) == 3)
3641 /* For the signed < 0 cases, the types should be
3642 really compatible (all signed with the same precision);
3643 put the ranges whose in_p differs from that of k first,
3644 the rest are added in the second loop below.  */
3645 if (!useless_type_conversion_p (type1, type))
3646 continue;
3647 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3648 candidates.safe_push (&ranges[j - 1]);
3649 type2 = type1;
3650 continue;
3652 if (j == k
3653 || useless_type_conversion_p (type1, type))
3655 else if (type2 == NULL_TREE
3656 || useless_type_conversion_p (type2, type))
3658 if (type2 == NULL_TREE)
3659 type2 = type;
3660 candidates.safe_push (&ranges[j - 1]);
3663 unsigned l = candidates.length ();
3664 for (j = i; j; j = chains[j - 1])
3666 tree type = TREE_TYPE (ranges[j - 1].exp);
3667 if (j == k)
3668 continue;
3669 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3670 type = pointer_sized_int_node;
3671 if ((b % 4) == 3)
3673 if (!useless_type_conversion_p (type1, type))
3674 continue;
3675 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3676 candidates.safe_push (&ranges[j - 1]);
3677 continue;
3679 if (useless_type_conversion_p (type1, type))
3681 else if (type2 == NULL_TREE
3682 || useless_type_conversion_p (type2, type))
3683 continue;
3684 candidates.safe_push (&ranges[j - 1]);
3686 gimple_seq seq = NULL;
3687 tree op = NULL_TREE;
3688 unsigned int id;
3689 struct range_entry *r;
3690 candidates.safe_push (&ranges[k - 1]);
3691 FOR_EACH_VEC_ELT (candidates, id, r)
3693 gimple *g;
3694 enum tree_code code;
3695 if (id == 0)
3697 op = r->exp;
3698 continue;
3700 if (id == l
3701 || POINTER_TYPE_P (TREE_TYPE (op))
3702 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3704 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3705 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3706 if (code == BIT_NOT_EXPR
3707 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3709 g = gimple_build_assign (make_ssa_name (type3),
3710 NOP_EXPR, op);
3711 gimple_seq_add_stmt_without_update (&seq, g);
3712 op = gimple_assign_lhs (g);
3714 g = gimple_build_assign (make_ssa_name (type3), code, op);
3715 gimple_seq_add_stmt_without_update (&seq, g);
3716 op = gimple_assign_lhs (g);
3718 tree type = TREE_TYPE (r->exp);
3719 tree exp = r->exp;
3720 if (POINTER_TYPE_P (type)
3721 || TREE_CODE (type) == OFFSET_TYPE
3722 || (id >= l && !useless_type_conversion_p (type1, type)))
3724 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3725 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3726 gimple_seq_add_stmt_without_update (&seq, g);
3727 exp = gimple_assign_lhs (g);
3729 if ((b % 4) == 3)
3730 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3731 else
3732 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3733 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3734 code, op, exp);
3735 gimple_seq_add_stmt_without_update (&seq, g);
3736 op = gimple_assign_lhs (g);
3738 type1 = TREE_TYPE (ranges[k - 1].exp);
3739 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3741 gimple *g
3742 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3743 gimple_seq_add_stmt_without_update (&seq, g);
3744 op = gimple_assign_lhs (g);
3746 candidates.pop ();
3747 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3748 candidates.length (), opcode, ops, op,
3749 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3750 ranges[k - 1].high, strict_overflow_p))
3751 any_changes = true;
3752 else
3753 gimple_seq_discard (seq);
3754 if ((b % 4) == 2 && buckets[b] != i)
3755 /* There is more work to do for this bucket. */
3756 b--;
3759 return any_changes;
3762 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3763 a >= 0 && a < b into (unsigned) a < (unsigned) b
3764 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3766 static bool
3767 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3768 vec<operand_entry *> *ops,
3769 struct range_entry *ranges,
3770 basic_block first_bb)
3772 int i;
3773 bool any_changes = false;
3774 hash_map<tree, int> *map = NULL;
3776 for (i = first; i < length; i++)
3778 if (ranges[i].exp == NULL_TREE
3779 || TREE_CODE (ranges[i].exp) != SSA_NAME
3780 || !ranges[i].in_p)
3781 continue;
3783 tree type = TREE_TYPE (ranges[i].exp);
3784 if (!INTEGRAL_TYPE_P (type)
3785 || TYPE_UNSIGNED (type)
3786 || ranges[i].low == NULL_TREE
3787 || !integer_zerop (ranges[i].low)
3788 || ranges[i].high != NULL_TREE)
3789 continue;
3790 /* EXP >= 0 here. */
3791 if (map == NULL)
3792 map = new hash_map <tree, int>;
3793 map->put (ranges[i].exp, i);
3796 if (map == NULL)
3797 return false;
3799 for (i = 0; i < length; i++)
3801 bool in_p = ranges[i].in_p;
3802 if (ranges[i].low == NULL_TREE
3803 || ranges[i].high == NULL_TREE)
3804 continue;
3805 if (!integer_zerop (ranges[i].low)
3806 || !integer_zerop (ranges[i].high))
3808 if (ranges[i].exp
3809 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3810 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3811 && integer_onep (ranges[i].low)
3812 && integer_onep (ranges[i].high))
3813 in_p = !in_p;
3814 else
3815 continue;
3818 gimple *stmt;
3819 tree_code ccode;
3820 tree rhs1, rhs2;
3821 if (ranges[i].exp)
3823 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3824 continue;
3825 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3826 if (!is_gimple_assign (stmt))
3827 continue;
3828 ccode = gimple_assign_rhs_code (stmt);
3829 rhs1 = gimple_assign_rhs1 (stmt);
3830 rhs2 = gimple_assign_rhs2 (stmt);
3832 else
3834 operand_entry *oe = (*ops)[ranges[i].idx];
3835 stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3836 if (gimple_code (stmt) != GIMPLE_COND)
3837 continue;
3838 ccode = gimple_cond_code (stmt);
3839 rhs1 = gimple_cond_lhs (stmt);
3840 rhs2 = gimple_cond_rhs (stmt);
3843 if (TREE_CODE (rhs1) != SSA_NAME
3844 || rhs2 == NULL_TREE
3845 || TREE_CODE (rhs2) != SSA_NAME)
3846 continue;
3848 switch (ccode)
3850 case GT_EXPR:
3851 case GE_EXPR:
3852 case LT_EXPR:
3853 case LE_EXPR:
3854 break;
3855 default:
3856 continue;
3858 if (in_p)
3859 ccode = invert_tree_comparison (ccode, false);
3860 switch (ccode)
3862 case GT_EXPR:
3863 case GE_EXPR:
3864 std::swap (rhs1, rhs2);
3865 ccode = swap_tree_comparison (ccode);
3866 break;
3867 case LT_EXPR:
3868 case LE_EXPR:
3869 break;
3870 default:
3871 gcc_unreachable ();
3874 int *idx = map->get (rhs1);
3875 if (idx == NULL)
3876 continue;
3878 /* maybe_optimize_range_tests allows statements without side-effects
3879 in the basic blocks as long as they are consumed in the same bb.
3880 Make sure rhs2's def stmt is not among them, otherwise we can't
3881 safely use get_nonzero_bits on it. E.g. in:
3882 # RANGE [-83, 1] NONZERO 173
3883 # k_32 = PHI <k_47(13), k_12(9)>
3885 if (k_32 >= 0)
3886 goto <bb 5>; [26.46%]
3887 else
3888 goto <bb 9>; [73.54%]
3890 <bb 5> [local count: 140323371]:
3891 # RANGE [0, 1] NONZERO 1
3892 _5 = (int) k_32;
3893 # RANGE [0, 4] NONZERO 4
3894 _21 = _5 << 2;
3895 # RANGE [0, 4] NONZERO 4
3896 iftmp.0_44 = (char) _21;
3897 if (k_32 < iftmp.0_44)
3898 goto <bb 6>; [84.48%]
3899 else
3900 goto <bb 9>; [15.52%]
3901 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3902 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3903 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3904 those stmts even for negative k_32, and the value ranges would no
3905 longer be guaranteed, so the optimization would be invalid. */
3906 while (opcode == ERROR_MARK)
3908 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3909 basic_block bb2 = gimple_bb (g);
3910 if (bb2
3911 && bb2 != first_bb
3912 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3914 /* As an exception, handle a few common cases. */
3915 if (gimple_assign_cast_p (g)
3916 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3918 tree op0 = gimple_assign_rhs1 (g);
3919 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3920 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3921 > TYPE_PRECISION (TREE_TYPE (op0))))
3922 /* Zero-extension is always ok. */
3923 break;
3924 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3925 == TYPE_PRECISION (TREE_TYPE (op0))
3926 && TREE_CODE (op0) == SSA_NAME)
3928 /* Cast from signed to unsigned or vice versa. Retry
3929 with the op0 as new rhs2. */
3930 rhs2 = op0;
3931 continue;
3934 else if (is_gimple_assign (g)
3935 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3936 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3937 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3938 /* Masking with INTEGER_CST with MSB clear is always ok
3939 too. */
3940 break;
3941 rhs2 = NULL_TREE;
3943 break;
3945 if (rhs2 == NULL_TREE)
3946 continue;
3948 wide_int nz = get_nonzero_bits (rhs2);
3949 if (wi::neg_p (nz))
3950 continue;
3952 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3953 and RHS2 is known to be >= 0. */
3954 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3956 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3957 if ((ranges[*idx].strict_overflow_p
3958 || ranges[i].strict_overflow_p)
3959 && issue_strict_overflow_warning (wc))
3960 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3961 "assuming signed overflow does not occur "
3962 "when simplifying range test");
3964 if (dump_file && (dump_flags & TDF_DETAILS))
3966 struct range_entry *r = &ranges[*idx];
3967 fprintf (dump_file, "Optimizing range test ");
3968 print_generic_expr (dump_file, r->exp);
3969 fprintf (dump_file, " +[");
3970 print_generic_expr (dump_file, r->low);
3971 fprintf (dump_file, ", ");
3972 print_generic_expr (dump_file, r->high);
3973 fprintf (dump_file, "] and comparison ");
3974 print_generic_expr (dump_file, rhs1);
3975 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3976 print_generic_expr (dump_file, rhs2);
3977 fprintf (dump_file, "\n into (");
3978 print_generic_expr (dump_file, utype);
3979 fprintf (dump_file, ") ");
3980 print_generic_expr (dump_file, rhs1);
3981 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3982 print_generic_expr (dump_file, utype);
3983 fprintf (dump_file, ") ");
3984 print_generic_expr (dump_file, rhs2);
3985 fprintf (dump_file, "\n");
3988 operand_entry *oe = (*ops)[ranges[i].idx];
3989 ranges[i].in_p = 0;
3990 if (opcode == BIT_IOR_EXPR
3991 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3993 ranges[i].in_p = 1;
3994 ccode = invert_tree_comparison (ccode, false);
3997 unsigned int uid = gimple_uid (stmt);
3998 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3999 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4000 gimple_set_uid (g, uid);
4001 rhs1 = gimple_assign_lhs (g);
4002 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4003 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4005 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4006 gimple_set_uid (g, uid);
4007 rhs2 = gimple_assign_lhs (g);
4008 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4010 if (tree_swap_operands_p (rhs1, rhs2))
4012 std::swap (rhs1, rhs2);
4013 ccode = swap_tree_comparison (ccode);
4015 if (gimple_code (stmt) == GIMPLE_COND)
4017 gcond *c = as_a <gcond *> (stmt);
4018 gimple_cond_set_code (c, ccode);
4019 gimple_cond_set_lhs (c, rhs1);
4020 gimple_cond_set_rhs (c, rhs2);
4021 update_stmt (stmt);
4023 else
4025 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4026 if (!INTEGRAL_TYPE_P (ctype)
4027 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4028 && TYPE_PRECISION (ctype) != 1))
4029 ctype = boolean_type_node;
4030 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4031 gimple_set_uid (g, uid);
4032 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4033 if (oe->op && ctype != TREE_TYPE (oe->op))
4035 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4036 NOP_EXPR, gimple_assign_lhs (g));
4037 gimple_set_uid (g, uid);
4038 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4040 ranges[i].exp = gimple_assign_lhs (g);
4041 oe->op = ranges[i].exp;
4042 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4043 ranges[i].high = ranges[i].low;
4045 ranges[i].strict_overflow_p = false;
4046 oe = (*ops)[ranges[*idx].idx];
4047 /* Now change all the other range test immediate uses, so that
4048 those tests will be optimized away. */
4049 if (opcode == ERROR_MARK)
4051 if (oe->op)
4052 oe->op = build_int_cst (TREE_TYPE (oe->op),
4053 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4054 else
4055 oe->op = (oe->rank == BIT_IOR_EXPR
4056 ? boolean_false_node : boolean_true_node);
4058 else
4059 oe->op = error_mark_node;
4060 ranges[*idx].exp = NULL_TREE;
4061 ranges[*idx].low = NULL_TREE;
4062 ranges[*idx].high = NULL_TREE;
4063 any_changes = true;
4066 delete map;
4067 return any_changes;
4070 /* Optimize range tests, similarly to how fold_range_test optimizes
4071 them on trees. The tree code for the binary
4072 operation between all the operands is OPCODE.
4073 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4074 maybe_optimize_range_tests for inter-bb range optimization.
4075 In that case, if oe->op is NULL, oe->id is the index of the bb whose
4076 GIMPLE_COND is && or ||ed into the test, and oe->rank gives
4077 the actual opcode.
4078 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
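/* For illustration only: a typical merge performed here turns

	a == 3 || a == 4

   (two single-value ranges on the same SSA_NAME) into a single range
   check roughly equivalent to

	(unsigned) a - 3 <= 1

   so only one comparison is emitted.  The exact form is produced by
   update_range_test; the C spelling above is just a sketch.  */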
4080 static bool
4081 optimize_range_tests (enum tree_code opcode,
4082 vec<operand_entry *> *ops, basic_block first_bb)
4084 unsigned int length = ops->length (), i, j, first;
4085 operand_entry *oe;
4086 struct range_entry *ranges;
4087 bool any_changes = false;
4089 if (length == 1)
4090 return false;
4092 ranges = XNEWVEC (struct range_entry, length);
4093 for (i = 0; i < length; i++)
4095 oe = (*ops)[i];
4096 ranges[i].idx = i;
4097 init_range_entry (ranges + i, oe->op,
4098 oe->op
4099 ? NULL
4100 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4101 /* For |, invert it now; we will invert it again before emitting
4102 the optimized expression. */
4103 if (opcode == BIT_IOR_EXPR
4104 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4105 ranges[i].in_p = !ranges[i].in_p;
4108 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4109 for (i = 0; i < length; i++)
4110 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4111 break;
4113 /* Try to merge ranges. */
4114 for (first = i; i < length; i++)
4116 tree low = ranges[i].low;
4117 tree high = ranges[i].high;
4118 int in_p = ranges[i].in_p;
4119 bool strict_overflow_p = ranges[i].strict_overflow_p;
4120 int update_fail_count = 0;
4122 for (j = i + 1; j < length; j++)
4124 if (ranges[i].exp != ranges[j].exp)
4125 break;
4126 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4127 ranges[j].in_p, ranges[j].low, ranges[j].high))
4128 break;
4129 strict_overflow_p |= ranges[j].strict_overflow_p;
4132 if (j == i + 1)
4133 continue;
4135 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4136 opcode, ops, ranges[i].exp, NULL, in_p,
4137 low, high, strict_overflow_p))
4139 i = j - 1;
4140 any_changes = true;
4142 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4143 while update_range_test would fail. */
4144 else if (update_fail_count == 64)
4145 i = j - 1;
4146 else
4147 ++update_fail_count;
4150 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4151 ops, ranges);
4153 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4154 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4155 ops, ranges);
4156 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4157 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4158 ops, ranges);
4159 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4160 ranges, first_bb);
4161 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4162 ops, ranges);
4164 if (any_changes && opcode != ERROR_MARK)
4166 j = 0;
4167 FOR_EACH_VEC_ELT (*ops, i, oe)
4169 if (oe->op == error_mark_node)
4170 continue;
4171 else if (i != j)
4172 (*ops)[j] = oe;
4173 j++;
4175 ops->truncate (j);
4178 XDELETEVEC (ranges);
4179 return any_changes;
4182 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4183 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4184 otherwise the comparison code. TYPE is a return value that is set
4185 to the type of the comparison. */
4187 static tree_code
4188 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4189 tree *lhs, tree *rhs, gassign **vcond)
4191 if (TREE_CODE (var) != SSA_NAME)
4192 return ERROR_MARK;
4194 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4195 if (stmt == NULL)
4196 return ERROR_MARK;
4197 if (vcond)
4198 *vcond = stmt;
4200 /* ??? If we start creating more COND_EXPR, we could perform
4201 this same optimization with them. For now, simplify. */
4202 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4203 return ERROR_MARK;
4205 tree cond = gimple_assign_rhs1 (stmt);
4206 tree_code cmp = TREE_CODE (cond);
4207 if (cmp != SSA_NAME)
4208 return ERROR_MARK;
4210 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4211 if (assign == NULL
4212 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4213 return ERROR_MARK;
4215 cmp = gimple_assign_rhs_code (assign);
4216 if (lhs)
4217 *lhs = gimple_assign_rhs1 (assign);
4218 if (rhs)
4219 *rhs = gimple_assign_rhs2 (assign);
4221 /* ??? For now, allow only canonical true and false result vectors.
4222 We could expand this to other constants should the need arise,
4223 but at the moment we don't create them. */
4224 tree t = gimple_assign_rhs2 (stmt);
4225 tree f = gimple_assign_rhs3 (stmt);
4226 bool inv;
4227 if (integer_all_onesp (t))
4228 inv = false;
4229 else if (integer_all_onesp (f))
4231 cmp = invert_tree_comparison (cmp, false);
4232 inv = true;
4234 else
4235 return ERROR_MARK;
4236 if (!integer_zerop (f))
4237 return ERROR_MARK;
4239 /* Success! */
4240 if (rets)
4241 *rets = assign;
4242 if (reti)
4243 *reti = inv;
4244 if (type)
4245 *type = TREE_TYPE (cond);
4246 return cmp;
4249 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4250 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
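/* For illustration only (hypothetical SSA names and constants): if two
   VEC_COND_EXPRs combined by & test the same operand, e.g.

	c1_5 = VEC_COND_EXPR <v_2 < { 5, 5, 5, 5 }, all-ones, all-zeros>;
	c2_6 = VEC_COND_EXPR <v_2 < { 3, 3, 3, 3 }, all-ones, all-zeros>;
	r_7 = c1_5 & c2_6;

   maybe_fold_and_comparisons can combine the two lane-wise comparisons
   (v < 5 && v < 3 folds to v < 3), so r_7 becomes a single VEC_COND_EXPR
   and the other operand entry is dropped.  */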
4252 static bool
4253 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4255 unsigned int length = ops->length (), i, j;
4256 bool any_changes = false;
4258 if (length == 1)
4259 return false;
4261 for (i = 0; i < length; ++i)
4263 tree elt0 = (*ops)[i]->op;
4265 gassign *stmt0, *vcond0;
4266 bool invert;
4267 tree type, lhs0, rhs0;
4268 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4269 &rhs0, &vcond0);
4270 if (cmp0 == ERROR_MARK)
4271 continue;
4273 for (j = i + 1; j < length; ++j)
4275 tree &elt1 = (*ops)[j]->op;
4277 gassign *stmt1, *vcond1;
4278 tree lhs1, rhs1;
4279 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4280 &rhs1, &vcond1);
4281 if (cmp1 == ERROR_MARK)
4282 continue;
4284 tree comb;
4285 if (opcode == BIT_AND_EXPR)
4286 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4287 cmp1, lhs1, rhs1);
4288 else if (opcode == BIT_IOR_EXPR)
4289 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4290 cmp1, lhs1, rhs1);
4291 else
4292 gcc_unreachable ();
4293 if (comb == NULL)
4294 continue;
4296 /* Success! */
4297 if (dump_file && (dump_flags & TDF_DETAILS))
4299 fprintf (dump_file, "Transforming ");
4300 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4301 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4302 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4303 fprintf (dump_file, " into ");
4304 print_generic_expr (dump_file, comb);
4305 fputc ('\n', dump_file);
4308 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4309 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4310 true, GSI_SAME_STMT);
4311 if (invert)
4312 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4313 gimple_assign_rhs3_ptr (vcond0));
4314 gimple_assign_set_rhs1 (vcond0, exp);
4315 update_stmt (vcond0);
4317 elt1 = error_mark_node;
4318 any_changes = true;
4322 if (any_changes)
4324 operand_entry *oe;
4325 j = 0;
4326 FOR_EACH_VEC_ELT (*ops, i, oe)
4328 if (oe->op == error_mark_node)
4329 continue;
4330 else if (i != j)
4331 (*ops)[j] = oe;
4332 j++;
4334 ops->truncate (j);
4337 return any_changes;
4340 /* Return true if STMT is a cast like:
4341 <bb N>:
4343 _123 = (int) _234;
4345 <bb M>:
4346 # _345 = PHI <_123(N), 1(...), 1(...)>
4347 where _234 has bool type, _123 has a single use and
4348 bb N has a single successor M. This is commonly used in
4349 the last block of a range test.
4351 Also return true if STMT is a tcc_comparison like:
4352 <bb N>:
4354 _234 = a_2(D) == 2;
4356 <bb M>:
4357 # _345 = PHI <_234(N), 1(...), 1(...)>
4358 _346 = (int) _345;
4359 where _234 has bool type, a single use and
4360 bb N has a single successor M. This is commonly used in
4361 the last block of a range test. */
4363 static bool
4364 final_range_test_p (gimple *stmt)
4366 basic_block bb, rhs_bb, lhs_bb;
4367 edge e;
4368 tree lhs, rhs;
4369 use_operand_p use_p;
4370 gimple *use_stmt;
4372 if (!gimple_assign_cast_p (stmt)
4373 && (!is_gimple_assign (stmt)
4374 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4375 != tcc_comparison)))
4376 return false;
4377 bb = gimple_bb (stmt);
4378 if (!single_succ_p (bb))
4379 return false;
4380 e = single_succ_edge (bb);
4381 if (e->flags & EDGE_COMPLEX)
4382 return false;
4384 lhs = gimple_assign_lhs (stmt);
4385 rhs = gimple_assign_rhs1 (stmt);
4386 if (gimple_assign_cast_p (stmt)
4387 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4388 || TREE_CODE (rhs) != SSA_NAME
4389 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4390 return false;
4392 if (!gimple_assign_cast_p (stmt)
4393 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4394 return false;
4396 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4397 if (!single_imm_use (lhs, &use_p, &use_stmt))
4398 return false;
4400 if (gimple_code (use_stmt) != GIMPLE_PHI
4401 || gimple_bb (use_stmt) != e->dest)
4402 return false;
4404 /* And that the rhs is defined in the same loop. */
4405 if (gimple_assign_cast_p (stmt))
4407 if (TREE_CODE (rhs) != SSA_NAME
4408 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4409 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4410 return false;
4412 else
4414 if (TREE_CODE (lhs) != SSA_NAME
4415 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4416 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4417 return false;
4420 return true;
4423 /* Return true if BB is a suitable basic block for inter-bb range test
4424 optimization. If BACKWARD is true, BB should be the only predecessor
4425 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4426 or compared against to find a common basic block to which all conditions
4427 branch if true resp. false. If BACKWARD is false, TEST_BB should
4428 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4429 TEST_BB is a bb ending in a condition where the edge to the non-*OTHER_BB
4430 block points to an empty block that falls through into *OTHER_BB and
4431 the phi args match that path. */
4433 static bool
4434 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4435 bool *test_swapped_p, bool backward)
4437 edge_iterator ei, ei2;
4438 edge e, e2;
4439 gimple *stmt;
4440 gphi_iterator gsi;
4441 bool other_edge_seen = false;
4442 bool is_cond;
4444 if (test_bb == bb)
4445 return false;
4446 /* Check last stmt first. */
4447 stmt = last_nondebug_stmt (bb);
4448 if (stmt == NULL
4449 || (gimple_code (stmt) != GIMPLE_COND
4450 && (backward || !final_range_test_p (stmt)))
4451 || gimple_visited_p (stmt)
4452 || stmt_could_throw_p (cfun, stmt)
4453 || *other_bb == bb)
4454 return false;
4455 is_cond = gimple_code (stmt) == GIMPLE_COND;
4456 if (is_cond)
4458 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4459 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4460 to *OTHER_BB (if not set yet, try to find it out). */
4461 if (EDGE_COUNT (bb->succs) != 2)
4462 return false;
4463 FOR_EACH_EDGE (e, ei, bb->succs)
4465 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4466 return false;
4467 if (e->dest == test_bb)
4469 if (backward)
4470 continue;
4471 else
4472 return false;
4474 if (e->dest == bb)
4475 return false;
4476 if (*other_bb == NULL)
4478 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4479 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4480 return false;
4481 else if (e->dest == e2->dest)
4482 *other_bb = e->dest;
4483 if (*other_bb == NULL)
4484 return false;
4486 if (e->dest == *other_bb)
4487 other_edge_seen = true;
4488 else if (backward)
4489 return false;
4491 if (*other_bb == NULL || !other_edge_seen)
4492 return false;
4494 else if (single_succ (bb) != *other_bb)
4495 return false;
4497 /* Now check all PHIs of *OTHER_BB. */
4498 e = find_edge (bb, *other_bb);
4499 e2 = find_edge (test_bb, *other_bb);
4500 retry:;
4501 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4503 gphi *phi = gsi.phi ();
4504 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4505 corresponding to BB and TEST_BB predecessor must be the same. */
4506 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4507 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4509 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4510 one of the PHIs should have the lhs of the last stmt in
4511 that block as PHI arg and that PHI should have 0 or 1
4512 corresponding to it in all other range test basic blocks
4513 considered. */
4514 if (!is_cond)
4516 if (gimple_phi_arg_def (phi, e->dest_idx)
4517 == gimple_assign_lhs (stmt)
4518 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4519 || integer_onep (gimple_phi_arg_def (phi,
4520 e2->dest_idx))))
4521 continue;
4523 else
4525 gimple *test_last = last_nondebug_stmt (test_bb);
4526 if (gimple_code (test_last) == GIMPLE_COND)
4528 if (backward ? e2->src != test_bb : e->src != bb)
4529 return false;
4531 /* For last_bb, handle also:
4532 if (x_3(D) == 3)
4533 goto <bb 6>; [34.00%]
4534 else
4535 goto <bb 7>; [66.00%]
4537 <bb 6> [local count: 79512730]:
4539 <bb 7> [local count: 1073741824]:
4540 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4541 where bb 7 is *OTHER_BB, but the PHI values from the
4542 earlier bbs match the path through the empty bb
4543 in between. */
4544 edge e3;
4545 if (backward)
4546 e3 = EDGE_SUCC (test_bb,
4547 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4548 else
4549 e3 = EDGE_SUCC (bb,
4550 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4551 if (empty_block_p (e3->dest)
4552 && single_succ_p (e3->dest)
4553 && single_succ (e3->dest) == *other_bb
4554 && single_pred_p (e3->dest)
4555 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4557 if (backward)
4558 e2 = single_succ_edge (e3->dest);
4559 else
4560 e = single_succ_edge (e3->dest);
4561 if (test_swapped_p)
4562 *test_swapped_p = true;
4563 goto retry;
4566 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4567 == gimple_assign_lhs (test_last)
4568 && (integer_zerop (gimple_phi_arg_def (phi,
4569 e->dest_idx))
4570 || integer_onep (gimple_phi_arg_def (phi,
4571 e->dest_idx))))
4572 continue;
4575 return false;
4578 return true;
4581 /* Return true if BB doesn't have side-effects that would disallow
4582 range test optimization, all SSA_NAMEs set in the bb are consumed
4583 in the bb and there are no PHIs. */
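/* For illustration only (hypothetical GIMPLE): a block such as

	<bb 4> :
	_1 = x_2(D) + 1;
	_3 = _1 * 2;
	if (_3 > 10)
	  goto <bb 5>;
	else
	  goto <bb 6>;

   qualifies: _1 and _3 are consumed inside bb 4 and nothing can trap or
   have side-effects.  A block containing a call, a store, a PHI, or an
   SSA_NAME that is also used in another bb does not qualify.  */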
4585 bool
4586 no_side_effect_bb (basic_block bb)
4588 gimple_stmt_iterator gsi;
4589 gimple *last;
4591 if (!gimple_seq_empty_p (phi_nodes (bb)))
4592 return false;
4593 last = last_nondebug_stmt (bb);
4594 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4596 gimple *stmt = gsi_stmt (gsi);
4597 tree lhs;
4598 imm_use_iterator imm_iter;
4599 use_operand_p use_p;
4601 if (is_gimple_debug (stmt))
4602 continue;
4603 if (gimple_has_side_effects (stmt))
4604 return false;
4605 if (stmt == last)
4606 return true;
4607 if (!is_gimple_assign (stmt))
4608 return false;
4609 lhs = gimple_assign_lhs (stmt);
4610 if (TREE_CODE (lhs) != SSA_NAME)
4611 return false;
4612 if (gimple_assign_rhs_could_trap_p (stmt))
4613 return false;
4614 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4616 gimple *use_stmt = USE_STMT (use_p);
4617 if (is_gimple_debug (use_stmt))
4618 continue;
4619 if (gimple_bb (use_stmt) != bb)
4620 return false;
4623 return false;
4626 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4627 return true and fill in *OPS recursively. */
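/* For illustration only (hypothetical SSA names): given

	_6 = _3 | _4;
	_8 = _6 | _7;

   where _6 has a single use and _3, _4 and _7 are single-use names not
   themselves defined by reassociable | statements, get_ops (_8,
   BIT_IOR_EXPR, &ops, loop) pushes operand entries for _3, _4 and _7,
   i.e. the leaves of the | tree, and marks the inner statements
   visited.  */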
4629 static bool
4630 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4631 class loop *loop)
4633 gimple *stmt = SSA_NAME_DEF_STMT (var);
4634 tree rhs[2];
4635 int i;
4637 if (!is_reassociable_op (stmt, code, loop))
4638 return false;
4640 rhs[0] = gimple_assign_rhs1 (stmt);
4641 rhs[1] = gimple_assign_rhs2 (stmt);
4642 gimple_set_visited (stmt, true);
4643 for (i = 0; i < 2; i++)
4644 if (TREE_CODE (rhs[i]) == SSA_NAME
4645 && !get_ops (rhs[i], code, ops, loop)
4646 && has_single_use (rhs[i]))
4648 operand_entry *oe = operand_entry_pool.allocate ();
4650 oe->op = rhs[i];
4651 oe->rank = code;
4652 oe->id = 0;
4653 oe->count = 1;
4654 oe->stmt_to_insert = NULL;
4655 ops->safe_push (oe);
4657 return true;
4660 /* Find the ops that were added by get_ops starting from VAR, see if
4661 they were changed during update_range_test and if yes, create new
4662 stmts. */
4664 static tree
4665 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4666 unsigned int *pidx, class loop *loop)
4668 gimple *stmt = SSA_NAME_DEF_STMT (var);
4669 tree rhs[4];
4670 int i;
4672 if (!is_reassociable_op (stmt, code, loop))
4673 return NULL;
4675 rhs[0] = gimple_assign_rhs1 (stmt);
4676 rhs[1] = gimple_assign_rhs2 (stmt);
4677 rhs[2] = rhs[0];
4678 rhs[3] = rhs[1];
4679 for (i = 0; i < 2; i++)
4680 if (TREE_CODE (rhs[i]) == SSA_NAME)
4682 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4683 if (rhs[2 + i] == NULL_TREE)
4685 if (has_single_use (rhs[i]))
4686 rhs[2 + i] = ops[(*pidx)++]->op;
4687 else
4688 rhs[2 + i] = rhs[i];
4691 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4692 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4694 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4695 var = make_ssa_name (TREE_TYPE (var));
4696 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4697 rhs[2], rhs[3]);
4698 gimple_set_uid (g, gimple_uid (stmt));
4699 gimple_set_visited (g, true);
4700 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4701 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4702 if (fold_stmt_inplace (&gsi2))
4703 update_stmt (g);
4705 return var;
4708 /* Structure to track the initial value passed to get_ops and
4709 the range in the ops vector for each basic block. */
4711 struct inter_bb_range_test_entry
4713 tree op;
4714 unsigned int first_idx, last_idx;
4717 /* Inter-bb range test optimization.
4719 Returns TRUE if a gimple conditional is optimized to a constant
4720 true/false, otherwise returns FALSE.
4722 This indicates to the caller that it should run a CFG cleanup pass
4723 once reassociation is completed. */
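/* For illustration only (hypothetical source): this catches shapes like

	if (a < 0)
	  return 0;
	if (a > 9)
	  return 0;

   where the two GIMPLE_CONDs sit in adjacent basic blocks that branch to
   the same destination.  The combined test is roughly equivalent to

	if ((unsigned) a > 9)
	  return 0;

   so one of the original conditions becomes constant true/false, which is
   why the caller may need to run a CFG cleanup afterwards.  */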
4725 static bool
4726 maybe_optimize_range_tests (gimple *stmt)
4728 basic_block first_bb = gimple_bb (stmt);
4729 basic_block last_bb = first_bb;
4730 basic_block other_bb = NULL;
4731 basic_block bb;
4732 edge_iterator ei;
4733 edge e;
4734 auto_vec<operand_entry *> ops;
4735 auto_vec<inter_bb_range_test_entry> bbinfo;
4736 bool any_changes = false;
4737 bool cfg_cleanup_needed = false;
4739 /* Consider only basic blocks that end with GIMPLE_COND or
4740 a cast statement satisfying final_range_test_p. All
4741 but the last bb in the first_bb .. last_bb range
4742 should end with GIMPLE_COND. */
4743 if (gimple_code (stmt) == GIMPLE_COND)
4745 if (EDGE_COUNT (first_bb->succs) != 2)
4746 return cfg_cleanup_needed;
4748 else if (final_range_test_p (stmt))
4749 other_bb = single_succ (first_bb);
4750 else
4751 return cfg_cleanup_needed;
4753 if (stmt_could_throw_p (cfun, stmt))
4754 return cfg_cleanup_needed;
4756 /* As relative ordering of post-dominator sons isn't fixed,
4757 maybe_optimize_range_tests can be called first on any
4758 bb in the range we want to optimize. So, start searching
4759 backwards to see if first_bb can be set to a predecessor. */
4760 while (single_pred_p (first_bb))
4762 basic_block pred_bb = single_pred (first_bb);
4763 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4764 break;
4765 if (!no_side_effect_bb (first_bb))
4766 break;
4767 first_bb = pred_bb;
4769 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4770 Before starting forward search in last_bb successors, find
4771 out the other_bb. */
4772 if (first_bb == last_bb)
4774 other_bb = NULL;
4775 /* As a non-GIMPLE_COND last stmt always terminates the range,
4776 the forward search would not discover anything, so just give up. */
4777 if (gimple_code (stmt) != GIMPLE_COND)
4778 return cfg_cleanup_needed;
4779 /* Look at both successors. Either a successor ends with a GIMPLE_COND
4780 and satisfies suitable_cond_bb, or it ends with a cast and
4781 other_bb is that cast's successor. */
4782 FOR_EACH_EDGE (e, ei, first_bb->succs)
4783 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4784 || e->dest == first_bb)
4785 return cfg_cleanup_needed;
4786 else if (single_pred_p (e->dest))
4788 stmt = last_nondebug_stmt (e->dest);
4789 if (stmt
4790 && gimple_code (stmt) == GIMPLE_COND
4791 && EDGE_COUNT (e->dest->succs) == 2)
4793 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4794 NULL, true))
4795 break;
4796 else
4797 other_bb = NULL;
4799 else if (stmt
4800 && final_range_test_p (stmt)
4801 && find_edge (first_bb, single_succ (e->dest)))
4803 other_bb = single_succ (e->dest);
4804 if (other_bb == first_bb)
4805 other_bb = NULL;
4808 if (other_bb == NULL)
4809 return cfg_cleanup_needed;
4811 /* Now do the forward search, moving last_bb to successor bbs
4812 that aren't other_bb. */
4813 while (EDGE_COUNT (last_bb->succs) == 2)
4815 FOR_EACH_EDGE (e, ei, last_bb->succs)
4816 if (e->dest != other_bb)
4817 break;
4818 if (e == NULL)
4819 break;
4820 if (!single_pred_p (e->dest))
4821 break;
4822 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4823 break;
4824 if (!no_side_effect_bb (e->dest))
4825 break;
4826 last_bb = e->dest;
4828 if (first_bb == last_bb)
4829 return cfg_cleanup_needed;
4830 /* Here basic blocks first_bb through last_bb's predecessor
4831 end with GIMPLE_COND, all of them have one of the edges to
4832 other_bb and another to another block in the range,
4833 all blocks except first_bb don't have side-effects and
4834 last_bb ends with either GIMPLE_COND, or cast satisfying
4835 final_range_test_p. */
4836 for (bb = last_bb; ; bb = single_pred (bb))
4838 enum tree_code code;
4839 tree lhs, rhs;
4840 inter_bb_range_test_entry bb_ent;
4842 bb_ent.op = NULL_TREE;
4843 bb_ent.first_idx = ops.length ();
4844 bb_ent.last_idx = bb_ent.first_idx;
4845 e = find_edge (bb, other_bb);
4846 stmt = last_nondebug_stmt (bb);
4847 gimple_set_visited (stmt, true);
4848 if (gimple_code (stmt) != GIMPLE_COND)
4850 use_operand_p use_p;
4851 gimple *phi;
4852 edge e2;
4853 unsigned int d;
4855 lhs = gimple_assign_lhs (stmt);
4856 rhs = gimple_assign_rhs1 (stmt);
4857 gcc_assert (bb == last_bb);
4859 /* stmt is
4860 _123 = (int) _234;
4862 _234 = a_2(D) == 2;
4864 followed by:
4865 <bb M>:
4866 # _345 = PHI <_123(N), 1(...), 1(...)>
4868 or 0 instead of 1. If it is 0, the _234
4869 range test is anded together with all the
4870 other range tests; if it is 1, it is ored with
4871 them. */
4872 single_imm_use (lhs, &use_p, &phi);
4873 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4874 e2 = find_edge (first_bb, other_bb);
4875 d = e2->dest_idx;
4876 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4877 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4878 code = BIT_AND_EXPR;
4879 else
4881 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4882 code = BIT_IOR_EXPR;
4885 /* If _234's SSA_NAME_DEF_STMT is
4886 _234 = _567 | _789;
4887 (or &, corresponding to 1/0 in the phi arguments),
4888 push into ops the individual range test arguments
4889 of the bitwise or resp. and, recursively. */
4890 if (TREE_CODE (rhs) == SSA_NAME
4891 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4892 != tcc_comparison)
4893 && !get_ops (rhs, code, &ops,
4894 loop_containing_stmt (stmt))
4895 && has_single_use (rhs))
4897 /* Otherwise, push the _234 range test itself. */
4898 operand_entry *oe = operand_entry_pool.allocate ();
4900 oe->op = rhs;
4901 oe->rank = code;
4902 oe->id = 0;
4903 oe->count = 1;
4904 oe->stmt_to_insert = NULL;
4905 ops.safe_push (oe);
4906 bb_ent.last_idx++;
4907 bb_ent.op = rhs;
4909 else if (is_gimple_assign (stmt)
4910 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4911 == tcc_comparison)
4912 && !get_ops (lhs, code, &ops,
4913 loop_containing_stmt (stmt))
4914 && has_single_use (lhs))
4916 operand_entry *oe = operand_entry_pool.allocate ();
4917 oe->op = lhs;
4918 oe->rank = code;
4919 oe->id = 0;
4920 oe->count = 1;
4921 ops.safe_push (oe);
4922 bb_ent.last_idx++;
4923 bb_ent.op = lhs;
4925 else
4927 bb_ent.last_idx = ops.length ();
4928 bb_ent.op = rhs;
4930 bbinfo.safe_push (bb_ent);
4931 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4932 ops[i]->id = bb->index;
4933 continue;
4935 else if (bb == last_bb)
4937 /* For last_bb, handle also:
4938 if (x_3(D) == 3)
4939 goto <bb 6>; [34.00%]
4940 else
4941 goto <bb 7>; [66.00%]
4943 <bb 6> [local count: 79512730]:
4945 <bb 7> [local count: 1073741824]:
4946 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4947 where bb 7 is OTHER_BB, but the PHI values from the
4948 earlier bbs match the path through the empty bb
4949 in between. */
4950 bool test_swapped_p = false;
4951 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4952 &other_bb, &test_swapped_p, true);
4953 gcc_assert (ok);
4954 if (test_swapped_p)
4955 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4957 /* Otherwise stmt is GIMPLE_COND. */
4958 code = gimple_cond_code (stmt);
4959 lhs = gimple_cond_lhs (stmt);
4960 rhs = gimple_cond_rhs (stmt);
4961 if (TREE_CODE (lhs) == SSA_NAME
4962 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4963 && ((code != EQ_EXPR && code != NE_EXPR)
4964 || rhs != boolean_false_node
4965 /* Either push into ops the individual bitwise
4966 or resp. and operands, depending on which
4967 edge is other_bb. */
4968 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4969 ^ (code == EQ_EXPR))
4970 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4971 loop_containing_stmt (stmt))))
4973 /* Or push the GIMPLE_COND stmt itself. */
4974 operand_entry *oe = operand_entry_pool.allocate ();
4976 oe->op = NULL;
4977 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4978 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4979 /* oe->op = NULL signals that there is no SSA_NAME
4980 for the range test, and oe->id instead is the
4981 number of the basic block at whose end the GIMPLE_COND
4982 is. */
4983 oe->id = bb->index;
4984 oe->count = 1;
4985 oe->stmt_to_insert = NULL;
4986 ops.safe_push (oe);
4987 bb_ent.op = NULL;
4988 bb_ent.last_idx++;
4990 else if (ops.length () > bb_ent.first_idx)
4992 bb_ent.op = lhs;
4993 bb_ent.last_idx = ops.length ();
4995 bbinfo.safe_push (bb_ent);
4996 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4997 ops[i]->id = bb->index;
4998 if (bb == first_bb)
4999 break;
5001 if (ops.length () > 1)
5002 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5003 if (any_changes)
5005 unsigned int idx, max_idx = 0;
5006 /* update_ops relies on the has_single_use predicates returning the
5007 same values as they did during get_ops earlier. Additionally it
5008 never removes statements, only adds new ones, and it should walk
5009 from the single imm use and check the predicate already before
5010 making those changes.
5011 On the other hand, the handling of GIMPLE_COND directly can turn
5012 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5013 it needs to be done in a separate loop afterwards. */
5014 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5016 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5017 && bbinfo[idx].op != NULL_TREE)
5019 tree new_op;
5021 max_idx = idx;
5022 stmt = last_nondebug_stmt (bb);
5023 new_op = update_ops (bbinfo[idx].op,
5024 (enum tree_code)
5025 ops[bbinfo[idx].first_idx]->rank,
5026 ops, &bbinfo[idx].first_idx,
5027 loop_containing_stmt (stmt));
5028 if (new_op == NULL_TREE)
5030 gcc_assert (bb == last_bb);
5031 new_op = ops[bbinfo[idx].first_idx++]->op;
5033 if (bbinfo[idx].op != new_op)
5035 imm_use_iterator iter;
5036 use_operand_p use_p;
5037 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5039 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5040 if (is_gimple_debug (use_stmt))
5041 continue;
5042 else if (gimple_code (use_stmt) == GIMPLE_COND
5043 || gimple_code (use_stmt) == GIMPLE_PHI)
5044 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5045 SET_USE (use_p, new_op);
5046 else if ((is_gimple_assign (use_stmt)
5047 && (TREE_CODE_CLASS
5048 (gimple_assign_rhs_code (use_stmt))
5049 == tcc_comparison)))
5050 cast_or_tcc_cmp_stmt = use_stmt;
5051 else if (gimple_assign_cast_p (use_stmt))
5052 cast_or_tcc_cmp_stmt = use_stmt;
5053 else
5054 gcc_unreachable ();
5056 if (cast_or_tcc_cmp_stmt)
5058 gcc_assert (bb == last_bb);
5059 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5060 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5061 enum tree_code rhs_code
5062 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5063 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5064 : CONVERT_EXPR;
5065 gassign *g;
5066 if (is_gimple_min_invariant (new_op))
5068 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5069 g = gimple_build_assign (new_lhs, new_op);
5071 else
5072 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5073 gimple_stmt_iterator gsi
5074 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5075 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5076 gimple_set_visited (g, true);
5077 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5078 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5079 if (is_gimple_debug (use_stmt))
5080 continue;
5081 else if (gimple_code (use_stmt) == GIMPLE_COND
5082 || gimple_code (use_stmt) == GIMPLE_PHI)
5083 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5084 SET_USE (use_p, new_lhs);
5085 else
5086 gcc_unreachable ();
5090 if (bb == first_bb)
5091 break;
5093 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5095 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5096 && bbinfo[idx].op == NULL_TREE
5097 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5099 gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5101 if (idx > max_idx)
5102 max_idx = idx;
5104 /* If we collapse the conditional to a true/false
5105 condition, then bubble that knowledge up to our caller. */
5106 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5108 gimple_cond_make_false (cond_stmt);
5109 cfg_cleanup_needed = true;
5111 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5113 gimple_cond_make_true (cond_stmt);
5114 cfg_cleanup_needed = true;
5116 else
5118 gimple_cond_set_code (cond_stmt, NE_EXPR);
5119 gimple_cond_set_lhs (cond_stmt,
5120 ops[bbinfo[idx].first_idx]->op);
5121 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5123 update_stmt (cond_stmt);
5125 if (bb == first_bb)
5126 break;
5129 /* The above changes could result in basic blocks after the first
5130 modified one, up to and including last_bb, to be executed even if
5131 they would not be in the original program. If the value ranges of
5132 assignment lhs' in those bbs were dependent on the conditions
5133 guarding those basic blocks which now can change, the VRs might
5134 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5135 are only used within the same bb, it should not be a big deal if
5136 we just reset all the VRs in those bbs. See PR68671. */
5137 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5138 reset_flow_sensitive_info_in_bb (bb);
5140 return cfg_cleanup_needed;
5143 /* Remove def stmt of VAR if VAR has zero uses and recurse
5144 on rhs1 operand if so. */
5146 static void
5147 remove_visited_stmt_chain (tree var)
5149 gimple *stmt;
5150 gimple_stmt_iterator gsi;
5152 while (1)
5154 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5155 return;
5156 stmt = SSA_NAME_DEF_STMT (var);
5157 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5159 var = gimple_assign_rhs1 (stmt);
5160 gsi = gsi_for_stmt (stmt);
5161 reassoc_remove_stmt (&gsi);
5162 release_defs (stmt);
5164 else
5165 return;
5169 /* This function checks three consecutive operands in
5170 the passed operand vector OPS starting from OPINDEX and
5171 swaps two operands if it is profitable for the binary operation
5172 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5174 We pair ops with the same rank if possible. */
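/* For illustration only: if the entries at OPINDEX, OPINDEX + 1 and
   OPINDEX + 2 have ranks 3, 3 and 2, the first and third entries are
   swapped so that the ranks become 2, 3, 3 and the binary statement
   consuming the last two operands pairs the two rank-3 operands.  */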
5176 static void
5177 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5178 unsigned int opindex)
5180 operand_entry *oe1, *oe2, *oe3;
5182 oe1 = ops[opindex];
5183 oe2 = ops[opindex + 1];
5184 oe3 = ops[opindex + 2];
5186 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5187 std::swap (*oe1, *oe3);
5188 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5189 std::swap (*oe1, *oe2);
5192 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5193 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5194 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5195 after the returned stmt. */
5197 static inline gimple *
5198 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5200 insert_before = true;
5201 if (TREE_CODE (rhs1) == SSA_NAME
5202 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5204 stmt = SSA_NAME_DEF_STMT (rhs1);
5205 insert_before = false;
5207 if (TREE_CODE (rhs2) == SSA_NAME
5208 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5210 stmt = SSA_NAME_DEF_STMT (rhs2);
5211 insert_before = false;
5213 return stmt;
5216 /* If the stmt that defines operand has to be inserted, insert it
5217 before the use. */
5218 static void
5219 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5221 gcc_assert (is_gimple_assign (stmt_to_insert));
5222 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5223 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5224 bool insert_before;
5225 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5226 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5227 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5229 /* If the insert point is not stmt, then insert_point would be
5230 the point where operand rhs1 or rhs2 is defined. In this case,
5231 stmt_to_insert has to be inserted afterwards. This would
5232 only happen when the stmt insertion point is flexible. */
5233 if (insert_before)
5234 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5235 else
5236 insert_stmt_after (stmt_to_insert, insert_point);
5240 /* Recursively rewrite our linearized statements so that the operators
5241 match those in OPS[OPINDEX], putting the computation in rank
5242 order. Return new lhs.
5243 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5244 the current stmt and during recursive invocations.
5245 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5246 recursive invocations. */
5248 static tree
5249 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5250 const vec<operand_entry *> &ops, bool changed,
5251 bool next_changed)
5253 tree rhs1 = gimple_assign_rhs1 (stmt);
5254 tree rhs2 = gimple_assign_rhs2 (stmt);
5255 tree lhs = gimple_assign_lhs (stmt);
5256 operand_entry *oe;
5258 /* The final recursion case for this function is that you have
5259 exactly two operations left.
5260 If we had exactly one op in the entire list to start with, we
5261 would have never called this function, and the tail recursion
5262 rewrites them one at a time. */
5263 if (opindex + 2 == ops.length ())
5265 operand_entry *oe1, *oe2;
5267 oe1 = ops[opindex];
5268 oe2 = ops[opindex + 1];
5270 if (rhs1 != oe1->op || rhs2 != oe2->op)
5272 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5273 unsigned int uid = gimple_uid (stmt);
5275 if (dump_file && (dump_flags & TDF_DETAILS))
5277 fprintf (dump_file, "Transforming ");
5278 print_gimple_stmt (dump_file, stmt, 0);
5281 /* If the stmt that defines operand has to be inserted, insert it
5282 before the use. */
5283 if (oe1->stmt_to_insert)
5284 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5285 if (oe2->stmt_to_insert)
5286 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5287 /* Even when changed is false, reassociation could have e.g. removed
5288 some redundant operations, so unless we are just swapping the
5289 arguments or unless there is no change at all (then we just
5290 return lhs), force creation of a new SSA_NAME. */
5291 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5293 bool insert_before;
5294 gimple *insert_point
5295 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5296 lhs = make_ssa_name (TREE_TYPE (lhs));
5297 stmt
5298 = gimple_build_assign (lhs, rhs_code,
5299 oe1->op, oe2->op);
5300 gimple_set_uid (stmt, uid);
5301 gimple_set_visited (stmt, true);
5302 if (insert_before)
5303 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5304 else
5305 insert_stmt_after (stmt, insert_point);
5307 else
5309 bool insert_before;
5310 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5311 insert_before)
5312 == stmt);
5313 gimple_assign_set_rhs1 (stmt, oe1->op);
5314 gimple_assign_set_rhs2 (stmt, oe2->op);
5315 update_stmt (stmt);
5318 if (rhs1 != oe1->op && rhs1 != oe2->op)
5319 remove_visited_stmt_chain (rhs1);
5321 if (dump_file && (dump_flags & TDF_DETAILS))
5323 fprintf (dump_file, " into ");
5324 print_gimple_stmt (dump_file, stmt, 0);
5327 return lhs;
5330 /* If we hit here, we should have 3 or more ops left. */
5331 gcc_assert (opindex + 2 < ops.length ());
5333 /* Rewrite the next operator. */
5334 oe = ops[opindex];
5336 /* If the stmt that defines operand has to be inserted, insert it
5337 before the use. */
5338 if (oe->stmt_to_insert)
5339 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5341 /* Recurse on the LHS of the binary operator, which is guaranteed to
5342 be the non-leaf side. */
5343 tree new_rhs1
5344 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5345 changed || oe->op != rhs2 || next_changed,
5346 false);
5348 if (oe->op != rhs2 || new_rhs1 != rhs1)
5350 if (dump_file && (dump_flags & TDF_DETAILS))
5352 fprintf (dump_file, "Transforming ");
5353 print_gimple_stmt (dump_file, stmt, 0);
5356 /* If changed is false, this is either opindex == 0
5357 or all outer rhs2's were equal to corresponding oe->op,
5358 and powi_result is NULL.
5359 That means lhs is equivalent before and after reassociation.
5360 Otherwise ensure the old lhs SSA_NAME is not reused and
5361 create a new stmt as well, so that any debug stmts will be
5362 properly adjusted. */
5363 if (changed)
5365 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5366 unsigned int uid = gimple_uid (stmt);
5367 bool insert_before;
5368 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5369 insert_before);
5371 lhs = make_ssa_name (TREE_TYPE (lhs));
5372 stmt = gimple_build_assign (lhs, rhs_code,
5373 new_rhs1, oe->op);
5374 gimple_set_uid (stmt, uid);
5375 gimple_set_visited (stmt, true);
5376 if (insert_before)
5377 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5378 else
5379 insert_stmt_after (stmt, insert_point);
5381 else
5383 bool insert_before;
5384 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5385 insert_before)
5386 == stmt);
5387 gimple_assign_set_rhs1 (stmt, new_rhs1);
5388 gimple_assign_set_rhs2 (stmt, oe->op);
5389 update_stmt (stmt);
5392 if (dump_file && (dump_flags & TDF_DETAILS))
5394 fprintf (dump_file, " into ");
5395 print_gimple_stmt (dump_file, stmt, 0);
5398 return lhs;
5401 /* Find out how many cycles we need to compute the statement chain.
5402 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5403 maximum number of independent statements we may execute per cycle. */
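/* For illustration only, a worked example of the computation below:
   with ops_num = 12 and cpu_width = 2,

	res  = 12 / (2 * 2) = 3		(full-width reduction cycles)
	rest = 12 - 3 * 2   = 6
	res += floor_log2 (6) + 1 = 3	(6 is not a power of two)

   for a total of 6 cycles.  */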
5405 static int
5406 get_required_cycles (int ops_num, int cpu_width)
5408 int res;
5409 int elog;
5410 unsigned int rest;
5412 /* While we have more than 2 * cpu_width operands
5413 we may reduce the number of operands by cpu_width
5414 per cycle. */
5415 res = ops_num / (2 * cpu_width);
5417 /* The remaining operand count can then be halved each cycle
5418 until we have only one operand. */
5419 rest = (unsigned)(ops_num - res * cpu_width);
5420 elog = exact_log2 (rest);
5421 if (elog >= 0)
5422 res += elog;
5423 else
5424 res += floor_log2 (rest) + 1;
5426 return res;
5429 /* Returns an optimal number of registers to use for computation of
5430 the given statements. */
5432 static int
5433 get_reassociation_width (int ops_num, enum tree_code opc,
5434 machine_mode mode)
5436 int param_width = param_tree_reassoc_width;
5437 int width;
5438 int width_min;
5439 int cycles_best;
5441 if (param_width > 0)
5442 width = param_width;
5443 else
5444 width = targetm.sched.reassociation_width (opc, mode);
5446 if (width == 1)
5447 return width;
5449 /* Get the minimal time required for sequence computation. */
5450 cycles_best = get_required_cycles (ops_num, width);
5452 /* Check if we may use less width and still compute the sequence in
5453 the same time. This will allow us to reduce register usage.
5454 get_required_cycles is monotonically increasing as the width decreases,
5455 so we can perform a binary search for the minimal width that still
5456 results in the optimal cycle count. */
5457 width_min = 1;
5458 while (width > width_min)
5460 int width_mid = (width + width_min) / 2;
5462 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5463 width = width_mid;
5464 else if (width_min < width_mid)
5465 width_min = width_mid;
5466 else
5467 break;
5470 return width;
5473 /* Recursively rewrite our linearized statements so that the operators
5474 match those in OPS[OPINDEX], putting the computation in rank
5475 order and trying to allow operations to be executed in
5476 parallel. */
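/* For illustration only: with width 2 and operands a, b, c, d the serial
   chain ((a + b) + c) + d is instead emitted roughly as

	t1 = a + b;
	t2 = c + d;
	r  = t1 + t2;

   so the first two additions do not depend on each other and can issue
   in the same cycle.  */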
5478 static void
5479 rewrite_expr_tree_parallel (gassign *stmt, int width,
5480 const vec<operand_entry *> &ops)
5482 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5483 int op_num = ops.length ();
5484 gcc_assert (op_num > 0);
5485 int stmt_num = op_num - 1;
5486 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5487 int op_index = op_num - 1;
5488 int stmt_index = 0;
5489 int ready_stmts_end = 0;
5490 int i = 0;
5491 gimple *stmt1 = NULL, *stmt2 = NULL;
5492 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5494 /* We start expression rewriting from the top statements.
5495 So, in this loop we create a full list of statements
5496 we will work with. */
5497 stmts[stmt_num - 1] = stmt;
5498 for (i = stmt_num - 2; i >= 0; i--)
5499 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5501 for (i = 0; i < stmt_num; i++)
5503 tree op1, op2;
5505 /* Determine whether we should use results of
5506 already handled statements or not. */
5507 if (ready_stmts_end == 0
5508 && (i - stmt_index >= width || op_index < 1))
5509 ready_stmts_end = i;
5511 /* Now we choose operands for the next statement. A nonzero
5512 value in ready_stmts_end here means that we should use
5513 the result of already generated statements as a new operand. */
5514 if (ready_stmts_end > 0)
5516 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5517 if (ready_stmts_end > stmt_index)
5518 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5519 else if (op_index >= 0)
5521 operand_entry *oe = ops[op_index--];
5522 stmt2 = oe->stmt_to_insert;
5523 op2 = oe->op;
5525 else
5527 gcc_assert (stmt_index < i);
5528 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5531 if (stmt_index >= ready_stmts_end)
5532 ready_stmts_end = 0;
5534 else
5536 if (op_index > 1)
5537 swap_ops_for_binary_stmt (ops, op_index - 2);
5538 operand_entry *oe2 = ops[op_index--];
5539 operand_entry *oe1 = ops[op_index--];
5540 op2 = oe2->op;
5541 stmt2 = oe2->stmt_to_insert;
5542 op1 = oe1->op;
5543 stmt1 = oe1->stmt_to_insert;
5546 /* If we emit the last statement then we should put
5547 operands into the last statement. It will also
5548 break the loop. */
5549 if (op_index < 0 && stmt_index == i)
5550 i = stmt_num - 1;
5552 if (dump_file && (dump_flags & TDF_DETAILS))
5554 fprintf (dump_file, "Transforming ");
5555 print_gimple_stmt (dump_file, stmts[i], 0);
5558 /* If the stmt that defines operand has to be inserted, insert it
5559 before the use. */
5560 if (stmt1)
5561 insert_stmt_before_use (stmts[i], stmt1);
5562 if (stmt2)
5563 insert_stmt_before_use (stmts[i], stmt2);
5564 stmt1 = stmt2 = NULL;
5566 /* We keep the original statement only for the last one. All
5567 others are recreated. */
5568 if (i == stmt_num - 1)
5570 gimple_assign_set_rhs1 (stmts[i], op1);
5571 gimple_assign_set_rhs2 (stmts[i], op2);
5572 update_stmt (stmts[i]);
5574 else
5576 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5577 gimple_set_visited (stmts[i], true);
5579 if (dump_file && (dump_flags & TDF_DETAILS))
5581 fprintf (dump_file, " into ");
5582 print_gimple_stmt (dump_file, stmts[i], 0);
5586 remove_visited_stmt_chain (last_rhs1);
5589 /* Transform STMT, which is really (A + B) + (C + D) into the left
5590 linear form, ((A+B)+C)+D.
5591 Recurse on D if necessary. */
5593 static void
5594 linearize_expr (gimple *stmt)
5596 gimple_stmt_iterator gsi;
5597 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5598 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5599 gimple *oldbinrhs = binrhs;
5600 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5601 gimple *newbinrhs = NULL;
5602 class loop *loop = loop_containing_stmt (stmt);
5603 tree lhs = gimple_assign_lhs (stmt);
5605 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5606 && is_reassociable_op (binrhs, rhscode, loop));
5608 gsi = gsi_for_stmt (stmt);
5610 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5611 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5612 gimple_assign_rhs_code (binrhs),
5613 gimple_assign_lhs (binlhs),
5614 gimple_assign_rhs2 (binrhs));
5615 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5616 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5617 gimple_set_uid (binrhs, gimple_uid (stmt));
5619 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5620 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5622 if (dump_file && (dump_flags & TDF_DETAILS))
5624 fprintf (dump_file, "Linearized: ");
5625 print_gimple_stmt (dump_file, stmt, 0);
5628 reassociate_stats.linearized++;
5629 update_stmt (stmt);
5631 gsi = gsi_for_stmt (oldbinrhs);
5632 reassoc_remove_stmt (&gsi);
5633 release_defs (oldbinrhs);
5635 gimple_set_visited (stmt, true);
5636 gimple_set_visited (binlhs, true);
5637 gimple_set_visited (binrhs, true);
5639 /* Tail recurse on the new rhs if it still needs reassociation. */
5640 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5641 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5642 want to change the algorithm while converting to tuples. */
5643 linearize_expr (stmt);
5646 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5647 it. Otherwise, return NULL. */
5649 static gimple *
5650 get_single_immediate_use (tree lhs)
5652 use_operand_p immuse;
5653 gimple *immusestmt;
5655 if (TREE_CODE (lhs) == SSA_NAME
5656 && single_imm_use (lhs, &immuse, &immusestmt)
5657 && is_gimple_assign (immusestmt))
5658 return immusestmt;
5660 return NULL;
5663 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5664 representing the negated value. Insertions of any necessary
5665 instructions go before GSI.
5666 This function is recursive in that, if you hand it "a_5" as the
5667 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5668 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5670 static tree
5671 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5673 gimple *negatedefstmt = NULL;
5674 tree resultofnegate;
5675 gimple_stmt_iterator gsi;
5676 unsigned int uid;
5678 /* If we are trying to negate a name, defined by an add, negate the
5679 add operands instead. */
5680 if (TREE_CODE (tonegate) == SSA_NAME)
5681 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5682 if (TREE_CODE (tonegate) == SSA_NAME
5683 && is_gimple_assign (negatedefstmt)
5684 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5685 && has_single_use (gimple_assign_lhs (negatedefstmt))
5686 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5688 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5689 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5690 tree lhs = gimple_assign_lhs (negatedefstmt);
5691 gimple *g;
5693 gsi = gsi_for_stmt (negatedefstmt);
5694 rhs1 = negate_value (rhs1, &gsi);
5696 gsi = gsi_for_stmt (negatedefstmt);
5697 rhs2 = negate_value (rhs2, &gsi);
5699 gsi = gsi_for_stmt (negatedefstmt);
5700 lhs = make_ssa_name (TREE_TYPE (lhs));
5701 gimple_set_visited (negatedefstmt, true);
5702 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5703 gimple_set_uid (g, gimple_uid (negatedefstmt));
5704 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5705 return lhs;
5708 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5709 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5710 NULL_TREE, true, GSI_SAME_STMT);
5711 gsi = *gsip;
5712 uid = gimple_uid (gsi_stmt (gsi));
5713 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5715 gimple *stmt = gsi_stmt (gsi);
5716 if (gimple_uid (stmt) != 0)
5717 break;
5718 gimple_set_uid (stmt, uid);
5720 return resultofnegate;
5723 /* Return true if we should break up the subtract in STMT into an add
5724 with negate. This is true when the subtract operands are really
5725 adds, or the subtract itself is used in an add expression. In
5726 either case, breaking up the subtract into an add with negate
5727 exposes the adds to reassociation. */
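/* For illustration only (hypothetical SSA names): in

	t_3 = a_1 - b_2;
	r_5 = t_3 + c_4;

   the subtract feeds an add, so it is worth rewriting it as
   t_3 = a_1 + -b_2, letting reassociation see a_1, -b_2 and c_4 as one
   flat PLUS chain.  */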
5729 static bool
5730 should_break_up_subtract (gimple *stmt)
5732 tree lhs = gimple_assign_lhs (stmt);
5733 tree binlhs = gimple_assign_rhs1 (stmt);
5734 tree binrhs = gimple_assign_rhs2 (stmt);
5735 gimple *immusestmt;
5736 class loop *loop = loop_containing_stmt (stmt);
5738 if (TREE_CODE (binlhs) == SSA_NAME
5739 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5740 return true;
5742 if (TREE_CODE (binrhs) == SSA_NAME
5743 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5744 return true;
5746 if (TREE_CODE (lhs) == SSA_NAME
5747 && (immusestmt = get_single_immediate_use (lhs))
5748 && is_gimple_assign (immusestmt)
5749 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5750 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5751 && gimple_assign_rhs1 (immusestmt) == lhs)
5752 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5753 return true;
5754 return false;
5757 /* Transform STMT from A - B into A + -B. */
5759 static void
5760 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5762 tree rhs1 = gimple_assign_rhs1 (stmt);
5763 tree rhs2 = gimple_assign_rhs2 (stmt);
5765 if (dump_file && (dump_flags & TDF_DETAILS))
5767 fprintf (dump_file, "Breaking up subtract ");
5768 print_gimple_stmt (dump_file, stmt, 0);
5771 rhs2 = negate_value (rhs2, gsip);
5772 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5773 update_stmt (stmt);
5776 /* Determine whether STMT is a builtin call that raises an SSA name
5777 to an integer power and has only one use. If so, and this is early
5778 reassociation and unsafe math optimizations are permitted, place
5779 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5780 If any of these conditions does not hold, return FALSE. */
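/* For example (illustrative only): a call such as
     t_2 = __builtin_pow (x_1, 4.0);
   is accepted with *BASE = x_1 and *EXPONENT = 4, while
     t_2 = __builtin_pow (x_1, 0.5);
   is rejected because the exponent is not an integral value. */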
5782 static bool
5783 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5785 tree arg1;
5786 REAL_VALUE_TYPE c, cint;
5788 switch (gimple_call_combined_fn (stmt))
5790 CASE_CFN_POW:
5791 if (flag_errno_math)
5792 return false;
5794 *base = gimple_call_arg (stmt, 0);
5795 arg1 = gimple_call_arg (stmt, 1);
5797 if (TREE_CODE (arg1) != REAL_CST)
5798 return false;
5800 c = TREE_REAL_CST (arg1);
5802 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5803 return false;
5805 *exponent = real_to_integer (&c);
5806 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5807 if (!real_identical (&c, &cint))
5808 return false;
5810 break;
5812 CASE_CFN_POWI:
5813 *base = gimple_call_arg (stmt, 0);
5814 arg1 = gimple_call_arg (stmt, 1);
5816 if (!tree_fits_shwi_p (arg1))
5817 return false;
5819 *exponent = tree_to_shwi (arg1);
5820 break;
5822 default:
5823 return false;
5826 /* Expanding negative exponents is generally unproductive, so we don't
5827 complicate matters with those. Exponents of zero and one should
5828 have been handled by expression folding. */
5829 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5830 return false;
5832 return true;
5835 /* Try to derive and add operand entry for OP to *OPS. Return false if
5836 unsuccessful. */
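/* Two cases are recognized here (sketched with hypothetical names,
   assuming powi insertion and unsafe math are enabled): for a
   multiplication chain containing
     t_3 = __builtin_powi (x_1, 3);
   a repeat-factor entry (x_1, 3) is recorded instead of a plain
   operand, and for
     t_3 = -x_1;
   the operands x_1 and -1 are pushed so the negation can be
   reassociated like any other factor. */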
5838 static bool
5839 try_special_add_to_ops (vec<operand_entry *> *ops,
5840 enum tree_code code,
5841 tree op, gimple* def_stmt)
5843 tree base = NULL_TREE;
5844 HOST_WIDE_INT exponent = 0;
5846 if (TREE_CODE (op) != SSA_NAME
5847 || ! has_single_use (op))
5848 return false;
5850 if (code == MULT_EXPR
5851 && reassoc_insert_powi_p
5852 && flag_unsafe_math_optimizations
5853 && is_gimple_call (def_stmt)
5854 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5856 add_repeat_to_ops_vec (ops, base, exponent);
5857 gimple_set_visited (def_stmt, true);
5858 return true;
5860 else if (code == MULT_EXPR
5861 && is_gimple_assign (def_stmt)
5862 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5863 && !HONOR_SNANS (TREE_TYPE (op))
5864 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5865 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
5866 && (!FLOAT_TYPE_P (TREE_TYPE (op))
5867 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
5869 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5870 tree cst = build_minus_one_cst (TREE_TYPE (op));
5871 add_to_ops_vec (ops, rhs1);
5872 add_to_ops_vec (ops, cst);
5873 gimple_set_visited (def_stmt, true);
5874 return true;
5877 return false;
5880 /* Recursively linearize a binary expression that is the RHS of STMT.
5881 Place the operands of the expression tree in the vector named OPS. */
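/* For instance (hypothetical SSA names): linearizing
     a_3 = b_1 + c_2;
     d_5 = a_3 + e_4;
   starting from the definition of d_5 collects { b_1, c_2, e_4 }
   into OPS, provided a_3 has a single use and is reassociable. */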
5883 static void
5884 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5885 bool is_associative, bool set_visited)
5887 tree binlhs = gimple_assign_rhs1 (stmt);
5888 tree binrhs = gimple_assign_rhs2 (stmt);
5889 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5890 bool binlhsisreassoc = false;
5891 bool binrhsisreassoc = false;
5892 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5893 class loop *loop = loop_containing_stmt (stmt);
5895 if (set_visited)
5896 gimple_set_visited (stmt, true);
5898 if (TREE_CODE (binlhs) == SSA_NAME)
5900 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5901 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5902 && !stmt_could_throw_p (cfun, binlhsdef));
5905 if (TREE_CODE (binrhs) == SSA_NAME)
5907 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5908 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5909 && !stmt_could_throw_p (cfun, binrhsdef));
5912 /* If the LHS is not reassociable, but the RHS is, we need to swap
5913 them. If neither is reassociable, there is nothing we can do, so
5914 just put them in the ops vector. If the LHS is reassociable,
5915 linearize it. If both are reassociable, then linearize the RHS
5916 and the LHS. */
5918 if (!binlhsisreassoc)
5920 /* If this is not an associative operation like division, give up. */
5921 if (!is_associative)
5923 add_to_ops_vec (ops, binrhs);
5924 return;
5927 if (!binrhsisreassoc)
5929 bool swap = false;
5930 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5931 /* If we add ops for the rhs we expect to be able to recurse
5932 to it via the lhs during expression rewrite, so swap
5933 operands. */
5934 swap = true;
5935 else
5936 add_to_ops_vec (ops, binrhs);
5938 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5939 add_to_ops_vec (ops, binlhs);
5941 if (!swap)
5942 return;
5945 if (dump_file && (dump_flags & TDF_DETAILS))
5947 fprintf (dump_file, "swapping operands of ");
5948 print_gimple_stmt (dump_file, stmt, 0);
5951 swap_ssa_operands (stmt,
5952 gimple_assign_rhs1_ptr (stmt),
5953 gimple_assign_rhs2_ptr (stmt));
5954 update_stmt (stmt);
5956 if (dump_file && (dump_flags & TDF_DETAILS))
5958 fprintf (dump_file, " is now ");
5959 print_gimple_stmt (dump_file, stmt, 0);
5961 if (!binrhsisreassoc)
5962 return;
5964 /* We want to make it so the lhs is always the reassociative op,
5965 so swap. */
5966 std::swap (binlhs, binrhs);
5968 else if (binrhsisreassoc)
5970 linearize_expr (stmt);
5971 binlhs = gimple_assign_rhs1 (stmt);
5972 binrhs = gimple_assign_rhs2 (stmt);
5975 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5976 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5977 rhscode, loop));
5978 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5979 is_associative, set_visited);
5981 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5982 add_to_ops_vec (ops, binrhs);
5985 /* Repropagate the negates back into subtracts, since no other pass
5986 currently does it. */
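/* A typical case handled below (illustrative): with
     x_2 = -a_1;
     y_4 = b_3 + x_2;
   the addition is rewritten as
     y_4 = b_3 - a_1;
   folding the negate back into a subtract. */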
5988 static void
5989 repropagate_negates (void)
5991 unsigned int i = 0;
5992 tree negate;
5994 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5996 gimple *user = get_single_immediate_use (negate);
5997 if (!user || !is_gimple_assign (user))
5998 continue;
6000 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6001 if (TREE_CODE (negateop) == SSA_NAME
6002 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6003 continue;
6005 /* The negate operand can be either operand of a PLUS_EXPR
6006 (it can be the LHS if the RHS is a constant for example).
6008 Force the negate operand to the RHS of the PLUS_EXPR, then
6009 transform the PLUS_EXPR into a MINUS_EXPR. */
6010 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6012 /* If the negated operand appears on the LHS of the
6013 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6014 to force the negated operand to the RHS of the PLUS_EXPR. */
6015 if (gimple_assign_rhs1 (user) == negate)
6017 swap_ssa_operands (user,
6018 gimple_assign_rhs1_ptr (user),
6019 gimple_assign_rhs2_ptr (user));
6022 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6023 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6024 if (gimple_assign_rhs2 (user) == negate)
6026 tree rhs1 = gimple_assign_rhs1 (user);
6027 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6028 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6029 negateop);
6030 update_stmt (user);
6033 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6035 if (gimple_assign_rhs1 (user) == negate)
6037 /* We have
6038 x = -negateop
6039 y = x - b
6040 which we transform into
6041 x = negateop + b
6042 y = -x .
6043 This pushes down the negate which we possibly can merge
6044 into some other operation, hence insert it into the
6045 plus_negates vector. */
6046 gimple *feed = SSA_NAME_DEF_STMT (negate);
6047 tree b = gimple_assign_rhs2 (user);
6048 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6049 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6050 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6051 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6052 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6053 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6054 user = gsi_stmt (gsi2);
6055 update_stmt (user);
6056 reassoc_remove_stmt (&gsi);
6057 release_defs (feed);
6058 plus_negates.safe_push (gimple_assign_lhs (user));
6060 else
6062 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6063 getting rid of one operation. */
6064 tree rhs1 = gimple_assign_rhs1 (user);
6065 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6066 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6067 update_stmt (gsi_stmt (gsi));
6073 /* Break up subtract operations in block BB.
6075 We do this top down because we don't know whether the subtract is
6076 part of a possible chain of reassociation except at the top.
6078 IE given
6079 d = f + g
6080 c = a + e
6081 b = c - d
6082 q = b - r
6083 k = t - q
6085 we want to break up k = t - q, but we won't until we've transformed q
6086 = b - r, which won't be broken up until we transform b = c - d.
6088 En passant, clear the GIMPLE visited flag on every statement
6089 and set UIDs within each basic block. */
6091 static void
6092 break_up_subtract_bb (basic_block bb)
6094 gimple_stmt_iterator gsi;
6095 basic_block son;
6096 unsigned int uid = 1;
6098 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6100 gimple *stmt = gsi_stmt (gsi);
6101 gimple_set_visited (stmt, false);
6102 gimple_set_uid (stmt, uid++);
6104 if (!is_gimple_assign (stmt)
6105 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6106 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6107 continue;
6109 /* Look for simple gimple subtract operations. */
6110 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6112 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6113 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6114 continue;
6116 /* Check for a subtract used only in an addition. If this
6117 is the case, transform it into an add of a negate for better
6118 reassociation. IE transform C = A-B into C = A + -B if C
6119 is only used in an addition. */
6120 if (should_break_up_subtract (stmt))
6121 break_up_subtract (stmt, &gsi);
6123 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6124 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6125 plus_negates.safe_push (gimple_assign_lhs (stmt));
6127 for (son = first_dom_son (CDI_DOMINATORS, bb);
6128 son;
6129 son = next_dom_son (CDI_DOMINATORS, son))
6130 break_up_subtract_bb (son);
6133 /* Used for repeated factor analysis. */
6134 struct repeat_factor
6136 /* An SSA name that occurs in a multiply chain. */
6137 tree factor;
6139 /* Cached rank of the factor. */
6140 unsigned rank;
6142 /* Number of occurrences of the factor in the chain. */
6143 HOST_WIDE_INT count;
6145 /* An SSA name representing the product of this factor and
6146 all factors appearing later in the repeated factor vector. */
6147 tree repr;
6151 static vec<repeat_factor> repeat_factor_vec;
6153 /* Used for sorting the repeat factor vector. Sort primarily by
6154 ascending occurrence count, secondarily by descending rank. */
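/* E.g. factors with (count, rank) of (3, 7), (2, 9) and (2, 4)
   sort as (2, 9), (2, 4), (3, 7): lower counts first, and within
   equal counts the higher-ranked factor first. */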
6156 static int
6157 compare_repeat_factors (const void *x1, const void *x2)
6159 const repeat_factor *rf1 = (const repeat_factor *) x1;
6160 const repeat_factor *rf2 = (const repeat_factor *) x2;
6162 if (rf1->count != rf2->count)
6163 return rf1->count - rf2->count;
6165 return rf2->rank - rf1->rank;
6168 /* Look for repeated operands in OPS in the multiply tree rooted at
6169 STMT. Replace them with an optimal sequence of multiplies and powi
6170 builtin calls, and remove the used operands from OPS. Return an
6171 SSA name representing the value of the replacement sequence. */
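/* For example (a sketch, assuming -funsafe-math-optimizations): if OPS
   describes x_1 * x_1 * x_1 * y_2, this emits __builtin_powi (x_1, 3)
   for floating-point types (or an equivalent multiply sequence for
   integral types), removes the three copies of x_1 from OPS, and
   returns the powi result; y_2 stays in OPS for the normal
   reassociation path. */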
6173 static tree
6174 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6176 unsigned i, j, vec_len;
6177 int ii;
6178 operand_entry *oe;
6179 repeat_factor *rf1, *rf2;
6180 repeat_factor rfnew;
6181 tree result = NULL_TREE;
6182 tree target_ssa, iter_result;
6183 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6184 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6185 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6186 gimple *mul_stmt, *pow_stmt;
6188 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6189 target, unless type is integral. */
6190 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6191 return NULL_TREE;
6193 /* Allocate the repeated factor vector. */
6194 repeat_factor_vec.create (10);
6196 /* Scan the OPS vector for all SSA names in the product and build
6197 up a vector of occurrence counts for each factor. */
6198 FOR_EACH_VEC_ELT (*ops, i, oe)
6200 if (TREE_CODE (oe->op) == SSA_NAME)
6202 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6204 if (rf1->factor == oe->op)
6206 rf1->count += oe->count;
6207 break;
6211 if (j >= repeat_factor_vec.length ())
6213 rfnew.factor = oe->op;
6214 rfnew.rank = oe->rank;
6215 rfnew.count = oe->count;
6216 rfnew.repr = NULL_TREE;
6217 repeat_factor_vec.safe_push (rfnew);
6222 /* Sort the repeated factor vector by (a) increasing occurrence count,
6223 and (b) decreasing rank. */
6224 repeat_factor_vec.qsort (compare_repeat_factors);
6226 /* It is generally best to combine as many base factors as possible
6227 into a product before applying __builtin_powi to the result.
6228 However, the sort order chosen for the repeated factor vector
6229 allows us to cache partial results for the product of the base
6230 factors for subsequent use. When we already have a cached partial
6231 result from a previous iteration, it is best to make use of it
6232 before looking for another __builtin_pow opportunity.
6234 As an example, consider x * x * y * y * y * z * z * z * z.
6235 We want to first compose the product x * y * z, raise it to the
6236 second power, then multiply this by y * z, and finally multiply
6237 by z. This can be done in 5 multiplies provided we cache y * z
6238 for use in both expressions:
6240 t1 = y * z
6241 t2 = t1 * x
6242 t3 = t2 * t2
6243 t4 = t1 * t3
6244 result = t4 * z
6246 If we instead ignored the cached y * z and first multiplied by
6247 the __builtin_pow opportunity z * z, we would get the inferior:
6249 t1 = y * z
6250 t2 = t1 * x
6251 t3 = t2 * t2
6252 t4 = z * z
6253 t5 = t3 * t4
6254 result = t5 * y */
6256 vec_len = repeat_factor_vec.length ();
6258 /* Repeatedly look for opportunities to create a builtin_powi call. */
6259 while (true)
6261 HOST_WIDE_INT power;
6263 /* First look for the largest cached product of factors from
6264 preceding iterations. If found, create a builtin_powi for
6265 it if the minimum occurrence count for its factors is at
6266 least 2, or just use this cached product as our next
6267 multiplicand if the minimum occurrence count is 1. */
6268 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6270 if (rf1->repr && rf1->count > 0)
6271 break;
6274 if (j < vec_len)
6276 power = rf1->count;
6278 if (power == 1)
6280 iter_result = rf1->repr;
6282 if (dump_file && (dump_flags & TDF_DETAILS))
6284 unsigned elt;
6285 repeat_factor *rf;
6286 fputs ("Multiplying by cached product ", dump_file);
6287 for (elt = j; elt < vec_len; elt++)
6289 rf = &repeat_factor_vec[elt];
6290 print_generic_expr (dump_file, rf->factor);
6291 if (elt < vec_len - 1)
6292 fputs (" * ", dump_file);
6294 fputs ("\n", dump_file);
6297 else
6299 if (INTEGRAL_TYPE_P (type))
6301 gcc_assert (power > 1);
6302 gimple_stmt_iterator gsip = gsi;
6303 gsi_prev (&gsip);
6304 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6305 rf1->repr, power);
6306 gimple_stmt_iterator gsic = gsi;
6307 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6309 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6310 gimple_set_visited (gsi_stmt (gsic), true);
6311 gsi_prev (&gsic);
6314 else
6316 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6317 pow_stmt
6318 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6319 build_int_cst (integer_type_node,
6320 power));
6321 gimple_call_set_lhs (pow_stmt, iter_result);
6322 gimple_set_location (pow_stmt, gimple_location (stmt));
6323 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6324 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6327 if (dump_file && (dump_flags & TDF_DETAILS))
6329 unsigned elt;
6330 repeat_factor *rf;
6331 fputs ("Building __builtin_pow call for cached product (",
6332 dump_file);
6333 for (elt = j; elt < vec_len; elt++)
6335 rf = &repeat_factor_vec[elt];
6336 print_generic_expr (dump_file, rf->factor);
6337 if (elt < vec_len - 1)
6338 fputs (" * ", dump_file);
6340 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6341 power);
6345 else
6347 /* Otherwise, find the first factor in the repeated factor
6348 vector whose occurrence count is at least 2. If no such
6349 factor exists, there are no builtin_powi opportunities
6350 remaining. */
6351 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6353 if (rf1->count >= 2)
6354 break;
6357 if (j >= vec_len)
6358 break;
6360 power = rf1->count;
6362 if (dump_file && (dump_flags & TDF_DETAILS))
6364 unsigned elt;
6365 repeat_factor *rf;
6366 fputs ("Building __builtin_pow call for (", dump_file);
6367 for (elt = j; elt < vec_len; elt++)
6369 rf = &repeat_factor_vec[elt];
6370 print_generic_expr (dump_file, rf->factor);
6371 if (elt < vec_len - 1)
6372 fputs (" * ", dump_file);
6374 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6377 reassociate_stats.pows_created++;
6379 /* Visit each element of the vector in reverse order (so that
6380 high-occurrence elements are visited first, and within the
6381 same occurrence count, lower-ranked elements are visited
6382 first). Form a linear product of all elements in this order
6383 whose occurrence count is at least that of element J.
6384 Record the SSA name representing the product of each element
6385 with all subsequent elements in the vector. */
6386 if (j == vec_len - 1)
6387 rf1->repr = rf1->factor;
6388 else
6390 for (ii = vec_len - 2; ii >= (int)j; ii--)
6392 tree op1, op2;
6394 rf1 = &repeat_factor_vec[ii];
6395 rf2 = &repeat_factor_vec[ii + 1];
6397 /* Init the last factor's representative to be itself. */
6398 if (!rf2->repr)
6399 rf2->repr = rf2->factor;
6401 op1 = rf1->factor;
6402 op2 = rf2->repr;
6404 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6405 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6406 op1, op2);
6407 gimple_set_location (mul_stmt, gimple_location (stmt));
6408 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6409 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6410 rf1->repr = target_ssa;
6412 /* Don't reprocess the multiply we just introduced. */
6413 gimple_set_visited (mul_stmt, true);
6417 /* Form a call to __builtin_powi for the maximum product
6418 just formed, raised to the power obtained earlier. */
6419 rf1 = &repeat_factor_vec[j];
6420 if (INTEGRAL_TYPE_P (type))
6422 gcc_assert (power > 1);
6423 gimple_stmt_iterator gsip = gsi;
6424 gsi_prev (&gsip);
6425 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6426 rf1->repr, power);
6427 gimple_stmt_iterator gsic = gsi;
6428 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6430 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6431 gimple_set_visited (gsi_stmt (gsic), true);
6432 gsi_prev (&gsic);
6435 else
6437 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6438 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6439 build_int_cst (integer_type_node,
6440 power));
6441 gimple_call_set_lhs (pow_stmt, iter_result);
6442 gimple_set_location (pow_stmt, gimple_location (stmt));
6443 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6444 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6448 /* If we previously formed at least one other builtin_powi call,
6449 form the product of this one and those others. */
6450 if (result)
6452 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6453 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6454 result, iter_result);
6455 gimple_set_location (mul_stmt, gimple_location (stmt));
6456 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6457 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6458 gimple_set_visited (mul_stmt, true);
6459 result = new_result;
6461 else
6462 result = iter_result;
6464 /* Decrement the occurrence count of each element in the product
6465 by the count found above, and remove this many copies of each
6466 factor from OPS. */
6467 for (i = j; i < vec_len; i++)
6469 unsigned k = power;
6470 unsigned n;
6472 rf1 = &repeat_factor_vec[i];
6473 rf1->count -= power;
6475 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6477 if (oe->op == rf1->factor)
6479 if (oe->count <= k)
6481 ops->ordered_remove (n);
6482 k -= oe->count;
6484 if (k == 0)
6485 break;
6487 else
6489 oe->count -= k;
6490 break;
6497 /* At this point all elements in the repeated factor vector have a
6498 remaining occurrence count of 0 or 1, and those with a count of 1
6499 don't have cached representatives. Re-sort the ops vector and
6500 clean up. */
6501 ops->qsort (sort_by_operand_rank);
6502 repeat_factor_vec.release ();
6504 /* Return the final product computed herein. Note that there may
6505 still be some elements with single occurrence count left in OPS;
6506 those will be handled by the normal reassociation logic. */
6507 return result;
6510 /* Attempt to optimize
6511 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6512 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
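/* For example (illustrative): 2.0 * copysign (3.0, y_1) becomes
   copysign (6.0, y_1), and -2.0 * copysign (3.0, y_1) becomes
   copysign (6.0, y_1) followed by a negation of its result. */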
6514 static void
6515 attempt_builtin_copysign (vec<operand_entry *> *ops)
6517 operand_entry *oe;
6518 unsigned int i;
6519 unsigned int length = ops->length ();
6520 tree cst = ops->last ()->op;
6522 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6523 return;
6525 FOR_EACH_VEC_ELT (*ops, i, oe)
6527 if (TREE_CODE (oe->op) == SSA_NAME
6528 && has_single_use (oe->op))
6530 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6531 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6533 tree arg0, arg1;
6534 switch (gimple_call_combined_fn (old_call))
6536 CASE_CFN_COPYSIGN:
6537 CASE_CFN_COPYSIGN_FN:
6538 arg0 = gimple_call_arg (old_call, 0);
6539 arg1 = gimple_call_arg (old_call, 1);
6540 /* The first argument of copysign must be a constant,
6541 otherwise there's nothing to do. */
6542 if (TREE_CODE (arg0) == REAL_CST)
6544 tree type = TREE_TYPE (arg0);
6545 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6546 /* If we couldn't fold to a single constant, skip it.
6547 That happens e.g. for inexact multiplication when
6548 -frounding-math. */
6549 if (mul == NULL_TREE)
6550 break;
6551 /* Instead of adjusting OLD_CALL, let's build a new
6552 call to not leak the LHS and prevent keeping bogus
6553 debug statements. DCE will clean up the old call. */
6554 gcall *new_call;
6555 if (gimple_call_internal_p (old_call))
6556 new_call = gimple_build_call_internal
6557 (IFN_COPYSIGN, 2, mul, arg1);
6558 else
6559 new_call = gimple_build_call
6560 (gimple_call_fndecl (old_call), 2, mul, arg1);
6561 tree lhs = make_ssa_name (type);
6562 gimple_call_set_lhs (new_call, lhs);
6563 gimple_set_location (new_call,
6564 gimple_location (old_call));
6565 insert_stmt_after (new_call, old_call);
6566 /* We've used the constant, get rid of it. */
6567 ops->pop ();
6568 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6569 /* Handle the CST1 < 0 case by negating the result. */
6570 if (cst1_neg)
6572 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6573 gimple *negate_stmt
6574 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6575 insert_stmt_after (negate_stmt, new_call);
6576 oe->op = negrhs;
6578 else
6579 oe->op = lhs;
6580 if (dump_file && (dump_flags & TDF_DETAILS))
6582 fprintf (dump_file, "Optimizing copysign: ");
6583 print_generic_expr (dump_file, cst);
6584 fprintf (dump_file, " * COPYSIGN (");
6585 print_generic_expr (dump_file, arg0);
6586 fprintf (dump_file, ", ");
6587 print_generic_expr (dump_file, arg1);
6588 fprintf (dump_file, ") into %sCOPYSIGN (",
6589 cst1_neg ? "-" : "");
6590 print_generic_expr (dump_file, mul);
6591 fprintf (dump_file, ", ");
6592 print_generic_expr (dump_file, arg1);
6593 fprintf (dump_file, ")\n");
6595 return;
6597 break;
6598 default:
6599 break;
6606 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6608 static void
6609 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6611 tree rhs1;
6613 if (dump_file && (dump_flags & TDF_DETAILS))
6615 fprintf (dump_file, "Transforming ");
6616 print_gimple_stmt (dump_file, stmt, 0);
6619 rhs1 = gimple_assign_rhs1 (stmt);
6620 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6621 update_stmt (stmt);
6622 remove_visited_stmt_chain (rhs1);
6624 if (dump_file && (dump_flags & TDF_DETAILS))
6626 fprintf (dump_file, " into ");
6627 print_gimple_stmt (dump_file, stmt, 0);
6631 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6633 static void
6634 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6635 tree rhs1, tree rhs2)
6637 if (dump_file && (dump_flags & TDF_DETAILS))
6639 fprintf (dump_file, "Transforming ");
6640 print_gimple_stmt (dump_file, stmt, 0);
6643 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6644 update_stmt (gsi_stmt (*gsi));
6645 remove_visited_stmt_chain (rhs1);
6647 if (dump_file && (dump_flags & TDF_DETAILS))
6649 fprintf (dump_file, " into ");
6650 print_gimple_stmt (dump_file, stmt, 0);
6654 /* Reassociate expressions in basic block BB and its post-dominator
6655 children.
6657 Bubble up return status from maybe_optimize_range_tests. */
6659 static bool
6660 reassociate_bb (basic_block bb)
6662 gimple_stmt_iterator gsi;
6663 basic_block son;
6664 gimple *stmt = last_nondebug_stmt (bb);
6665 bool cfg_cleanup_needed = false;
6667 if (stmt && !gimple_visited_p (stmt))
6668 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6670 bool do_prev = false;
6671 for (gsi = gsi_last_bb (bb);
6672 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6674 do_prev = true;
6675 stmt = gsi_stmt (gsi);
6677 if (is_gimple_assign (stmt)
6678 && !stmt_could_throw_p (cfun, stmt))
6680 tree lhs, rhs1, rhs2;
6681 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6683 /* If this was part of an already processed statement,
6684 we don't need to touch it again. */
6685 if (gimple_visited_p (stmt))
6687 /* This statement might have become dead because of previous
6688 reassociations. */
6689 if (has_zero_uses (gimple_get_lhs (stmt)))
6691 reassoc_remove_stmt (&gsi);
6692 release_defs (stmt);
6693 /* We might end up removing the last stmt above which
6694 places the iterator to the end of the sequence.
6695 Reset it to the last stmt in this case and make sure
6696 we don't do gsi_prev in that case. */
6697 if (gsi_end_p (gsi))
6699 gsi = gsi_last_bb (bb);
6700 do_prev = false;
6703 continue;
6706 /* If this is not a gimple binary expression, there is
6707 nothing for us to do with it. */
6708 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6709 continue;
6711 lhs = gimple_assign_lhs (stmt);
6712 rhs1 = gimple_assign_rhs1 (stmt);
6713 rhs2 = gimple_assign_rhs2 (stmt);
6715 /* For non-bit or min/max operations we can't associate
6716 all types. Verify that here. */
6717 if ((rhs_code != BIT_IOR_EXPR
6718 && rhs_code != BIT_AND_EXPR
6719 && rhs_code != BIT_XOR_EXPR
6720 && rhs_code != MIN_EXPR
6721 && rhs_code != MAX_EXPR
6722 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6723 || !can_reassociate_op_p (rhs1)
6724 || !can_reassociate_op_p (rhs2))
6725 continue;
6727 if (associative_tree_code (rhs_code))
6729 auto_vec<operand_entry *> ops;
6730 tree powi_result = NULL_TREE;
6731 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6733 /* There may be no immediate uses left by the time we
6734 get here because we may have eliminated them all. */
6735 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6736 continue;
6738 gimple_set_visited (stmt, true);
6739 linearize_expr_tree (&ops, stmt, true, true);
6740 ops.qsort (sort_by_operand_rank);
6741 int orig_len = ops.length ();
6742 optimize_ops_list (rhs_code, &ops);
6743 if (undistribute_ops_list (rhs_code, &ops,
6744 loop_containing_stmt (stmt)))
6746 ops.qsort (sort_by_operand_rank);
6747 optimize_ops_list (rhs_code, &ops);
6749 if (undistribute_bitref_for_vector (rhs_code, &ops,
6750 loop_containing_stmt (stmt)))
6752 ops.qsort (sort_by_operand_rank);
6753 optimize_ops_list (rhs_code, &ops);
6755 if (rhs_code == PLUS_EXPR
6756 && transform_add_to_multiply (&ops))
6757 ops.qsort (sort_by_operand_rank);
6759 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6761 if (is_vector)
6762 optimize_vec_cond_expr (rhs_code, &ops);
6763 else
6764 optimize_range_tests (rhs_code, &ops, NULL);
6767 if (rhs_code == MULT_EXPR && !is_vector)
6769 attempt_builtin_copysign (&ops);
6771 if (reassoc_insert_powi_p
6772 && (flag_unsafe_math_optimizations
6773 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6774 powi_result = attempt_builtin_powi (stmt, &ops);
6777 operand_entry *last;
6778 bool negate_result = false;
6779 if (ops.length () > 1
6780 && rhs_code == MULT_EXPR)
6782 last = ops.last ();
6783 if ((integer_minus_onep (last->op)
6784 || real_minus_onep (last->op))
6785 && !HONOR_SNANS (TREE_TYPE (lhs))
6786 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6787 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6789 ops.pop ();
6790 negate_result = true;
6794 tree new_lhs = lhs;
6795 /* If the operand vector is now empty, all operands were
6796 consumed by the __builtin_powi optimization. */
6797 if (ops.length () == 0)
6798 transform_stmt_to_copy (&gsi, stmt, powi_result);
6799 else if (ops.length () == 1)
6801 tree last_op = ops.last ()->op;
6803 /* If the stmt that defines operand has to be inserted, insert it
6804 before the use. */
6805 if (ops.last ()->stmt_to_insert)
6806 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6807 if (powi_result)
6808 transform_stmt_to_multiply (&gsi, stmt, last_op,
6809 powi_result);
6810 else
6811 transform_stmt_to_copy (&gsi, stmt, last_op);
6813 else
6815 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6816 int ops_num = ops.length ();
6817 int width;
6819 /* For binary bit operations, if there are at least 3
6820 operands and the last operand in OPS is a constant,
6821 move it to the front. This helps ensure that we generate
6822 (X & Y) & C rather than (X & C) & Y. The former will
6823 often match a canonical bit test when we get to RTL. */
6824 if (ops.length () > 2
6825 && (rhs_code == BIT_AND_EXPR
6826 || rhs_code == BIT_IOR_EXPR
6827 || rhs_code == BIT_XOR_EXPR)
6828 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6829 std::swap (*ops[0], *ops[ops_num - 1]);
6831 /* Only rewrite the expression tree into parallel form in the
6832 last reassoc pass to avoid useless work back-and-forth
6833 with initial linearization. */
6834 if (!reassoc_insert_powi_p
6835 && ops.length () > 3
6836 && (width = get_reassociation_width (ops_num, rhs_code,
6837 mode)) > 1)
6839 if (dump_file && (dump_flags & TDF_DETAILS))
6840 fprintf (dump_file,
6841 "Width = %d was chosen for reassociation\n",
6842 width);
6843 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6844 width, ops);
6846 else
6848 /* When there are three operands left, we want
6849 to make sure the ones that get the double
6850 binary op are chosen wisely. */
6851 int len = ops.length ();
6852 if (len >= 3)
6853 swap_ops_for_binary_stmt (ops, len - 3);
6855 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6856 powi_result != NULL
6857 || negate_result,
6858 len != orig_len);
6861 /* If we combined some repeated factors into a
6862 __builtin_powi call, multiply that result by the
6863 reassociated operands. */
6864 if (powi_result)
6866 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6867 tree type = TREE_TYPE (lhs);
6868 tree target_ssa = make_temp_ssa_name (type, NULL,
6869 "reassocpow");
6870 gimple_set_lhs (lhs_stmt, target_ssa);
6871 update_stmt (lhs_stmt);
6872 if (lhs != new_lhs)
6874 target_ssa = new_lhs;
6875 new_lhs = lhs;
6877 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6878 powi_result, target_ssa);
6879 gimple_set_location (mul_stmt, gimple_location (stmt));
6880 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6881 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6885 if (negate_result)
6887 stmt = SSA_NAME_DEF_STMT (lhs);
6888 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6889 gimple_set_lhs (stmt, tmp);
6890 if (lhs != new_lhs)
6891 tmp = new_lhs;
6892 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6893 tmp);
6894 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6895 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6896 update_stmt (stmt);
6901 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6902 son;
6903 son = next_dom_son (CDI_POST_DOMINATORS, son))
6904 cfg_cleanup_needed |= reassociate_bb (son);
6906 return cfg_cleanup_needed;
6909 /* Add jumps around shifts for range tests turned into bit tests.
6910 For each SSA_NAME VAR we have code like:
6911 VAR = ...; // final stmt of range comparison
6912 // bit test here...;
6913 OTHERVAR = ...; // final stmt of the bit test sequence
6914 RES = VAR | OTHERVAR;
6915 Turn the above into:
6916 VAR = ...;
6917 if (VAR != 0)
6918 goto <l3>;
6919 else
6920 goto <l2>;
6921 <l2>:
6922 // bit test here...;
6923 OTHERVAR = ...;
6924 <l3>:
6925 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6927 static void
6928 branch_fixup (void)
6930 tree var;
6931 unsigned int i;
6933 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6935 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6936 gimple *use_stmt;
6937 use_operand_p use;
6938 bool ok = single_imm_use (var, &use, &use_stmt);
6939 gcc_assert (ok
6940 && is_gimple_assign (use_stmt)
6941 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6942 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6944 basic_block cond_bb = gimple_bb (def_stmt);
6945 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6946 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6948 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6949 gimple *g = gimple_build_cond (NE_EXPR, var,
6950 build_zero_cst (TREE_TYPE (var)),
6951 NULL_TREE, NULL_TREE);
6952 location_t loc = gimple_location (use_stmt);
6953 gimple_set_location (g, loc);
6954 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6956 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6957 etrue->probability = profile_probability::even ();
6958 edge efalse = find_edge (cond_bb, then_bb);
6959 efalse->flags = EDGE_FALSE_VALUE;
6960 efalse->probability -= etrue->probability;
6961 then_bb->count -= etrue->count ();
6963 tree othervar = NULL_TREE;
6964 if (gimple_assign_rhs1 (use_stmt) == var)
6965 othervar = gimple_assign_rhs2 (use_stmt);
6966 else if (gimple_assign_rhs2 (use_stmt) == var)
6967 othervar = gimple_assign_rhs1 (use_stmt);
6968 else
6969 gcc_unreachable ();
6970 tree lhs = gimple_assign_lhs (use_stmt);
6971 gphi *phi = create_phi_node (lhs, merge_bb);
6972 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6973 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6974 gsi = gsi_for_stmt (use_stmt);
6975 gsi_remove (&gsi, true);
6977 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6978 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6980 reassoc_branch_fixups.release ();
6983 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6984 void debug_ops_vector (vec<operand_entry *> ops);
6986 /* Dump the operand entry vector OPS to FILE. */
6988 void
6989 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6991 operand_entry *oe;
6992 unsigned int i;
6994 FOR_EACH_VEC_ELT (ops, i, oe)
6996 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6997 print_generic_expr (file, oe->op);
6998 fprintf (file, "\n");
7002 /* Dump the operand entry vector OPS to STDERR. */
7004 DEBUG_FUNCTION void
7005 debug_ops_vector (vec<operand_entry *> ops)
7007 dump_ops_vector (stderr, ops);
7010 /* Bubble up return status from reassociate_bb. */
7012 static bool
7013 do_reassoc (void)
7015 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7016 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7019 /* Initialize the reassociation pass. */
7021 static void
7022 init_reassoc (void)
7024 int i;
7025 int64_t rank = 2;
7026 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7028 /* Find the loops, so that we can prevent moving calculations in
7029 them. */
7030 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7032 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7034 next_operand_entry_id = 0;
7036 /* Reverse RPO (Reverse Post Order) will give us something where
7037 deeper loops come later. */
7038 pre_and_rev_post_order_compute (NULL, bbs, false);
7039 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7040 operand_rank = new hash_map<tree, int64_t>;
7042 /* Give each default definition a distinct rank. This includes
7043 parameters and the static chain. Walk backwards over all
7044 SSA names so that we get proper rank ordering according
7045 to tree_swap_operands_p. */
7046 for (i = num_ssa_names - 1; i > 0; --i)
7048 tree name = ssa_name (i);
7049 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7050 insert_operand_rank (name, ++rank);
7053 /* Set up rank for each BB */
7054 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7055 bb_rank[bbs[i]] = ++rank << 16;
7057 free (bbs);
7058 calculate_dominance_info (CDI_POST_DOMINATORS);
7059 plus_negates = vNULL;
7062 /* Cleanup after the reassociation pass, and print stats if
7063 requested. */
7065 static void
7066 fini_reassoc (void)
7068 statistics_counter_event (cfun, "Linearized",
7069 reassociate_stats.linearized);
7070 statistics_counter_event (cfun, "Constants eliminated",
7071 reassociate_stats.constants_eliminated);
7072 statistics_counter_event (cfun, "Ops eliminated",
7073 reassociate_stats.ops_eliminated);
7074 statistics_counter_event (cfun, "Statements rewritten",
7075 reassociate_stats.rewritten);
7076 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7077 reassociate_stats.pows_encountered);
7078 statistics_counter_event (cfun, "Built-in powi calls created",
7079 reassociate_stats.pows_created);
7081 delete operand_rank;
7082 bitmap_clear (biased_names);
7083 operand_entry_pool.release ();
7084 free (bb_rank);
7085 plus_negates.release ();
7086 free_dominance_info (CDI_POST_DOMINATORS);
7087 loop_optimizer_finalize ();
7090 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7091 insertion of __builtin_powi calls.
7093 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7094 optimization of a gimple conditional. Otherwise returns zero. */
7096 static unsigned int
7097 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7099 reassoc_insert_powi_p = insert_powi_p;
7100 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7102 init_reassoc ();
7104 bool cfg_cleanup_needed;
7105 cfg_cleanup_needed = do_reassoc ();
7106 repropagate_negates ();
7107 branch_fixup ();
7109 fini_reassoc ();
7110 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7113 namespace {
7115 const pass_data pass_data_reassoc =
7117 GIMPLE_PASS, /* type */
7118 "reassoc", /* name */
7119 OPTGROUP_NONE, /* optinfo_flags */
7120 TV_TREE_REASSOC, /* tv_id */
7121 ( PROP_cfg | PROP_ssa ), /* properties_required */
7122 0, /* properties_provided */
7123 0, /* properties_destroyed */
7124 0, /* todo_flags_start */
7125 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7128 class pass_reassoc : public gimple_opt_pass
7130 public:
7131 pass_reassoc (gcc::context *ctxt)
7132 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7135 /* opt_pass methods: */
7136 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7137 void set_pass_param (unsigned int n, bool param) final override
7139 gcc_assert (n == 0);
7140 insert_powi_p = param;
7141 bias_loop_carried_phi_ranks_p = !param;
7143 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7144 unsigned int execute (function *) final override
7146 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7149 private:
7150 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7151 point 3a in the pass header comment. */
7152 bool insert_powi_p;
7153 bool bias_loop_carried_phi_ranks_p;
7154 }; // class pass_reassoc
7156 } // anon namespace
7158 gimple_opt_pass *
7159 make_pass_reassoc (gcc::context *ctxt)
7161 return new pass_reassoc (ctxt);