gcc/tree-ssa-reassoc.cc
1 /* Reassociation for trees.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57 #include "internal-fn.h"
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
63 It consists of five steps:
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_*
73 3. Optimization of the operand lists, eliminating things like a +
74 -a, a & a, etc.
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
83 5. Repropagate negates, as nothing else will clean it up ATM.
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
104 a (1), b (1), c (1), d (2), e (2)
107 We start with our merge worklist empty, and the ops list with all of
108 those on it.
110 You want to first merge all leaves of the same rank, as much as
111 possible.
113 So first build a binary op of
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
125 Then build a binary op of d + e
126 mergetmp2 = d + e
128 and put mergetmp2 on the merge worklist.
130 so merge worklist = {mergetmp, c, mergetmp2}
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
135 So we have
137 build binary op
138 mergetmp3 = mergetmp + c
140 worklist = {mergetmp2, mergetmp3}
142 mergetmp4 = mergetmp2 + mergetmp3
144 worklist = {mergetmp4}
146 because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
155 So why don't we do this?
157 Because it's expensive, and rarely will help. Most trees we are
158 reassociating have 3 or less ops. If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
163 mergetmp = op1 + op2
164 newstmt = mergetmp + op3
166 instead of
167 mergetmp = op2 + op3
168 newstmt = mergetmp + op1
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
172 can do.
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
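/* An illustrative sketch of the whole pass, not taken from the sources
   (temporaries and ordering are simplified): assuming a wrapping integer
   type, a chain such as

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 - a;

   is broken into additions of negates (step 1), linearized into a single
   operand list { a, -b, c, -a } (step 2), the a / -a pair is eliminated
   (step 3), and the remaining operands are rewritten in rank order
   (steps 4 and 5), leaving the equivalent of

     t3 = c - b;  */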
180 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 point 3a in the pass header comment. */
182 static bool reassoc_insert_powi_p;
184 /* Enable biasing ranks of loop accumulators. We don't want this before
185 vectorization, since it interferes with reduction chains. */
186 static bool reassoc_bias_loop_carried_phi_ranks_p;
188 /* Statistics */
189 static struct
191 int linearized;
192 int constants_eliminated;
193 int ops_eliminated;
194 int rewritten;
195 int pows_encountered;
196 int pows_created;
197 } reassociate_stats;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static unsigned int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static int64_t *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, int64_t> *operand_rank;
215 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 biased. */
217 static auto_bitmap biased_names;
219 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
220 all basic blocks the CFG should be adjusted - basic blocks
221 split right after that SSA_NAME's definition statement and before
222 the only use, which must be a bit ior. */
223 static vec<tree> reassoc_branch_fixups;
225 /* Forward decls. */
226 static int64_t get_rank (tree);
227 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
229 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 possibly added by gsi_remove. */
232 static bool
233 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
235 gimple *stmt = gsi_stmt (*gsi);
237 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 return gsi_remove (gsi, true);
240 gimple_stmt_iterator prev = *gsi;
241 gsi_prev (&prev);
242 unsigned uid = gimple_uid (stmt);
243 basic_block bb = gimple_bb (stmt);
244 bool ret = gsi_remove (gsi, true);
245 if (!gsi_end_p (prev))
246 gsi_next (&prev);
247 else
248 prev = gsi_start_bb (bb);
249 gimple *end_stmt = gsi_stmt (*gsi);
250 while ((stmt = gsi_stmt (prev)) != end_stmt)
252 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 gimple_set_uid (stmt, uid);
254 gsi_next (&prev);
256 return ret;
259 /* Bias amount for loop-carried phis. We want this to be larger than
260 the depth of any reassociation tree we can see, but not larger than
261 the rank difference between two blocks. */
262 #define PHI_LOOP_BIAS (1 << 15)
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
266 like this:
268 x_1 = phi(x_0, x_2)
269 a = x_1 | 1
270 b = a ^ 2
271 .MEM = b
272 c = b + d
273 x_2 = c + e
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
278 static bool
279 propagate_bias_p (gimple *stmt)
281 use_operand_p use;
282 imm_use_iterator use_iter;
283 gimple *single_use_stmt = NULL;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 return false;
288 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
290 gimple *current_use_stmt = USE_STMT (use);
292 if (is_gimple_assign (current_use_stmt)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
295 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 return false;
297 single_use_stmt = current_use_stmt;
301 if (single_use_stmt == NULL)
302 return false;
304 if (gimple_bb (stmt)->loop_father
305 != gimple_bb (single_use_stmt)->loop_father)
306 return false;
308 return true;
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
318 static int64_t
319 phi_rank (gimple *stmt)
321 basic_block bb = gimple_bb (stmt);
322 class loop *father = bb->loop_father;
323 tree res;
324 unsigned i;
325 use_operand_p use;
326 gimple *use_stmt;
328 if (!reassoc_bias_loop_carried_phi_ranks_p)
329 return bb_rank[bb->index];
331 /* We only care about real loops (those with a latch). */
332 if (!father->latch)
333 return bb_rank[bb->index];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb != father->header
337 || father->inner)
338 return bb_rank[bb->index];
340 /* Ignore virtual SSA_NAMEs. */
341 res = gimple_phi_result (stmt);
342 if (virtual_operand_p (res))
343 return bb_rank[bb->index];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res, &use, &use_stmt)
348 || gimple_bb (use_stmt)->loop_father != father)
349 return bb_rank[bb->index];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i = 0; i < gimple_phi_num_args (stmt); i++)
354 tree arg = gimple_phi_arg_def (stmt, i);
355 if (TREE_CODE (arg) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg))
358 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 if (gimple_bb (def_stmt)->loop_father == father)
360 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
364 /* Must be an uninteresting phi. */
365 return bb_rank[bb->index];
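/* A hedged example of the biasing above (block numbers and ranks are made
   up): suppose a one-block innermost loop whose header bb 3 is also its
   latch, and assume bb_rank[3] is R.  An accumulator phi
   x_1 = PHI <x_0, x_2> in that header whose only use is inside the loop is
   given rank R + PHI_LOOP_BIAS instead of R, so that when operand lists are
   later sorted by rank the loop-carried value sinks to the end of the
   rewritten chain.  */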
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
372 static int64_t
373 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
375 int64_t op_rank;
377 op_rank = get_rank (op);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op) == SSA_NAME
382 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
384 if (maybe_biased_p == NULL)
385 return rank;
386 *maybe_biased_p = true;
389 return MAX (rank, op_rank);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e)
397 int64_t *slot = operand_rank->get (e);
398 return slot ? *slot : -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
403 static inline void
404 insert_operand_rank (tree e, int64_t rank)
406 gcc_assert (rank > 0);
407 gcc_assert (!operand_rank->put (e, rank));
410 /* Given an expression E, return the rank of the expression. */
412 static int64_t
413 get_rank (tree e)
415 /* SSA_NAME's have the rank of the expression they are the result of.
417 For globals and uninitialized values, the rank is 0.
418 For function arguments, use the pre-setup rank.
419 For PHI nodes, stores, asm statements, etc, we use the rank of
420 the BB.
421 For simple operations, the rank is the maximum rank of any of
422 its operands, or the bb_rank, whichever is less.
423 I make no claims that this is optimal, however, it gives good
424 results. */
426 /* We make an exception to the normal ranking system to break
427 dependences of accumulator variables in loops. Suppose we
428 have a simple one-block loop containing:
430 x_1 = phi(x_0, x_2)
431 b = a + x_1
432 c = b + d
433 x_2 = c + e
435 As shown, each iteration of the calculation into x is fully
436 dependent upon the iteration before it. We would prefer to
437 see this in the form:
439 x_1 = phi(x_0, x_2)
440 b = a + d
441 c = b + e
442 x_2 = c + x_1
444 If the loop is unrolled, the calculations of b and c from
445 different iterations can be interleaved.
447 To obtain this result during reassociation, we bias the rank
448 of the phi definition x_1 upward, when it is recognized as an
449 accumulator pattern. The artificial rank causes it to be
450 added last, providing the desired independence. */
452 if (TREE_CODE (e) == SSA_NAME)
454 ssa_op_iter iter;
455 gimple *stmt;
456 int64_t rank;
457 tree op;
459 /* If we already have a rank for this expression, use that. */
460 rank = find_operand_rank (e);
461 if (rank != -1)
462 return rank;
464 stmt = SSA_NAME_DEF_STMT (e);
465 if (gimple_code (stmt) == GIMPLE_PHI)
467 rank = phi_rank (stmt);
468 if (rank != bb_rank[gimple_bb (stmt)->index])
469 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
472 else if (!is_gimple_assign (stmt))
473 rank = bb_rank[gimple_bb (stmt)->index];
475 else
477 bool biased_p = false;
478 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
480 /* Otherwise, find the maximum rank for the operands. As an
481 exception, remove the bias from loop-carried phis when propagating
482 the rank so that dependent operations are not also biased. */
483 /* Simply walk over all SSA uses - this takes advantage of the
484 fact that non-SSA operands are is_gimple_min_invariant and
485 thus have rank 0. */
486 rank = 0;
487 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
488 rank = propagate_rank (rank, op, maybe_biased_p);
490 rank += 1;
491 if (biased_p)
492 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
495 if (dump_file && (dump_flags & TDF_DETAILS))
497 fprintf (dump_file, "Rank for ");
498 print_generic_expr (dump_file, e);
499 fprintf (dump_file, " is %" PRId64 "\n", rank);
502 /* Note the rank in the hashtable so we don't recompute it. */
503 insert_operand_rank (e, rank);
504 return rank;
507 /* Constants, globals, etc., are rank 0 */
508 return 0;
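/* A small worked example of the ranking, with made-up pre-assigned ranks:
   assume parameters a_1 and b_2 were given rank 1 at pass setup.  Then for

     c_3 = a_1 + b_2;     rank (c_3) = MAX (1, 1) + 1 = 2
     d_4 = c_3 + a_1;     rank (d_4) = MAX (2, 1) + 1 = 3
     e_5 = d_4 + 5;       rank (e_5) = MAX (3, 0) + 1 = 4

   constants always contribute rank 0, so they end up last when an operand
   list is sorted by decreasing rank.  */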
512 /* We want integer ones to end up last no matter what, since they are
513 the ones we can do the most with. */
514 #define INTEGER_CONST_TYPE 1 << 4
515 #define FLOAT_ONE_CONST_TYPE 1 << 3
516 #define FLOAT_CONST_TYPE 1 << 2
517 #define OTHER_CONST_TYPE 1 << 1
519 /* Classify an invariant tree into integer, float, or other, so that
520 we can sort them to be near other constants of the same type. */
521 static inline int
522 constant_type (tree t)
524 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
525 return INTEGER_CONST_TYPE;
526 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
528 /* Sort -1.0 and 1.0 constants last: while in some cases
529 const_binop can't optimize some inexact operations, multiplication
530 by -1.0 or 1.0 can always be merged with others. */
531 if (real_onep (t) || real_minus_onep (t))
532 return FLOAT_ONE_CONST_TYPE;
533 return FLOAT_CONST_TYPE;
535 else
536 return OTHER_CONST_TYPE;
539 /* qsort comparison function to sort operand entries PA and PB by rank
540 so that the sorted array is ordered by rank in decreasing order. */
541 static int
542 sort_by_operand_rank (const void *pa, const void *pb)
544 const operand_entry *oea = *(const operand_entry *const *)pa;
545 const operand_entry *oeb = *(const operand_entry *const *)pb;
547 if (oeb->rank != oea->rank)
548 return oeb->rank > oea->rank ? 1 : -1;
550 /* It's nicer for optimize_expression if constants that are likely
551 to fold when added/multiplied/whatever are put next to each
552 other. Since all constants have rank 0, order them by type. */
553 if (oea->rank == 0)
555 if (constant_type (oeb->op) != constant_type (oea->op))
556 return constant_type (oea->op) - constant_type (oeb->op);
557 else
558 /* To make sorting result stable, we use unique IDs to determine
559 order. */
560 return oeb->id > oea->id ? 1 : -1;
563 if (TREE_CODE (oea->op) != SSA_NAME)
565 if (TREE_CODE (oeb->op) != SSA_NAME)
566 return oeb->id > oea->id ? 1 : -1;
567 else
568 return 1;
570 else if (TREE_CODE (oeb->op) != SSA_NAME)
571 return -1;
573 /* Lastly, make sure the versions that are the same go next to each
574 other. */
575 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
578 versions of removed SSA_NAMEs, so if possible, prefer to sort
579 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
580 See PR60418. */
581 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
582 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
583 basic_block bba = gimple_bb (stmta);
584 basic_block bbb = gimple_bb (stmtb);
585 if (bbb != bba)
587 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
588 but the other might not. */
589 if (!bba)
590 return 1;
591 if (!bbb)
592 return -1;
593 /* If neither is, compare bb_rank. */
594 if (bb_rank[bbb->index] != bb_rank[bba->index])
595 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
598 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600 if (da != db)
601 return da ? 1 : -1;
603 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
606 return oeb->id > oea->id ? 1 : -1;
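/* Illustrative result of the comparator above (ranks are made up): for a
   float PLUS chain with operands { 1.0, x_7 (rank 3), y_8 (rank 2), 2.5 }
   the sorted vector is

     { x_7, y_8, 2.5, 1.0 }

   i.e. decreasing rank first; among the rank-0 constants constant_type ()
   orders +/-1.0 after other float constants, and integer constants would
   sort last of all.  */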
609 /* Add an operand entry to *OPS for the tree operand OP. */
611 static void
612 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
614 operand_entry *oe = operand_entry_pool.allocate ();
616 oe->op = op;
617 oe->rank = get_rank (op);
618 oe->id = next_operand_entry_id++;
619 oe->count = 1;
620 oe->stmt_to_insert = stmt_to_insert;
621 ops->safe_push (oe);
624 /* Add an operand entry to *OPS for the tree operand OP with repeat
625 count REPEAT. */
627 static void
628 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
629 HOST_WIDE_INT repeat)
631 operand_entry *oe = operand_entry_pool.allocate ();
633 oe->op = op;
634 oe->rank = get_rank (op);
635 oe->id = next_operand_entry_id++;
636 oe->count = repeat;
637 oe->stmt_to_insert = NULL;
638 ops->safe_push (oe);
640 reassociate_stats.pows_encountered++;
643 /* Returns true if we can associate the SSA def OP. */
645 static bool
646 can_reassociate_op_p (tree op)
648 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
649 return false;
650 /* Make sure asm goto outputs do not participate in reassociation since
651 we have no way to find an insertion place after asm goto. */
652 if (TREE_CODE (op) == SSA_NAME
653 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
654 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
655 return false;
656 return true;
659 /* Returns true if we can reassociate operations of TYPE.
660 That is for integral or non-saturating fixed-point types, and for
661 floating point type when associative-math is enabled. */
663 static bool
664 can_reassociate_type_p (tree type)
666 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
667 || NON_SAT_FIXED_POINT_TYPE_P (type)
668 || (flag_associative_math && FLOAT_TYPE_P (type)))
669 return true;
670 return false;
673 /* Return true if STMT is a reassociable operation containing a binary
674 operation with tree code CODE, and is inside LOOP. */
676 static bool
677 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
679 basic_block bb = gimple_bb (stmt);
681 if (gimple_bb (stmt) == NULL)
682 return false;
684 if (!flow_bb_inside_loop_p (loop, bb))
685 return false;
687 if (is_gimple_assign (stmt)
688 && gimple_assign_rhs_code (stmt) == code
689 && has_single_use (gimple_assign_lhs (stmt)))
691 tree rhs1 = gimple_assign_rhs1 (stmt);
692 tree rhs2 = gimple_assign_rhs2 (stmt);
693 if (!can_reassociate_op_p (rhs1)
694 || (rhs2 && !can_reassociate_op_p (rhs2)))
695 return false;
696 return true;
699 return false;
703 /* Return true if STMT is a nop-conversion. */
705 static bool
706 gimple_nop_conversion_p (gimple *stmt)
708 if (gassign *ass = dyn_cast <gassign *> (stmt))
710 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
711 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
712 TREE_TYPE (gimple_assign_rhs1 (ass))))
713 return true;
715 return false;
718 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
719 operand of the negate operation. Otherwise, return NULL. */
721 static tree
722 get_unary_op (tree name, enum tree_code opcode)
724 gimple *stmt = SSA_NAME_DEF_STMT (name);
726 /* Look through nop conversions (sign changes). */
727 if (gimple_nop_conversion_p (stmt)
728 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
729 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
731 if (!is_gimple_assign (stmt))
732 return NULL_TREE;
734 if (gimple_assign_rhs_code (stmt) == opcode)
735 return gimple_assign_rhs1 (stmt);
736 return NULL_TREE;
739 /* Return true if OP1 and OP2 have the same value if casted to either type. */
741 static bool
742 ops_equal_values_p (tree op1, tree op2)
744 if (op1 == op2)
745 return true;
747 tree orig_op1 = op1;
748 if (TREE_CODE (op1) == SSA_NAME)
750 gimple *stmt = SSA_NAME_DEF_STMT (op1);
751 if (gimple_nop_conversion_p (stmt))
753 op1 = gimple_assign_rhs1 (stmt);
754 if (op1 == op2)
755 return true;
759 if (TREE_CODE (op2) == SSA_NAME)
761 gimple *stmt = SSA_NAME_DEF_STMT (op2);
762 if (gimple_nop_conversion_p (stmt))
764 op2 = gimple_assign_rhs1 (stmt);
765 if (op1 == op2
766 || orig_op1 == op2)
767 return true;
771 return false;
775 /* If CURR and LAST are a pair of ops that OPCODE allows us to
776 eliminate through equivalences, do so, remove them from OPS, and
777 return true. Otherwise, return false. */
779 static bool
780 eliminate_duplicate_pair (enum tree_code opcode,
781 vec<operand_entry *> *ops,
782 bool *all_done,
783 unsigned int i,
784 operand_entry *curr,
785 operand_entry *last)
788 /* If we have two of the same op, and the opcode is & |, min, or max,
789 we can eliminate one of them.
790 If we have two of the same op, and the opcode is ^, we can
791 eliminate both of them. */
793 if (last && last->op == curr->op)
795 switch (opcode)
797 case MAX_EXPR:
798 case MIN_EXPR:
799 case BIT_IOR_EXPR:
800 case BIT_AND_EXPR:
801 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "Equivalence: ");
804 print_generic_expr (dump_file, curr->op);
805 fprintf (dump_file, " [&|minmax] ");
806 print_generic_expr (dump_file, last->op);
807 fprintf (dump_file, " -> ");
808 print_generic_stmt (dump_file, last->op);
811 ops->ordered_remove (i);
812 reassociate_stats.ops_eliminated ++;
814 return true;
816 case BIT_XOR_EXPR:
817 if (dump_file && (dump_flags & TDF_DETAILS))
819 fprintf (dump_file, "Equivalence: ");
820 print_generic_expr (dump_file, curr->op);
821 fprintf (dump_file, " ^ ");
822 print_generic_expr (dump_file, last->op);
823 fprintf (dump_file, " -> nothing\n");
826 reassociate_stats.ops_eliminated += 2;
828 if (ops->length () == 2)
830 ops->truncate (0);
831 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
832 *all_done = true;
834 else
836 ops->ordered_remove (i-1);
837 ops->ordered_remove (i-1);
840 return true;
842 default:
843 break;
846 return false;
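/* Sketch of the eliminations above (operand names are illustrative):
   for  t = a & b & a  the duplicate pair leaves  t = a & b;
   for  t = a ^ b ^ a  both copies of a are dropped, leaving  t = b;
   and for  t = a ^ a  the whole list collapses to the constant 0.  */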
849 static vec<tree> plus_negates;
851 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
852 expression, look in OPS for a corresponding positive operation to cancel
853 it out. If we find one, remove the other from OPS, replace
854 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
855 return false. */
857 static bool
858 eliminate_plus_minus_pair (enum tree_code opcode,
859 vec<operand_entry *> *ops,
860 unsigned int currindex,
861 operand_entry *curr)
863 tree negateop;
864 tree notop;
865 unsigned int i;
866 operand_entry *oe;
868 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
869 return false;
871 negateop = get_unary_op (curr->op, NEGATE_EXPR);
872 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
873 if (negateop == NULL_TREE && notop == NULL_TREE)
874 return false;
876 /* Any non-negated version will have a rank that is one less than
877 the current rank. So once we hit those ranks, if we don't find
878 one, we can stop. */
880 for (i = currindex + 1;
881 ops->iterate (i, &oe)
882 && oe->rank >= curr->rank - 1 ;
883 i++)
885 if (negateop
886 && ops_equal_values_p (oe->op, negateop))
888 if (dump_file && (dump_flags & TDF_DETAILS))
890 fprintf (dump_file, "Equivalence: ");
891 print_generic_expr (dump_file, negateop);
892 fprintf (dump_file, " + -");
893 print_generic_expr (dump_file, oe->op);
894 fprintf (dump_file, " -> 0\n");
897 ops->ordered_remove (i);
898 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
899 ops->ordered_remove (currindex);
900 reassociate_stats.ops_eliminated ++;
902 return true;
904 else if (notop
905 && ops_equal_values_p (oe->op, notop))
907 tree op_type = TREE_TYPE (oe->op);
909 if (dump_file && (dump_flags & TDF_DETAILS))
911 fprintf (dump_file, "Equivalence: ");
912 print_generic_expr (dump_file, notop);
913 fprintf (dump_file, " + ~");
914 print_generic_expr (dump_file, oe->op);
915 fprintf (dump_file, " -> -1\n");
918 ops->ordered_remove (i);
919 add_to_ops_vec (ops, build_all_ones_cst (op_type));
920 ops->ordered_remove (currindex);
921 reassociate_stats.ops_eliminated ++;
923 return true;
927 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
928 save it for later inspection in repropagate_negates(). */
929 if (negateop != NULL_TREE
930 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
931 plus_negates.safe_push (curr->op);
933 return false;
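/* Sketch of the cancellations above (names are illustrative): in a PLUS
   chain  t = a + b + (-a)  the negate pair is replaced by 0, giving
   t = b + 0, while  t = a + b + ~a  becomes  t = b + (-1); both are
   folded further by later passes over the operand list.  */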
936 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
937 bitwise not expression, look in OPS for a corresponding operand to
938 cancel it out. If we find one, remove the other from OPS, replace
939 OPS[CURRINDEX] with 0, and return true. Otherwise, return
940 false. */
942 static bool
943 eliminate_not_pairs (enum tree_code opcode,
944 vec<operand_entry *> *ops,
945 unsigned int currindex,
946 operand_entry *curr)
948 tree notop;
949 unsigned int i;
950 operand_entry *oe;
952 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
953 || TREE_CODE (curr->op) != SSA_NAME)
954 return false;
956 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
957 if (notop == NULL_TREE)
958 return false;
960 /* Any non-not version will have a rank that is one less than
961 the current rank. So once we hit those ranks, if we don't find
962 one, we can stop. */
964 for (i = currindex + 1;
965 ops->iterate (i, &oe)
966 && oe->rank >= curr->rank - 1;
967 i++)
969 if (oe->op == notop)
971 if (dump_file && (dump_flags & TDF_DETAILS))
973 fprintf (dump_file, "Equivalence: ");
974 print_generic_expr (dump_file, notop);
975 if (opcode == BIT_AND_EXPR)
976 fprintf (dump_file, " & ~");
977 else if (opcode == BIT_IOR_EXPR)
978 fprintf (dump_file, " | ~");
979 print_generic_expr (dump_file, oe->op);
980 if (opcode == BIT_AND_EXPR)
981 fprintf (dump_file, " -> 0\n");
982 else if (opcode == BIT_IOR_EXPR)
983 fprintf (dump_file, " -> -1\n");
986 if (opcode == BIT_AND_EXPR)
987 oe->op = build_zero_cst (TREE_TYPE (oe->op));
988 else if (opcode == BIT_IOR_EXPR)
989 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
991 reassociate_stats.ops_eliminated += ops->length () - 1;
992 ops->truncate (0);
993 ops->quick_push (oe);
994 return true;
998 return false;
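/* Sketch of the elimination above (names are illustrative): in
   t = a & b & ~a  the a / ~a pair forces the whole conjunction to 0, so the
   operand list is collapsed to the single constant 0; dually,
   t = a | b | ~a  collapses to -1 (all ones).  */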
1001 /* Use constant value that may be present in OPS to try to eliminate
1002 operands. Note that this function is only really used when we've
1003 eliminated ops for other reasons, or merged constants. Across
1004 single statements, fold already does all of this, plus more. There
1005 is little point in duplicating logic, so I've only included the
1006 identities that I could ever construct testcases to trigger. */
1008 static void
1009 eliminate_using_constants (enum tree_code opcode,
1010 vec<operand_entry *> *ops)
1012 operand_entry *oelast = ops->last ();
1013 tree type = TREE_TYPE (oelast->op);
1015 if (oelast->rank == 0
1016 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1018 switch (opcode)
1020 case BIT_AND_EXPR:
1021 if (integer_zerop (oelast->op))
1023 if (ops->length () != 1)
1025 if (dump_file && (dump_flags & TDF_DETAILS))
1026 fprintf (dump_file, "Found & 0, removing all other ops\n");
1028 reassociate_stats.ops_eliminated += ops->length () - 1;
1030 ops->truncate (0);
1031 ops->quick_push (oelast);
1032 return;
1035 else if (integer_all_onesp (oelast->op))
1037 if (ops->length () != 1)
1039 if (dump_file && (dump_flags & TDF_DETAILS))
1040 fprintf (dump_file, "Found & -1, removing\n");
1041 ops->pop ();
1042 reassociate_stats.ops_eliminated++;
1045 break;
1046 case BIT_IOR_EXPR:
1047 if (integer_all_onesp (oelast->op))
1049 if (ops->length () != 1)
1051 if (dump_file && (dump_flags & TDF_DETAILS))
1052 fprintf (dump_file, "Found | -1, removing all other ops\n");
1054 reassociate_stats.ops_eliminated += ops->length () - 1;
1056 ops->truncate (0);
1057 ops->quick_push (oelast);
1058 return;
1061 else if (integer_zerop (oelast->op))
1063 if (ops->length () != 1)
1065 if (dump_file && (dump_flags & TDF_DETAILS))
1066 fprintf (dump_file, "Found | 0, removing\n");
1067 ops->pop ();
1068 reassociate_stats.ops_eliminated++;
1071 break;
1072 case MULT_EXPR:
1073 if (integer_zerop (oelast->op)
1074 || (FLOAT_TYPE_P (type)
1075 && !HONOR_NANS (type)
1076 && !HONOR_SIGNED_ZEROS (type)
1077 && real_zerop (oelast->op)))
1079 if (ops->length () != 1)
1081 if (dump_file && (dump_flags & TDF_DETAILS))
1082 fprintf (dump_file, "Found * 0, removing all other ops\n");
1084 reassociate_stats.ops_eliminated += ops->length () - 1;
1085 ops->truncate (0);
1086 ops->quick_push (oelast);
1087 return;
1090 else if (integer_onep (oelast->op)
1091 || (FLOAT_TYPE_P (type)
1092 && !HONOR_SNANS (type)
1093 && real_onep (oelast->op)))
1095 if (ops->length () != 1)
1097 if (dump_file && (dump_flags & TDF_DETAILS))
1098 fprintf (dump_file, "Found * 1, removing\n");
1099 ops->pop ();
1100 reassociate_stats.ops_eliminated++;
1101 return;
1104 break;
1105 case BIT_XOR_EXPR:
1106 case PLUS_EXPR:
1107 case MINUS_EXPR:
1108 if (integer_zerop (oelast->op)
1109 || (FLOAT_TYPE_P (type)
1110 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1111 && fold_real_zero_addition_p (type, 0, oelast->op,
1112 opcode == MINUS_EXPR)))
1114 if (ops->length () != 1)
1116 if (dump_file && (dump_flags & TDF_DETAILS))
1117 fprintf (dump_file, "Found [|^+] 0, removing\n");
1118 ops->pop ();
1119 reassociate_stats.ops_eliminated++;
1120 return;
1123 break;
1124 default:
1125 break;
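/* Sketch of the constant-driven eliminations above, assuming an integer
   chain:  x & 0 & y  collapses to 0,  x | -1 | y  to -1,  x * 0 * y  to 0,
   while neutral elements are simply dropped:  x & -1 -> x,  x | 0 -> x,
   x * 1 -> x  and  x + 0 -> x.  The floating-point cases are guarded by the
   HONOR_* checks in the code above.  */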
1131 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1132 bool, bool);
1134 /* Structure for tracking and counting operands. */
1135 struct oecount {
1136 unsigned int cnt;
1137 unsigned int id;
1138 enum tree_code oecode;
1139 tree op;
1143 /* The heap for the oecount hashtable and the sorted list of operands. */
1144 static vec<oecount> cvec;
1147 /* Oecount hashtable helpers. */
1149 struct oecount_hasher : int_hash <int, 0, 1>
1151 static inline hashval_t hash (int);
1152 static inline bool equal (int, int);
1155 /* Hash function for oecount. */
1157 inline hashval_t
1158 oecount_hasher::hash (int p)
1160 const oecount *c = &cvec[p - 42];
1161 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1164 /* Comparison function for oecount. */
1166 inline bool
1167 oecount_hasher::equal (int p1, int p2)
1169 const oecount *c1 = &cvec[p1 - 42];
1170 const oecount *c2 = &cvec[p2 - 42];
1171 return c1->oecode == c2->oecode && c1->op == c2->op;
1174 /* Comparison function for qsort sorting oecount elements by count. */
1176 static int
1177 oecount_cmp (const void *p1, const void *p2)
1179 const oecount *c1 = (const oecount *)p1;
1180 const oecount *c2 = (const oecount *)p2;
1181 if (c1->cnt != c2->cnt)
1182 return c1->cnt > c2->cnt ? 1 : -1;
1183 else
1184 /* If counts are identical, use unique IDs to stabilize qsort. */
1185 return c1->id > c2->id ? 1 : -1;
1188 /* Return TRUE iff STMT represents a builtin call that raises OP
1189 to some exponent. */
1191 static bool
1192 stmt_is_power_of_op (gimple *stmt, tree op)
1194 if (!is_gimple_call (stmt))
1195 return false;
1197 switch (gimple_call_combined_fn (stmt))
1199 CASE_CFN_POW:
1200 CASE_CFN_POWI:
1201 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1203 default:
1204 return false;
1208 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1209 in place and return the result. Assumes that stmt_is_power_of_op
1210 was previously called for STMT and returned TRUE. */
1212 static HOST_WIDE_INT
1213 decrement_power (gimple *stmt)
1215 REAL_VALUE_TYPE c, cint;
1216 HOST_WIDE_INT power;
1217 tree arg1;
1219 switch (gimple_call_combined_fn (stmt))
1221 CASE_CFN_POW:
1222 arg1 = gimple_call_arg (stmt, 1);
1223 c = TREE_REAL_CST (arg1);
1224 power = real_to_integer (&c) - 1;
1225 real_from_integer (&cint, VOIDmode, power, SIGNED);
1226 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1227 return power;
1229 CASE_CFN_POWI:
1230 arg1 = gimple_call_arg (stmt, 1);
1231 power = TREE_INT_CST_LOW (arg1) - 1;
1232 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1233 return power;
1235 default:
1236 gcc_unreachable ();
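/* Example of the exponent adjustment above (values are illustrative): for
   t = __builtin_powi (x, 5)  the second argument is rewritten in place to 4
   and 4 is returned; when the returned value is 1 (original exponent 2),
   the caller zero_one_operation () replaces the call by the base operand
   itself.  */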
1240 /* Replace the SSA name defined by STMT with a new SSA name and replace
1241 all its uses with the new one. Also return the new SSA name. */
1243 static tree
1244 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1246 gimple *use_stmt;
1247 use_operand_p use;
1248 imm_use_iterator iter;
1249 tree new_lhs, new_debug_lhs = NULL_TREE;
1250 tree lhs = gimple_get_lhs (stmt);
1252 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1253 gimple_set_lhs (stmt, new_lhs);
1255 /* Also need to update GIMPLE_DEBUGs. */
1256 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1258 tree repl = new_lhs;
1259 if (is_gimple_debug (use_stmt))
1261 if (new_debug_lhs == NULL_TREE)
1263 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1264 gdebug *def_temp
1265 = gimple_build_debug_bind (new_debug_lhs,
1266 build2 (opcode, TREE_TYPE (lhs),
1267 new_lhs, op),
1268 stmt);
1269 gimple_set_uid (def_temp, gimple_uid (stmt));
1270 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1271 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1273 repl = new_debug_lhs;
1275 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1276 SET_USE (use, repl);
1277 update_stmt (use_stmt);
1279 return new_lhs;
1282 /* Replace all SSAs defined in STMTS_TO_FIX and replace their
1283 uses with new SSAs. Also do this for the stmt that defines DEF
1284 if *DEF is not OP. */
1286 static void
1287 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1288 vec<gimple *> &stmts_to_fix)
1290 unsigned i;
1291 gimple *stmt;
1293 if (*def != op
1294 && TREE_CODE (*def) == SSA_NAME
1295 && (stmt = SSA_NAME_DEF_STMT (*def))
1296 && gimple_code (stmt) != GIMPLE_NOP)
1297 *def = make_new_ssa_for_def (stmt, opcode, op);
1299 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1300 make_new_ssa_for_def (stmt, opcode, op);
1303 /* Find the single immediate use of STMT's LHS, and replace it
1304 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1305 replace *DEF with OP as well. */
1307 static void
1308 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1310 tree lhs;
1311 gimple *use_stmt;
1312 use_operand_p use;
1313 gimple_stmt_iterator gsi;
1315 if (is_gimple_call (stmt))
1316 lhs = gimple_call_lhs (stmt);
1317 else
1318 lhs = gimple_assign_lhs (stmt);
1320 gcc_assert (has_single_use (lhs));
1321 single_imm_use (lhs, &use, &use_stmt);
1322 if (lhs == *def)
1323 *def = op;
1324 SET_USE (use, op);
1325 if (TREE_CODE (op) != SSA_NAME)
1326 update_stmt (use_stmt);
1327 gsi = gsi_for_stmt (stmt);
1328 unlink_stmt_vdef (stmt);
1329 reassoc_remove_stmt (&gsi);
1330 release_defs (stmt);
1333 /* Walks the linear chain with result *DEF searching for an operation
1334 with operand OP and code OPCODE removing that from the chain. *DEF
1335 is updated if there is only one operand but no operation left. */
1337 static void
1338 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1340 tree orig_def = *def;
1341 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1342 /* PR72835 - Record the stmt chain that has to be updated such that
1343 we don't use the same LHS when the values computed are different. */
1344 auto_vec<gimple *, 64> stmts_to_fix;
1348 tree name;
1350 if (opcode == MULT_EXPR)
1352 if (stmt_is_power_of_op (stmt, op))
1354 if (decrement_power (stmt) == 1)
1356 if (stmts_to_fix.length () > 0)
1357 stmts_to_fix.pop ();
1358 propagate_op_to_single_use (op, stmt, def);
1360 break;
1362 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1364 if (gimple_assign_rhs1 (stmt) == op)
1366 tree cst = build_minus_one_cst (TREE_TYPE (op));
1367 if (stmts_to_fix.length () > 0)
1368 stmts_to_fix.pop ();
1369 propagate_op_to_single_use (cst, stmt, def);
1370 break;
1372 else if (integer_minus_onep (op)
1373 || real_minus_onep (op))
1375 gimple_assign_set_rhs_code
1376 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1377 break;
1382 name = gimple_assign_rhs1 (stmt);
1384 /* If this is the operation we look for and one of the operands
1385 is ours simply propagate the other operand into the stmts
1386 single use. */
1387 if (gimple_assign_rhs_code (stmt) == opcode
1388 && (name == op
1389 || gimple_assign_rhs2 (stmt) == op))
1391 if (name == op)
1392 name = gimple_assign_rhs2 (stmt);
1393 if (stmts_to_fix.length () > 0)
1394 stmts_to_fix.pop ();
1395 propagate_op_to_single_use (name, stmt, def);
1396 break;
1399 /* We might have a multiply of two __builtin_pow* calls, and
1400 the operand might be hiding in the rightmost one. Likewise
1401 this can happen for a negate. */
1402 if (opcode == MULT_EXPR
1403 && gimple_assign_rhs_code (stmt) == opcode
1404 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1405 && has_single_use (gimple_assign_rhs2 (stmt)))
1407 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1408 if (stmt_is_power_of_op (stmt2, op))
1410 if (decrement_power (stmt2) == 1)
1411 propagate_op_to_single_use (op, stmt2, def);
1412 else
1413 stmts_to_fix.safe_push (stmt2);
1414 break;
1416 else if (is_gimple_assign (stmt2)
1417 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1419 if (gimple_assign_rhs1 (stmt2) == op)
1421 tree cst = build_minus_one_cst (TREE_TYPE (op));
1422 propagate_op_to_single_use (cst, stmt2, def);
1423 break;
1425 else if (integer_minus_onep (op)
1426 || real_minus_onep (op))
1428 stmts_to_fix.safe_push (stmt2);
1429 gimple_assign_set_rhs_code
1430 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1431 break;
1436 /* Continue walking the chain. */
1437 gcc_assert (name != op
1438 && TREE_CODE (name) == SSA_NAME);
1439 stmt = SSA_NAME_DEF_STMT (name);
1440 stmts_to_fix.safe_push (stmt);
1442 while (1);
1444 if (stmts_to_fix.length () > 0 || *def == orig_def)
1445 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
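/* Sketch of what zero_one_operation () does (names are illustrative): with
   *DEF being t_5 in the chain

     t_3 = a * b;
     t_5 = t_3 * c;

   removing operand b with OPCODE == MULT_EXPR rewrites the chain into the
   equivalent of  t_5' = a * c,  creating fresh SSA names for the updated
   statements (see PR72835) and updating *DEF to the new result.  */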
1448 /* Returns true if statement S1 dominates statement S2. Like
1449 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1451 static bool
1452 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1454 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1456 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1457 SSA_NAME. Assume it lives at the beginning of function and
1458 thus dominates everything. */
1459 if (!bb1 || s1 == s2)
1460 return true;
1462 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1463 if (!bb2)
1464 return false;
1466 if (bb1 == bb2)
1468 /* PHIs in the same basic block are assumed to be
1469 executed all in parallel, if only one stmt is a PHI,
1470 it dominates the other stmt in the same basic block. */
1471 if (gimple_code (s1) == GIMPLE_PHI)
1472 return true;
1474 if (gimple_code (s2) == GIMPLE_PHI)
1475 return false;
1477 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1479 if (gimple_uid (s1) < gimple_uid (s2))
1480 return true;
1482 if (gimple_uid (s1) > gimple_uid (s2))
1483 return false;
1485 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1486 unsigned int uid = gimple_uid (s1);
1487 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1489 gimple *s = gsi_stmt (gsi);
1490 if (gimple_uid (s) != uid)
1491 break;
1492 if (s == s2)
1493 return true;
1496 return false;
1499 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1502 /* Insert STMT after INSERT_POINT. */
1504 static void
1505 insert_stmt_after (gimple *stmt, gimple *insert_point)
1507 gimple_stmt_iterator gsi;
1508 basic_block bb;
1510 if (gimple_code (insert_point) == GIMPLE_PHI)
1511 bb = gimple_bb (insert_point);
1512 else if (!stmt_ends_bb_p (insert_point))
1514 gsi = gsi_for_stmt (insert_point);
1515 gimple_set_uid (stmt, gimple_uid (insert_point));
1516 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1517 return;
1519 else if (gimple_code (insert_point) == GIMPLE_ASM
1520 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1521 /* We have no idea where to insert - it depends on where the
1522 uses will be placed. */
1523 gcc_unreachable ();
1524 else
1525 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1526 thus if it must end a basic block, it should be a call that can
1527 throw, or some assignment that can throw. If it throws, the LHS
1528 of it will not be initialized though, so only valid places using
1529 the SSA_NAME should be dominated by the fallthru edge. */
1530 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1531 gsi = gsi_after_labels (bb);
1532 if (gsi_end_p (gsi))
1534 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1535 gimple_set_uid (stmt,
1536 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1538 else
1539 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1540 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1543 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1544 the result. Places the statement after the definition of either
1545 OP1 or OP2. Returns the new statement. */
1547 static gimple *
1548 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1550 gimple *op1def = NULL, *op2def = NULL;
1551 gimple_stmt_iterator gsi;
1552 tree op;
1553 gassign *sum;
1555 /* Create the addition statement. */
1556 op = make_ssa_name (type);
1557 sum = gimple_build_assign (op, opcode, op1, op2);
1559 /* Find an insertion place and insert. */
1560 if (TREE_CODE (op1) == SSA_NAME)
1561 op1def = SSA_NAME_DEF_STMT (op1);
1562 if (TREE_CODE (op2) == SSA_NAME)
1563 op2def = SSA_NAME_DEF_STMT (op2);
1564 if ((!op1def || gimple_nop_p (op1def))
1565 && (!op2def || gimple_nop_p (op2def)))
1567 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1568 if (!gsi_end_p (gsi)
1569 && is_gimple_call (gsi_stmt (gsi))
1570 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1572 /* Don't add statements before a returns_twice call at the start
1573 of a function. */
1574 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1575 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1577 if (gsi_end_p (gsi))
1579 gimple_stmt_iterator gsi2
1580 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1581 gimple_set_uid (sum,
1582 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1584 else
1585 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1586 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1588 else
1590 gimple *insert_point;
1591 if ((!op1def || gimple_nop_p (op1def))
1592 || (op2def && !gimple_nop_p (op2def)
1593 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1594 insert_point = op2def;
1595 else
1596 insert_point = op1def;
1597 insert_stmt_after (sum, insert_point);
1599 update_stmt (sum);
1601 return sum;
1604 /* Perform un-distribution of divisions and multiplications.
1605 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1606 to (A + B) / X for real X.
1608 The algorithm is organized as follows.
1610 - First we walk the addition chain *OPS looking for summands that
1611 are defined by a multiplication or a real division. This results
1612 in the candidates bitmap with relevant indices into *OPS.
1614 - Second we build the chains of multiplications or divisions for
1615 these candidates, counting the number of occurrences of (operand, code)
1616 pairs in all of the candidates chains.
1618 - Third we sort the (operand, code) pairs by number of occurrence and
1619 process them starting with the pair with the most uses.
1621 * For each such pair we walk the candidates again to build a
1622 second candidate bitmap noting all multiplication/division chains
1623 that have at least one occurrence of (operand, code).
1625 * We build an alternate addition chain only covering these
1626 candidates with one (operand, code) operation removed from their
1627 multiplication/division chain.
1629 * The first candidate gets replaced by the alternate addition chain
1630 multiplied/divided by the operand.
1632 * All candidate chains get disabled for further processing and
1633 processing of (operand, code) pairs continues.
1635 The alternate addition chains built are re-processed by the main
1636 reassociation algorithm which allows optimizing a * x * y + b * y * x
1637 to (a + b) * x * y in one invocation of the reassociation pass. */
1639 static bool
1640 undistribute_ops_list (enum tree_code opcode,
1641 vec<operand_entry *> *ops, class loop *loop)
1643 unsigned int length = ops->length ();
1644 operand_entry *oe1;
1645 unsigned i, j;
1646 unsigned nr_candidates, nr_candidates2;
1647 sbitmap_iterator sbi0;
1648 vec<operand_entry *> *subops;
1649 bool changed = false;
1650 unsigned int next_oecount_id = 0;
1652 if (length <= 1
1653 || opcode != PLUS_EXPR)
1654 return false;
1656 /* Build a list of candidates to process. */
1657 auto_sbitmap candidates (length);
1658 bitmap_clear (candidates);
1659 nr_candidates = 0;
1660 FOR_EACH_VEC_ELT (*ops, i, oe1)
1662 enum tree_code dcode;
1663 gimple *oe1def;
1665 if (TREE_CODE (oe1->op) != SSA_NAME)
1666 continue;
1667 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1668 if (!is_gimple_assign (oe1def))
1669 continue;
1670 dcode = gimple_assign_rhs_code (oe1def);
1671 if ((dcode != MULT_EXPR
1672 && dcode != RDIV_EXPR)
1673 || !is_reassociable_op (oe1def, dcode, loop))
1674 continue;
1676 bitmap_set_bit (candidates, i);
1677 nr_candidates++;
1680 if (nr_candidates < 2)
1681 return false;
1683 if (dump_file && (dump_flags & TDF_DETAILS))
1685 fprintf (dump_file, "searching for un-distribute opportunities ");
1686 print_generic_expr (dump_file,
1687 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1688 fprintf (dump_file, " %d\n", nr_candidates);
1691 /* Build linearized sub-operand lists and the counting table. */
1692 cvec.create (0);
1694 hash_table<oecount_hasher> ctable (15);
1696 /* ??? Macro arguments cannot have multi-argument template types in
1697 them. This typedef is needed to work around that limitation. */
1698 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1699 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1700 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1702 gimple *oedef;
1703 enum tree_code oecode;
1704 unsigned j;
1706 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1707 oecode = gimple_assign_rhs_code (oedef);
1708 linearize_expr_tree (&subops[i], oedef,
1709 associative_tree_code (oecode), false);
1711 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1713 oecount c;
1714 int *slot;
1715 int idx;
1716 c.oecode = oecode;
1717 c.cnt = 1;
1718 c.id = next_oecount_id++;
1719 c.op = oe1->op;
1720 cvec.safe_push (c);
1721 idx = cvec.length () + 41;
1722 slot = ctable.find_slot (idx, INSERT);
1723 if (!*slot)
1725 *slot = idx;
1727 else
1729 cvec.pop ();
1730 cvec[*slot - 42].cnt++;
1735 /* Sort the counting table. */
1736 cvec.qsort (oecount_cmp);
1738 if (dump_file && (dump_flags & TDF_DETAILS))
1740 oecount *c;
1741 fprintf (dump_file, "Candidates:\n");
1742 FOR_EACH_VEC_ELT (cvec, j, c)
1744 fprintf (dump_file, " %u %s: ", c->cnt,
1745 c->oecode == MULT_EXPR
1746 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1747 print_generic_expr (dump_file, c->op);
1748 fprintf (dump_file, "\n");
1752 /* Process the (operand, code) pairs in order of most occurrence. */
1753 auto_sbitmap candidates2 (length);
1754 while (!cvec.is_empty ())
1756 oecount *c = &cvec.last ();
1757 if (c->cnt < 2)
1758 break;
1760 /* Now collect the operands in the outer chain that contain
1761 the common operand in their inner chain. */
1762 bitmap_clear (candidates2);
1763 nr_candidates2 = 0;
1764 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1766 gimple *oedef;
1767 enum tree_code oecode;
1768 unsigned j;
1769 tree op = (*ops)[i]->op;
1771 /* If we undistributed in this chain already this may be
1772 a constant. */
1773 if (TREE_CODE (op) != SSA_NAME)
1774 continue;
1776 oedef = SSA_NAME_DEF_STMT (op);
1777 oecode = gimple_assign_rhs_code (oedef);
1778 if (oecode != c->oecode)
1779 continue;
1781 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1783 if (oe1->op == c->op)
1785 bitmap_set_bit (candidates2, i);
1786 ++nr_candidates2;
1787 break;
1792 if (nr_candidates2 >= 2)
1794 operand_entry *oe1, *oe2;
1795 gimple *prod;
1796 int first = bitmap_first_set_bit (candidates2);
1798 /* Build the new addition chain. */
1799 oe1 = (*ops)[first];
1800 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, "Building (");
1803 print_generic_expr (dump_file, oe1->op);
1805 zero_one_operation (&oe1->op, c->oecode, c->op);
1806 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1808 gimple *sum;
1809 oe2 = (*ops)[i];
1810 if (dump_file && (dump_flags & TDF_DETAILS))
1812 fprintf (dump_file, " + ");
1813 print_generic_expr (dump_file, oe2->op);
1815 zero_one_operation (&oe2->op, c->oecode, c->op);
1816 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1817 oe1->op, oe2->op, opcode);
1818 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1819 oe2->rank = 0;
1820 oe1->op = gimple_get_lhs (sum);
1823 /* Apply the multiplication/division. */
1824 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1825 oe1->op, c->op, c->oecode);
1826 if (dump_file && (dump_flags & TDF_DETAILS))
1828 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1829 print_generic_expr (dump_file, c->op);
1830 fprintf (dump_file, "\n");
1833 /* Record it in the addition chain and disable further
1834 undistribution with this op. */
1835 oe1->op = gimple_assign_lhs (prod);
1836 oe1->rank = get_rank (oe1->op);
1837 subops[first].release ();
1839 changed = true;
1842 cvec.pop ();
1845 for (i = 0; i < ops->length (); ++i)
1846 subops[i].release ();
1847 free (subops);
1848 cvec.release ();
1850 return changed;
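/* Sketch of the un-distribution performed above (GIMPLE is abridged): an
   addition chain whose operand list is  { a*x, b*x, c }  is rewritten so
   the two multiplication candidates share the common factor only once:

     tmp_1 = a + b;
     tmp_2 = tmp_1 * x;

   after which the operand list becomes { tmp_2, 0, c }; the zero left
   behind for the consumed entry is eliminated by the later passes over the
   operand list.  */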
1853 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1854 first: element index for each relevant BIT_FIELD_REF.
1855 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1856 typedef std::pair<unsigned, unsigned> v_info_elem;
1857 struct v_info {
1858 tree vec_type;
1859 auto_vec<v_info_elem, 32> vec;
1861 typedef v_info *v_info_ptr;
1863 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1864 static int
1865 sort_by_mach_mode (const void *p_i, const void *p_j)
1867 const tree tr1 = *((const tree *) p_i);
1868 const tree tr2 = *((const tree *) p_j);
1869 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1870 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1871 if (mode1 > mode2)
1872 return 1;
1873 else if (mode1 < mode2)
1874 return -1;
1875 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1876 return -1;
1877 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1878 return 1;
1879 return 0;
1882 /* Cleanup hash map for VECTOR information. */
1883 static void
1884 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1886 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1887 it != info_map.end (); ++it)
1889 v_info_ptr info = (*it).second;
1890 delete info;
1891 (*it).second = NULL;
1895 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1896 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1897 is transformed to
1898 Vs = (V1 + V2 + ... + Vn)
1899 Vs[0] + Vs[1] + ... + Vs[k]
1901 The basic steps are listed below:
1903 1) Check the addition chain *OPS, looking for summands that come from
1904 a BIT_FIELD_REF on a VECTOR type. Put the information into
1905 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1907 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs
1908 are contiguous and cover the whole VECTOR without any holes. Obtain
1909 one VECTOR list which contains the candidates to be transformed.
1911 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1912 group of candidates with the same mode, build the addition statements
1913 for them and generate BIT_FIELD_REFs accordingly.
1915 TODO:
1916 The current implementation requires that each whole VECTOR be fully
1917 covered, but it could be extended to support partial coverage, checking
1918 that the used elements are adjacent even if they do not fill the whole
1919 vector; that may need some cost model to decide where to draw the boundary. */
1921 static bool
1922 undistribute_bitref_for_vector (enum tree_code opcode,
1923 vec<operand_entry *> *ops, struct loop *loop)
1925 if (ops->length () <= 1)
1926 return false;
1928 if (opcode != PLUS_EXPR
1929 && opcode != MULT_EXPR
1930 && opcode != BIT_XOR_EXPR
1931 && opcode != BIT_IOR_EXPR
1932 && opcode != BIT_AND_EXPR)
1933 return false;
1935 hash_map<tree, v_info_ptr> v_info_map;
1936 operand_entry *oe1;
1937 unsigned i;
1939 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1940 information into map. */
1941 FOR_EACH_VEC_ELT (*ops, i, oe1)
1943 enum tree_code dcode;
1944 gimple *oe1def;
1946 if (TREE_CODE (oe1->op) != SSA_NAME)
1947 continue;
1948 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1949 if (!is_gimple_assign (oe1def))
1950 continue;
1951 dcode = gimple_assign_rhs_code (oe1def);
1952 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1953 continue;
1955 tree rhs = gimple_assign_rhs1 (oe1def);
1956 tree vec = TREE_OPERAND (rhs, 0);
1957 tree vec_type = TREE_TYPE (vec);
1959 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1960 continue;
1962 /* Ignore it if target machine can't support this VECTOR type. */
1963 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1964 continue;
1966 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1967 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1968 continue;
1970 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1971 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1972 continue;
1974 /* The type of BIT_FIELD_REF might not be equal to the element type of
1975 the vector. We want to use a vector type with element type the
1976 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1977 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1979 machine_mode simd_mode;
1980 unsigned HOST_WIDE_INT size, nunits;
1981 unsigned HOST_WIDE_INT elem_size
1982 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1983 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1984 continue;
1985 if (size <= elem_size || (size % elem_size) != 0)
1986 continue;
1987 nunits = size / elem_size;
1988 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1989 nunits).exists (&simd_mode))
1990 continue;
1991 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1993 /* Ignore it if target machine can't support this VECTOR type. */
1994 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1995 continue;
1997 /* Check const vector type, constrain BIT_FIELD_REF offset and
1998 size. */
1999 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2000 continue;
2002 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2003 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2004 continue;
2007 tree elem_type = TREE_TYPE (vec_type);
2008 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2009 if (maybe_ne (bit_field_size (rhs), elem_size))
2010 continue;
2012 unsigned idx;
2013 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2014 continue;
2016 /* Ignore it if target machine can't support this type of VECTOR
2017 operation. */
2018 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2019 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2020 continue;
2022 bool existed;
2023 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2024 if (!existed)
2026 info = new v_info;
2027 info->vec_type = vec_type;
2029 else if (!types_compatible_p (vec_type, info->vec_type))
2030 continue;
2031 info->vec.safe_push (std::make_pair (idx, i));
2034 /* We need at least two VECTORs to combine. */
2035 if (v_info_map.elements () <= 1)
2037 cleanup_vinfo_map (v_info_map);
2038 return false;
2041 /* Verify all VECTOR candidates by checking two conditions:
2042 1) sorted offsets are adjacent, no holes.
2043 2) can fill the whole VECTOR perfectly.
2044 And add the valid candidates to a vector for further handling. */
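/* Implementation note (descriptive only): the verification below uses an
   sbitmap that starts with all bits set; each referenced lane clears its
   bit, a lane whose bit is already clear indicates a duplicate and makes
   the candidate invalid, and full coverage is confirmed when the bitmap
   ends up empty.  */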
2045 auto_vec<tree> valid_vecs (v_info_map.elements ());
2046 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2047 it != v_info_map.end (); ++it)
2049 tree cand_vec = (*it).first;
2050 v_info_ptr cand_info = (*it).second;
2051 unsigned int num_elems
2052 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2053 if (cand_info->vec.length () != num_elems)
2054 continue;
2055 sbitmap holes = sbitmap_alloc (num_elems);
2056 bitmap_ones (holes);
2057 bool valid = true;
2058 v_info_elem *curr;
2059 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2061 if (!bitmap_bit_p (holes, curr->first))
2063 valid = false;
2064 break;
2066 else
2067 bitmap_clear_bit (holes, curr->first);
2069 if (valid && bitmap_empty_p (holes))
2070 valid_vecs.quick_push (cand_vec);
2071 sbitmap_free (holes);
2074 /* We still need at least two VECTORs to combine. */
2075 if (valid_vecs.length () <= 1)
2077 cleanup_vinfo_map (v_info_map);
2078 return false;
2081 valid_vecs.qsort (sort_by_mach_mode);
2082 /* Go through all candidates in machine-mode order; each run of candidates
2083 sharing the same mode is summed together, single candidates are skipped. */
2084 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2086 tree tvec = valid_vecs[i];
2087 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2089 /* Skip modes with only a single candidate. */
2090 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2091 continue;
2093 unsigned int idx, j;
2094 gimple *sum = NULL;
2095 tree sum_vec = tvec;
2096 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2097 v_info_elem *elem;
2098 tree vec_type = info_ptr->vec_type;
2100 /* Build the sum for all candidates with same mode. */
2103 sum = build_and_add_sum (vec_type, sum_vec,
2104 valid_vecs[i + 1], opcode);
2105 /* Update the operands only after build_and_add_sum,
2106 so that we don't have to repeat the placement algorithm
2107 of build_and_add_sum. */
2108 if (sum_vec == tvec
2109 && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2111 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2112 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2113 tree lhs = make_ssa_name (vec_type);
2114 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2115 gimple_set_uid (g, gimple_uid (sum));
2116 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2117 gimple_assign_set_rhs1 (sum, lhs);
2118 update_stmt (sum);
2120 if (!useless_type_conversion_p (vec_type,
2121 TREE_TYPE (valid_vecs[i + 1])))
2123 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2124 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2125 valid_vecs[i + 1]);
2126 tree lhs = make_ssa_name (vec_type);
2127 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2128 gimple_set_uid (g, gimple_uid (sum));
2129 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2130 gimple_assign_set_rhs2 (sum, lhs);
2131 update_stmt (sum);
2133 sum_vec = gimple_get_lhs (sum);
2134 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2135 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2136 /* Update those related ops of current candidate VECTOR. */
2137 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2139 idx = elem->second;
2140 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2141 /* Set this so the op definition will get DCEd later. */
2142 gimple_set_visited (def, true);
2143 if (opcode == PLUS_EXPR
2144 || opcode == BIT_XOR_EXPR
2145 || opcode == BIT_IOR_EXPR)
2146 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2147 else if (opcode == MULT_EXPR)
2148 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2149 else
2151 gcc_assert (opcode == BIT_AND_EXPR);
2152 (*ops)[idx]->op
2153 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2155 (*ops)[idx]->rank = 0;
2157 if (dump_file && (dump_flags & TDF_DETAILS))
2159 fprintf (dump_file, "Generating addition -> ");
2160 print_gimple_stmt (dump_file, sum, 0);
2162 i++;
2164 while ((i < valid_vecs.length () - 1)
2165 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2167 /* Referring to first valid VECTOR with this mode, generate the
2168 BIT_FIELD_REF statements accordingly. */
2169 info_ptr = *(v_info_map.get (tvec));
2170 gcc_assert (sum);
2171 tree elem_type = TREE_TYPE (vec_type);
2172 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2174 idx = elem->second;
2175 tree dst = make_ssa_name (elem_type);
2176 tree pos = bitsize_int (elem->first
2177 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2178 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2179 TYPE_SIZE (elem_type), pos);
2180 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2181 insert_stmt_after (gs, sum);
2182 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2183 /* Set this so the op definition will get DCEd later. */
2184 gimple_set_visited (def, true);
2185 (*ops)[idx]->op = gimple_assign_lhs (gs);
2186 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2187 if (dump_file && (dump_flags & TDF_DETAILS))
2189 fprintf (dump_file, "Generating bit_field_ref -> ");
2190 print_gimple_stmt (dump_file, gs, 0);
2195 if (dump_file && (dump_flags & TDF_DETAILS))
2196 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2198 cleanup_vinfo_map (v_info_map);
2200 return true;
2203 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2204 expression, examine the other OPS to see if any of them are comparisons
2205 of the same values, which we may be able to combine or eliminate.
2206 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
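/* Another hedged illustration (hypothetical operands): under BIT_AND_EXPR,
   (a <= b) & (a < b) collapses to (a < b), while (a < b) & (a > b) folds to
   the constant 0, which is then handled below like any other constant
   returned by the folders.  */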
2208 static bool
2209 eliminate_redundant_comparison (enum tree_code opcode,
2210 vec<operand_entry *> *ops,
2211 unsigned int currindex,
2212 operand_entry *curr)
2214 tree op1, op2;
2215 enum tree_code lcode, rcode;
2216 gimple *def1, *def2;
2217 int i;
2218 operand_entry *oe;
2220 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2221 return false;
2223 /* Check that CURR is a comparison. */
2224 if (TREE_CODE (curr->op) != SSA_NAME)
2225 return false;
2226 def1 = SSA_NAME_DEF_STMT (curr->op);
2227 if (!is_gimple_assign (def1))
2228 return false;
2229 lcode = gimple_assign_rhs_code (def1);
2230 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2231 return false;
2232 op1 = gimple_assign_rhs1 (def1);
2233 op2 = gimple_assign_rhs2 (def1);
2235 /* Now look for a similar comparison in the remaining OPS. */
2236 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2238 tree t;
2240 if (TREE_CODE (oe->op) != SSA_NAME)
2241 continue;
2242 def2 = SSA_NAME_DEF_STMT (oe->op);
2243 if (!is_gimple_assign (def2))
2244 continue;
2245 rcode = gimple_assign_rhs_code (def2);
2246 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2247 continue;
2249 /* If we got here, we have a match. See if we can combine the
2250 two comparisons. */
2251 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2252 if (opcode == BIT_IOR_EXPR)
2253 t = maybe_fold_or_comparisons (type,
2254 lcode, op1, op2,
2255 rcode, gimple_assign_rhs1 (def2),
2256 gimple_assign_rhs2 (def2));
2257 else
2258 t = maybe_fold_and_comparisons (type,
2259 lcode, op1, op2,
2260 rcode, gimple_assign_rhs1 (def2),
2261 gimple_assign_rhs2 (def2));
2262 if (!t)
2263 continue;
2265 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2266 always give us a boolean_type_node value back. If the original
2267 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2268 we need to convert. */
2269 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2271 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2272 continue;
2273 t = fold_convert (TREE_TYPE (curr->op), t);
2276 if (TREE_CODE (t) != INTEGER_CST
2277 && !operand_equal_p (t, curr->op, 0))
2279 enum tree_code subcode;
2280 tree newop1, newop2;
2281 if (!COMPARISON_CLASS_P (t))
2282 continue;
2283 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2284 STRIP_USELESS_TYPE_CONVERSION (newop1);
2285 STRIP_USELESS_TYPE_CONVERSION (newop2);
2286 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2287 continue;
2288 if (lcode == TREE_CODE (t)
2289 && operand_equal_p (op1, newop1, 0)
2290 && operand_equal_p (op2, newop2, 0))
2291 t = curr->op;
2292 else if ((TREE_CODE (newop1) == SSA_NAME
2293 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2294 || (TREE_CODE (newop2) == SSA_NAME
2295 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2296 continue;
2299 if (dump_file && (dump_flags & TDF_DETAILS))
2301 fprintf (dump_file, "Equivalence: ");
2302 print_generic_expr (dump_file, curr->op);
2303 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2304 print_generic_expr (dump_file, oe->op);
2305 fprintf (dump_file, " -> ");
2306 print_generic_expr (dump_file, t);
2307 fprintf (dump_file, "\n");
2310 /* Now we can delete oe, as it has been subsumed by the new combined
2311 expression t. */
2312 ops->ordered_remove (i);
2313 reassociate_stats.ops_eliminated ++;
2315 /* If t is the same as curr->op, we're done. Otherwise we must
2316 replace curr->op with t. The special case is when we got a constant
2317 back, in which case we add it to the end instead of in place of
2318 the current entry. */
2319 if (TREE_CODE (t) == INTEGER_CST)
2321 ops->ordered_remove (currindex);
2322 add_to_ops_vec (ops, t);
2324 else if (!operand_equal_p (t, curr->op, 0))
2326 gimple *sum;
2327 enum tree_code subcode;
2328 tree newop1;
2329 tree newop2;
2330 gcc_assert (COMPARISON_CLASS_P (t));
2331 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2332 STRIP_USELESS_TYPE_CONVERSION (newop1);
2333 STRIP_USELESS_TYPE_CONVERSION (newop2);
2334 gcc_checking_assert (is_gimple_val (newop1)
2335 && is_gimple_val (newop2));
2336 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2337 curr->op = gimple_get_lhs (sum);
2339 return true;
2342 return false;
2346 /* Transform repeated addition of the same value into a multiplication
2347 by a constant. */
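/* A hedged sketch of the effect (hypothetical operands): the chain
   x + x + x + y is rewritten as 3*x + y; for floating-point operands this
   is only done under -funsafe-math-optimizations, as checked below.  */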
2348 static bool
2349 transform_add_to_multiply (vec<operand_entry *> *ops)
2351 operand_entry *oe;
2352 tree op = NULL_TREE;
2353 int j;
2354 int i, start = -1, end = 0, count = 0;
2355 auto_vec<std::pair <int, int> > indxs;
2356 bool changed = false;
2358 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2359 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2360 || !flag_unsafe_math_optimizations))
2361 return false;
2363 /* Look for repeated operands. */
2364 FOR_EACH_VEC_ELT (*ops, i, oe)
2366 if (start == -1)
2368 count = 1;
2369 op = oe->op;
2370 start = i;
2372 else if (operand_equal_p (oe->op, op, 0))
2374 count++;
2375 end = i;
2377 else
2379 if (count > 1)
2380 indxs.safe_push (std::make_pair (start, end));
2381 count = 1;
2382 op = oe->op;
2383 start = i;
2387 if (count > 1)
2388 indxs.safe_push (std::make_pair (start, end));
2390 for (j = indxs.length () - 1; j >= 0; --j)
2392 /* Convert repeated operand addition to multiplication. */
2393 start = indxs[j].first;
2394 end = indxs[j].second;
2395 op = (*ops)[start]->op;
2396 count = end - start + 1;
2397 for (i = end; i >= start; --i)
2398 ops->unordered_remove (i);
2399 tree tmp = make_ssa_name (TREE_TYPE (op));
2400 tree cst = build_int_cst (integer_type_node, count);
2401 gassign *mul_stmt
2402 = gimple_build_assign (tmp, MULT_EXPR,
2403 op, fold_convert (TREE_TYPE (op), cst));
2404 gimple_set_visited (mul_stmt, true);
2405 add_to_ops_vec (ops, tmp, mul_stmt);
2406 changed = true;
2409 return changed;
2413 /* Perform various identities and other optimizations on the list of
2414 operand entries, stored in OPS. The tree code for the binary
2415 operation between all the operands is OPCODE. */
2417 static void
2418 optimize_ops_list (enum tree_code opcode,
2419 vec<operand_entry *> *ops)
2421 unsigned int length = ops->length ();
2422 unsigned int i;
2423 operand_entry *oe;
2424 operand_entry *oelast = NULL;
2425 bool iterate = false;
2427 if (length == 1)
2428 return;
2430 oelast = ops->last ();
2432 /* If the last two are constants, pop the constants off, merge them
2433 and try the next two. */
2434 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2436 operand_entry *oelm1 = (*ops)[length - 2];
2438 if (oelm1->rank == 0
2439 && is_gimple_min_invariant (oelm1->op)
2440 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2441 TREE_TYPE (oelast->op)))
2443 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2444 oelm1->op, oelast->op);
2446 if (folded && is_gimple_min_invariant (folded))
2448 if (dump_file && (dump_flags & TDF_DETAILS))
2449 fprintf (dump_file, "Merging constants\n");
2451 ops->pop ();
2452 ops->pop ();
2454 add_to_ops_vec (ops, folded);
2455 reassociate_stats.constants_eliminated++;
2457 optimize_ops_list (opcode, ops);
2458 return;
2463 eliminate_using_constants (opcode, ops);
2464 oelast = NULL;
2466 for (i = 0; ops->iterate (i, &oe);)
2468 bool done = false;
2470 if (eliminate_not_pairs (opcode, ops, i, oe))
2471 return;
2472 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2473 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2474 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2476 if (done)
2477 return;
2478 iterate = true;
2479 oelast = NULL;
2480 continue;
2482 oelast = oe;
2483 i++;
2486 if (iterate)
2487 optimize_ops_list (opcode, ops);
2490 /* The following functions are subroutines to optimize_range_tests and allow
2491 it to try to change a logical combination of comparisons into a range
2492 test.
2494 For example, both
2495 X == 2 || X == 5 || X == 3 || X == 4
2496 and
2497 X >= 2 && X <= 5
2498 are converted to
2499 (unsigned) (X - 2) <= 3
2501 For more information see comments above fold_test_range in fold-const.cc,
2502 this implementation is for GIMPLE. */
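/* Why the unsigned form works (a hedged aside, constants are illustrative
   only): (unsigned) (X - 2) <= 3 accepts exactly X in [2, 5], because any
   X below 2 wraps around to a huge unsigned value and any X above 5 leaves
   a difference greater than 3, so one unsigned comparison replaces the
   two-ended range test.  */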
2506 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2508 void
2509 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2511 if (!skip_exp)
2512 print_generic_expr (file, r->exp);
2513 fprintf (file, " %c[", r->in_p ? '+' : '-');
2514 print_generic_expr (file, r->low);
2515 fputs (", ", file);
2516 print_generic_expr (file, r->high);
2517 fputc (']', file);
2520 /* Dump the range entry R to STDERR. */
2522 DEBUG_FUNCTION void
2523 debug_range_entry (struct range_entry *r)
2525 dump_range_entry (stderr, r, false);
2526 fputc ('\n', stderr);
2529 /* This is similar to make_range in fold-const.cc, but on top of
2530 GIMPLE instead of trees. If EXP is non-NULL, it should be
2531 an SSA_NAME and STMT argument is ignored, otherwise STMT
2532 argument should be a GIMPLE_COND. */
2534 void
2535 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2537 int in_p;
2538 tree low, high;
2539 bool is_bool, strict_overflow_p;
2541 r->exp = NULL_TREE;
2542 r->in_p = false;
2543 r->strict_overflow_p = false;
2544 r->low = NULL_TREE;
2545 r->high = NULL_TREE;
2546 if (exp != NULL_TREE
2547 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2548 return;
2550 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2551 and see if we can refine the range. Some of the cases below may not
2552 happen, but it doesn't seem worth worrying about this. We "continue"
2553 the outer loop when we've changed something; otherwise we "break"
2554 the switch, which will "break" the while. */
2555 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2556 high = low;
2557 in_p = 0;
2558 strict_overflow_p = false;
2559 is_bool = false;
2560 if (exp == NULL_TREE)
2561 is_bool = true;
2562 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2564 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2565 is_bool = true;
2566 else
2567 return;
2569 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2570 is_bool = true;
2572 while (1)
2574 enum tree_code code;
2575 tree arg0, arg1, exp_type;
2576 tree nexp;
2577 location_t loc;
2579 if (exp != NULL_TREE)
2581 if (TREE_CODE (exp) != SSA_NAME
2582 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2583 break;
2585 stmt = SSA_NAME_DEF_STMT (exp);
2586 if (!is_gimple_assign (stmt))
2587 break;
2589 code = gimple_assign_rhs_code (stmt);
2590 arg0 = gimple_assign_rhs1 (stmt);
2591 arg1 = gimple_assign_rhs2 (stmt);
2592 exp_type = TREE_TYPE (exp);
2594 else
2596 code = gimple_cond_code (stmt);
2597 arg0 = gimple_cond_lhs (stmt);
2598 arg1 = gimple_cond_rhs (stmt);
2599 exp_type = boolean_type_node;
2602 if (TREE_CODE (arg0) != SSA_NAME
2603 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2604 break;
2605 loc = gimple_location (stmt);
2606 switch (code)
2608 case BIT_NOT_EXPR:
2609 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2610 /* Ensure the range is either +[-,0], +[0,0],
2611 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2612 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2613 or similar expression of unconditional true or
2614 false, it should not be negated. */
2615 && ((high && integer_zerop (high))
2616 || (low && integer_onep (low))))
2618 in_p = !in_p;
2619 exp = arg0;
2620 continue;
2622 break;
2623 case SSA_NAME:
2624 exp = arg0;
2625 continue;
2626 CASE_CONVERT:
2627 if (is_bool)
2629 if ((TYPE_PRECISION (exp_type) == 1
2630 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2631 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2632 return;
2634 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2636 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2637 is_bool = true;
2638 else
2639 return;
2641 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2642 is_bool = true;
2643 goto do_default;
2644 case EQ_EXPR:
2645 case NE_EXPR:
2646 case LT_EXPR:
2647 case LE_EXPR:
2648 case GE_EXPR:
2649 case GT_EXPR:
2650 is_bool = true;
2651 /* FALLTHRU */
2652 default:
2653 if (!is_bool)
2654 return;
2655 do_default:
2656 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2657 &low, &high, &in_p,
2658 &strict_overflow_p);
2659 if (nexp != NULL_TREE)
2661 exp = nexp;
2662 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2663 continue;
2665 break;
2667 break;
2669 if (is_bool)
2671 r->exp = exp;
2672 r->in_p = in_p;
2673 r->low = low;
2674 r->high = high;
2675 r->strict_overflow_p = strict_overflow_p;
2679 /* Comparison function for qsort. Sort entries
2680 without SSA_NAME exp first, then with SSA_NAMEs sorted
2681 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2682 by increasing ->low and if ->low is the same, by increasing
2683 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2684 maximum. */
2686 static int
2687 range_entry_cmp (const void *a, const void *b)
2689 const struct range_entry *p = (const struct range_entry *) a;
2690 const struct range_entry *q = (const struct range_entry *) b;
2692 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2694 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2696 /* Group range_entries for the same SSA_NAME together. */
2697 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2698 return -1;
2699 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2700 return 1;
2701 /* If ->low is different, NULL low goes first, then by
2702 ascending low. */
2703 if (p->low != NULL_TREE)
2705 if (q->low != NULL_TREE)
2707 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2708 p->low, q->low);
2709 if (tem && integer_onep (tem))
2710 return -1;
2711 tem = fold_binary (GT_EXPR, boolean_type_node,
2712 p->low, q->low);
2713 if (tem && integer_onep (tem))
2714 return 1;
2716 else
2717 return 1;
2719 else if (q->low != NULL_TREE)
2720 return -1;
2721 /* If ->high is different, NULL high goes last, before that by
2722 ascending high. */
2723 if (p->high != NULL_TREE)
2725 if (q->high != NULL_TREE)
2727 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2728 p->high, q->high);
2729 if (tem && integer_onep (tem))
2730 return -1;
2731 tem = fold_binary (GT_EXPR, boolean_type_node,
2732 p->high, q->high);
2733 if (tem && integer_onep (tem))
2734 return 1;
2736 else
2737 return -1;
2739 else if (q->high != NULL_TREE)
2740 return 1;
2741 /* If both ranges are the same, sort below by ascending idx. */
2743 else
2744 return 1;
2746 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2747 return -1;
2749 if (p->idx < q->idx)
2750 return -1;
2751 else
2753 gcc_checking_assert (p->idx > q->idx);
2754 return 1;
2758 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2759 insert needed statements BEFORE or after GSI. */
2761 static tree
2762 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2764 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2765 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2766 if (TREE_CODE (ret) != SSA_NAME)
2768 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2769 if (before)
2770 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2771 else
2772 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2773 ret = gimple_assign_lhs (g);
2775 return ret;
2778 /* Helper routine of optimize_range_test.
2779 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2780 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2781 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2782 is NULL, OTHERRANGEP must not be NULL and then points to
2783 an array of COUNT pointers to other ranges. Return
2784 true if the range merge has been successful.
2785 If OPCODE is ERROR_MARK, this is called from within
2786 maybe_optimize_range_tests and is performing inter-bb range optimization.
2787 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2788 oe->rank. */
2790 static bool
2791 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2792 struct range_entry **otherrangep,
2793 unsigned int count, enum tree_code opcode,
2794 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2795 bool in_p, tree low, tree high, bool strict_overflow_p)
2797 unsigned int idx = range->idx;
2798 struct range_entry *swap_with = NULL;
2799 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2800 if (opcode == ERROR_MARK)
2802 /* For inter-bb range test optimization, pick from the range tests
2803 the one which is tested in the earliest condition (one dominating
2804 the others), because otherwise there could be some UB (e.g. signed
2805 overflow) in following bbs that we'd expose which wasn't there in
2806 the original program. See PR104196. */
2807 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2808 basic_block range_bb = orig_range_bb;
2809 for (unsigned int i = 0; i < count; i++)
2811 struct range_entry *this_range;
2812 if (otherrange)
2813 this_range = otherrange + i;
2814 else
2815 this_range = otherrangep[i];
2816 operand_entry *oe = (*ops)[this_range->idx];
2817 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2818 if (range_bb != this_bb
2819 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2821 swap_with = this_range;
2822 range_bb = this_bb;
2823 idx = this_range->idx;
2826 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2827 only defined in later blocks. In this case we can't move the
2828 merged comparison earlier, so instead check if there are any stmts
2829 that might trigger signed integer overflow in between and rewrite
2830 them. But only after we check if the optimization is possible. */
2831 if (seq && swap_with)
2833 rewrite_bb_first = range_bb;
2834 rewrite_bb_last = orig_range_bb;
2835 idx = range->idx;
2836 swap_with = NULL;
2839 operand_entry *oe = (*ops)[idx];
2840 tree op = oe->op;
2841 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2842 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2843 location_t loc = gimple_location (stmt);
2844 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2845 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2846 in_p, low, high);
2847 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2848 gimple_stmt_iterator gsi;
2849 unsigned int i, uid;
2851 if (tem == NULL_TREE)
2852 return false;
2854 /* If op is default def SSA_NAME, there is no place to insert the
2855 new comparison. Give up, unless we can use OP itself as the
2856 range test. */
2857 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2859 if (op == range->exp
2860 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2861 || TREE_CODE (optype) == BOOLEAN_TYPE)
2862 && (op == tem
2863 || (TREE_CODE (tem) == EQ_EXPR
2864 && TREE_OPERAND (tem, 0) == op
2865 && integer_onep (TREE_OPERAND (tem, 1))))
2866 && opcode != BIT_IOR_EXPR
2867 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2869 stmt = NULL;
2870 tem = op;
2872 else
2873 return false;
2876 if (swap_with)
2877 std::swap (range->idx, swap_with->idx);
2879 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2880 warning_at (loc, OPT_Wstrict_overflow,
2881 "assuming signed overflow does not occur "
2882 "when simplifying range test");
2884 if (dump_file && (dump_flags & TDF_DETAILS))
2886 struct range_entry *r;
2887 fprintf (dump_file, "Optimizing range tests ");
2888 dump_range_entry (dump_file, range, false);
2889 for (i = 0; i < count; i++)
2891 if (otherrange)
2892 r = otherrange + i;
2893 else
2894 r = otherrangep[i];
2895 if (r->exp
2896 && r->exp != range->exp
2897 && TREE_CODE (r->exp) == SSA_NAME)
2899 fprintf (dump_file, " and ");
2900 dump_range_entry (dump_file, r, false);
2902 else
2904 fprintf (dump_file, " and");
2905 dump_range_entry (dump_file, r, true);
2908 fprintf (dump_file, "\n into ");
2909 print_generic_expr (dump_file, tem);
2910 fprintf (dump_file, "\n");
2913 /* In inter-bb range optimization mode, if we have a seq, we can't
2914 move the merged comparison to the earliest bb from the comparisons
2915 being replaced, so instead rewrite stmts that could trigger signed
2916 integer overflow. */
2917 for (basic_block bb = rewrite_bb_last;
2918 bb != rewrite_bb_first; bb = single_pred (bb))
2919 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2920 !gsi_end_p (gsi); gsi_next (&gsi))
2922 gimple *stmt = gsi_stmt (gsi);
2923 if (is_gimple_assign (stmt))
2924 if (tree lhs = gimple_assign_lhs (stmt))
2925 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2926 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2927 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2929 enum tree_code code = gimple_assign_rhs_code (stmt);
2930 if (arith_code_with_undefined_signed_overflow (code))
2932 gimple_stmt_iterator gsip = gsi;
2933 gimple_stmt_iterator gsin = gsi;
2934 gsi_prev (&gsip);
2935 gsi_next (&gsin);
2936 rewrite_to_defined_overflow (stmt, true);
2937 unsigned uid = gimple_uid (stmt);
2938 if (gsi_end_p (gsip))
2939 gsip = gsi_after_labels (bb);
2940 else
2941 gsi_next (&gsip);
2942 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2943 gsi_next (&gsip))
2944 gimple_set_uid (gsi_stmt (gsip), uid);
2949 if (opcode == BIT_IOR_EXPR
2950 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2951 tem = invert_truthvalue_loc (loc, tem);
2953 tem = fold_convert_loc (loc, optype, tem);
2954 if (stmt)
2956 gsi = gsi_for_stmt (stmt);
2957 uid = gimple_uid (stmt);
2959 else
2961 gsi = gsi_none ();
2962 uid = 0;
2964 if (stmt == NULL)
2965 gcc_checking_assert (tem == op);
2966 /* In rare cases range->exp can be equal to lhs of stmt.
2967 In that case we have to insert after the stmt rather than before
2968 it. If stmt is a PHI, insert it at the start of the basic block. */
2969 else if (op != range->exp)
2971 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2972 tem = force_into_ssa_name (&gsi, tem, true);
2973 gsi_prev (&gsi);
2975 else if (gimple_code (stmt) != GIMPLE_PHI)
2977 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2978 tem = force_into_ssa_name (&gsi, tem, false);
2980 else
2982 gsi = gsi_after_labels (gimple_bb (stmt));
2983 if (!gsi_end_p (gsi))
2984 uid = gimple_uid (gsi_stmt (gsi));
2985 else
2987 gsi = gsi_start_bb (gimple_bb (stmt));
2988 uid = 1;
2989 while (!gsi_end_p (gsi))
2991 uid = gimple_uid (gsi_stmt (gsi));
2992 gsi_next (&gsi);
2995 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2996 tem = force_into_ssa_name (&gsi, tem, true);
2997 if (gsi_end_p (gsi))
2998 gsi = gsi_last_bb (gimple_bb (stmt));
2999 else
3000 gsi_prev (&gsi);
3002 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3003 if (gimple_uid (gsi_stmt (gsi)))
3004 break;
3005 else
3006 gimple_set_uid (gsi_stmt (gsi), uid);
3008 oe->op = tem;
3009 range->exp = exp;
3010 range->low = low;
3011 range->high = high;
3012 range->in_p = in_p;
3013 range->strict_overflow_p = false;
3015 for (i = 0; i < count; i++)
3017 if (otherrange)
3018 range = otherrange + i;
3019 else
3020 range = otherrangep[i];
3021 oe = (*ops)[range->idx];
3022 /* Now change all the other range test immediate uses, so that
3023 those tests will be optimized away. */
3024 if (opcode == ERROR_MARK)
3026 if (oe->op)
3027 oe->op = build_int_cst (TREE_TYPE (oe->op),
3028 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3029 else
3030 oe->op = (oe->rank == BIT_IOR_EXPR
3031 ? boolean_false_node : boolean_true_node);
3033 else
3034 oe->op = error_mark_node;
3035 range->exp = NULL_TREE;
3036 range->low = NULL_TREE;
3037 range->high = NULL_TREE;
3039 return true;
3042 /* Optimize X == CST1 || X == CST2
3043 if popcount (CST1 ^ CST2) == 1 into
3044 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3045 Similarly for ranges. E.g.
3046 X != 2 && X != 3 && X != 10 && X != 11
3047 will be transformed by the previous optimization into
3048 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3049 and this loop can transform that into
3050 !(((X & ~8) - 2U) <= 1U). */
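/* A hedged worked example (constants chosen only for illustration): for
   X == 4 || X == 6 we have lowi == highi == 4 and lowj == highj == 6, so
   lowxor = 4 ^ 6 = 2 is a power of two and equals highxor.  Masking that
   bit away yields (X & ~2) == 4, one comparison instead of two.  */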
3052 static bool
3053 optimize_range_tests_xor (enum tree_code opcode, tree type,
3054 tree lowi, tree lowj, tree highi, tree highj,
3055 vec<operand_entry *> *ops,
3056 struct range_entry *rangei,
3057 struct range_entry *rangej)
3059 tree lowxor, highxor, tem, exp;
3060 /* Check lowi ^ lowj == highi ^ highj and
3061 popcount (lowi ^ lowj) == 1. */
3062 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3063 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3064 return false;
3065 if (!integer_pow2p (lowxor))
3066 return false;
3067 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3068 if (!tree_int_cst_equal (lowxor, highxor))
3069 return false;
3071 exp = rangei->exp;
3072 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3073 int prec = GET_MODE_PRECISION (mode);
3074 if (TYPE_PRECISION (type) < prec
3075 || (wi::to_wide (TYPE_MIN_VALUE (type))
3076 != wi::min_value (prec, TYPE_SIGN (type)))
3077 || (wi::to_wide (TYPE_MAX_VALUE (type))
3078 != wi::max_value (prec, TYPE_SIGN (type))))
3080 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3081 exp = fold_convert (type, exp);
3082 lowxor = fold_convert (type, lowxor);
3083 lowi = fold_convert (type, lowi);
3084 highi = fold_convert (type, highi);
3086 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3087 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3088 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3089 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3090 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3091 NULL, rangei->in_p, lowj, highj,
3092 rangei->strict_overflow_p
3093 || rangej->strict_overflow_p))
3094 return true;
3095 return false;
3098 /* Optimize X == CST1 || X == CST2
3099 if popcount (CST2 - CST1) == 1 into
3100 ((X - CST1) & ~(CST2 - CST1)) == 0.
3101 Similarly for ranges. E.g.
3102 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3103 || X == 75 || X == 45
3104 will be transformed by the previous optimization into
3105 (X - 43U) <= 3U || (X - 75U) <= 3U
3106 and this loop can transform that into
3107 ((X - 43U) & ~(75U - 43U)) <= 3U. */
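/* A hedged worked example (constants for illustration only): with the two
   ranges [43, 46] and [75, 78], both spans equal 3 and 75 - 43 = 32 is a
   power of two, so ((X - 43U) & ~32U) <= 3U tests membership in either
   range using one subtraction, one mask and one comparison.  */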
3108 static bool
3109 optimize_range_tests_diff (enum tree_code opcode, tree type,
3110 tree lowi, tree lowj, tree highi, tree highj,
3111 vec<operand_entry *> *ops,
3112 struct range_entry *rangei,
3113 struct range_entry *rangej)
3115 tree tem1, tem2, mask;
3116 /* Check highi - lowi == highj - lowj. */
3117 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3118 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3119 return false;
3120 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3121 if (!tree_int_cst_equal (tem1, tem2))
3122 return false;
3123 /* Check popcount (lowj - lowi) == 1. */
3124 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3125 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3126 return false;
3127 if (!integer_pow2p (tem1))
3128 return false;
3130 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3131 int prec = GET_MODE_PRECISION (mode);
3132 if (TYPE_PRECISION (type) < prec
3133 || (wi::to_wide (TYPE_MIN_VALUE (type))
3134 != wi::min_value (prec, TYPE_SIGN (type)))
3135 || (wi::to_wide (TYPE_MAX_VALUE (type))
3136 != wi::max_value (prec, TYPE_SIGN (type))))
3137 type = build_nonstandard_integer_type (prec, 1);
3138 else
3139 type = unsigned_type_for (type);
3140 tem1 = fold_convert (type, tem1);
3141 tem2 = fold_convert (type, tem2);
3142 lowi = fold_convert (type, lowi);
3143 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3144 tem1 = fold_build2 (MINUS_EXPR, type,
3145 fold_convert (type, rangei->exp), lowi);
3146 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3147 lowj = build_int_cst (type, 0);
3148 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3149 NULL, rangei->in_p, lowj, tem2,
3150 rangei->strict_overflow_p
3151 || rangej->strict_overflow_p))
3152 return true;
3153 return false;
3156 /* Perform the checks common to optimize_range_tests_xor and
3157 optimize_range_tests_diff.
3158 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
3159 otherwise call optimize_range_tests_diff. */
3161 static bool
3162 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3163 bool optimize_xor, vec<operand_entry *> *ops,
3164 struct range_entry *ranges)
3166 int i, j;
3167 bool any_changes = false;
3168 for (i = first; i < length; i++)
3170 tree lowi, highi, lowj, highj, type, tem;
3172 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3173 continue;
3174 type = TREE_TYPE (ranges[i].exp);
3175 if (!INTEGRAL_TYPE_P (type))
3176 continue;
3177 lowi = ranges[i].low;
3178 if (lowi == NULL_TREE)
3179 lowi = TYPE_MIN_VALUE (type);
3180 highi = ranges[i].high;
3181 if (highi == NULL_TREE)
3182 continue;
3183 for (j = i + 1; j < length && j < i + 64; j++)
3185 bool changes;
3186 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3187 continue;
3188 lowj = ranges[j].low;
3189 if (lowj == NULL_TREE)
3190 continue;
3191 highj = ranges[j].high;
3192 if (highj == NULL_TREE)
3193 highj = TYPE_MAX_VALUE (type);
3194 /* Check lowj > highi. */
3195 tem = fold_binary (GT_EXPR, boolean_type_node,
3196 lowj, highi);
3197 if (tem == NULL_TREE || !integer_onep (tem))
3198 continue;
3199 if (optimize_xor)
3200 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3201 highi, highj, ops,
3202 ranges + i, ranges + j);
3203 else
3204 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3205 highi, highj, ops,
3206 ranges + i, ranges + j);
3207 if (changes)
3209 any_changes = true;
3210 break;
3214 return any_changes;
3217 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3218 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3219 EXP on success, NULL otherwise. */
3221 static tree
3222 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3223 wide_int *mask, tree *totallowp)
3225 tree tem = int_const_binop (MINUS_EXPR, high, low);
3226 if (tem == NULL_TREE
3227 || TREE_CODE (tem) != INTEGER_CST
3228 || TREE_OVERFLOW (tem)
3229 || tree_int_cst_sgn (tem) == -1
3230 || compare_tree_int (tem, prec) != -1)
3231 return NULL_TREE;
3233 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3234 *mask = wi::shifted_mask (0, max, false, prec);
3235 if (TREE_CODE (exp) == BIT_AND_EXPR
3236 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3238 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3239 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3240 if (wi::popcount (msk) == 1
3241 && wi::ltu_p (msk, prec - max))
3243 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3244 max += msk.to_uhwi ();
3245 exp = TREE_OPERAND (exp, 0);
3246 if (integer_zerop (low)
3247 && TREE_CODE (exp) == PLUS_EXPR
3248 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3250 tree ret = TREE_OPERAND (exp, 0);
3251 STRIP_NOPS (ret);
3252 widest_int bias
3253 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3254 TYPE_PRECISION (TREE_TYPE (low))));
3255 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3256 if (totallowp)
3258 *totallowp = tbias;
3259 return ret;
3261 else if (!tree_int_cst_lt (totallow, tbias))
3262 return NULL_TREE;
3263 bias = wi::to_widest (tbias);
3264 bias -= wi::to_widest (totallow);
3265 if (bias >= 0 && bias < prec - max)
3267 *mask = wi::lshift (*mask, bias);
3268 return ret;
3273 if (totallowp)
3274 return exp;
3275 if (!tree_int_cst_lt (totallow, low))
3276 return exp;
3277 tem = int_const_binop (MINUS_EXPR, low, totallow);
3278 if (tem == NULL_TREE
3279 || TREE_CODE (tem) != INTEGER_CST
3280 || TREE_OVERFLOW (tem)
3281 || compare_tree_int (tem, prec - max) == 1)
3282 return NULL_TREE;
3284 *mask = wi::lshift (*mask, wi::to_widest (tem));
3285 return exp;
3288 /* Attempt to optimize small range tests using bit test.
3289 E.g.
3290 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3291 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3292 has been optimized by the earlier optimizations into:
3293 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3294 As the whole 43 through 82 range spans fewer than 64 values,
3295 for 64-bit word targets optimize that into:
3296 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
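/* How the mask above is formed (a hedged aside): each excluded value V
   contributes bit (V - 43), so 43/44/45/46/49 set bits 0-3 and 6 (0x4F)
   and 75/76/77/78/82 set bits 32-35 and 39 (0x8FULL << 32), giving
   0x8F0000004FULL; the entry test keeps the shift amount within the word
   size, and the two tests are combined so the shift is only reached when
   X is known to be in range.  */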
3298 static bool
3299 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3300 vec<operand_entry *> *ops,
3301 struct range_entry *ranges)
3303 int i, j;
3304 bool any_changes = false;
3305 int prec = GET_MODE_BITSIZE (word_mode);
3306 auto_vec<struct range_entry *, 64> candidates;
3308 for (i = first; i < length - 1; i++)
3310 tree lowi, highi, lowj, highj, type;
3312 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3313 continue;
3314 type = TREE_TYPE (ranges[i].exp);
3315 if (!INTEGRAL_TYPE_P (type))
3316 continue;
3317 lowi = ranges[i].low;
3318 if (lowi == NULL_TREE)
3319 lowi = TYPE_MIN_VALUE (type);
3320 highi = ranges[i].high;
3321 if (highi == NULL_TREE)
3322 continue;
3323 wide_int mask;
3324 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3325 highi, &mask, &lowi);
3326 if (exp == NULL_TREE)
3327 continue;
3328 bool strict_overflow_p = ranges[i].strict_overflow_p;
3329 candidates.truncate (0);
3330 int end = MIN (i + 64, length);
3331 for (j = i + 1; j < end; j++)
3333 tree exp2;
3334 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3335 continue;
3336 if (ranges[j].exp == exp)
3338 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3340 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3341 if (exp2 == exp)
3343 else if (TREE_CODE (exp2) == PLUS_EXPR)
3345 exp2 = TREE_OPERAND (exp2, 0);
3346 STRIP_NOPS (exp2);
3347 if (exp2 != exp)
3348 continue;
3350 else
3351 continue;
3353 else
3354 continue;
3355 lowj = ranges[j].low;
3356 if (lowj == NULL_TREE)
3357 continue;
3358 highj = ranges[j].high;
3359 if (highj == NULL_TREE)
3360 highj = TYPE_MAX_VALUE (type);
3361 wide_int mask2;
3362 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3363 highj, &mask2, NULL);
3364 if (exp2 != exp)
3365 continue;
3366 mask |= mask2;
3367 strict_overflow_p |= ranges[j].strict_overflow_p;
3368 candidates.safe_push (&ranges[j]);
3371 /* If every possible relative value of the expression is a valid shift
3372 amount, then we can merge the entry test into the bit test. In this
3373 case, if we would otherwise need 2 or more comparisons, then use
3374 the bit test; in the other cases, the threshold is 3 comparisons. */
3375 bool entry_test_needed;
3376 value_range r;
3377 if (TREE_CODE (exp) == SSA_NAME
3378 && get_range_query (cfun)->range_of_expr (r, exp)
3379 && !r.undefined_p ()
3380 && !r.varying_p ()
3381 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3383 wide_int min = r.lower_bound ();
3384 wide_int ilowi = wi::to_wide (lowi);
3385 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3387 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3388 mask = wi::lshift (mask, ilowi - min);
3390 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3392 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3393 mask = wi::lrshift (mask, min - ilowi);
3395 entry_test_needed = false;
3397 else
3398 entry_test_needed = true;
3399 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3401 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3402 wi::to_widest (lowi)
3403 + prec - 1 - wi::clz (mask));
3404 operand_entry *oe = (*ops)[ranges[i].idx];
3405 tree op = oe->op;
3406 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3407 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3408 (cfun, oe->id));
3409 location_t loc = gimple_location (stmt);
3410 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3412 /* See if it isn't cheaper to pretend the minimum value of the
3413 range is 0, if the maximum value is small enough.
3414 We can then avoid the subtraction of the minimum value, but the
3415 mask constant could perhaps be more expensive. */
3416 if (compare_tree_int (lowi, 0) > 0
3417 && compare_tree_int (high, prec) < 0)
3419 int cost_diff;
3420 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3421 rtx reg = gen_raw_REG (word_mode, 10000);
3422 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3423 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3424 GEN_INT (-m)),
3425 word_mode, speed_p);
3426 rtx r = immed_wide_int_const (mask, word_mode);
3427 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3428 word_mode, speed_p);
3429 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3430 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3431 word_mode, speed_p);
3432 if (cost_diff > 0)
3434 mask = wi::lshift (mask, m);
3435 lowi = build_zero_cst (TREE_TYPE (lowi));
3439 tree tem;
3440 if (entry_test_needed)
3442 tem = build_range_check (loc, optype, unshare_expr (exp),
3443 false, lowi, high);
3444 if (tem == NULL_TREE || is_gimple_val (tem))
3445 continue;
3447 else
3448 tem = NULL_TREE;
3449 tree etype = unsigned_type_for (TREE_TYPE (exp));
3450 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3451 fold_convert_loc (loc, etype, exp),
3452 fold_convert_loc (loc, etype, lowi));
3453 exp = fold_convert_loc (loc, integer_type_node, exp);
3454 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3455 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3456 build_int_cst (word_type, 1), exp);
3457 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3458 wide_int_to_tree (word_type, mask));
3459 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3460 build_zero_cst (word_type));
3461 if (is_gimple_val (exp))
3462 continue;
3464 /* The shift might have undefined behavior if TEM is true,
3465 but reassociate_bb isn't prepared to have basic blocks
3466 split when it is running. So, temporarily emit code
3467 with BIT_IOR_EXPR instead of &&, and fix it up in
3468 branch_fixup. */
3469 gimple_seq seq = NULL;
3470 if (tem)
3472 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3473 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3474 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3476 gimple_seq seq2;
3477 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3478 gimple_seq_add_seq_without_update (&seq, seq2);
3479 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3480 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3481 if (tem)
3483 gimple *g = gimple_build_assign (make_ssa_name (optype),
3484 BIT_IOR_EXPR, tem, exp);
3485 gimple_set_location (g, loc);
3486 gimple_seq_add_stmt_without_update (&seq, g);
3487 exp = gimple_assign_lhs (g);
3489 tree val = build_zero_cst (optype);
3490 if (update_range_test (&ranges[i], NULL, candidates.address (),
3491 candidates.length (), opcode, ops, exp,
3492 seq, false, val, val, strict_overflow_p))
3494 any_changes = true;
3495 if (tem)
3496 reassoc_branch_fixups.safe_push (tem);
3498 else
3499 gimple_seq_discard (seq);
3502 return any_changes;
3505 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3506 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3507 Also, handle x < C && y < C && z < C where C is power of two as
3508 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3509 as (x | y | z) < 0. */
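/* A hedged note on why the power-of-two case is safe (illustrative only):
   for unsigned x and y, x < 8 && y < 8 is equivalent to (x | y) < 8,
   because OR-ing cannot set bit 3 or above unless one operand already had
   such a bit set; the signed < 0 cases rely on the same idea applied to
   the sign bit alone.  */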
3511 static bool
3512 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3513 vec<operand_entry *> *ops,
3514 struct range_entry *ranges)
3516 int i;
3517 unsigned int b;
3518 bool any_changes = false;
3519 auto_vec<int, 128> buckets;
3520 auto_vec<int, 32> chains;
3521 auto_vec<struct range_entry *, 32> candidates;
3523 for (i = first; i < length; i++)
3525 int idx;
3527 if (ranges[i].exp == NULL_TREE
3528 || TREE_CODE (ranges[i].exp) != SSA_NAME
3529 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3530 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3531 continue;
3533 if (ranges[i].low != NULL_TREE
3534 && ranges[i].high != NULL_TREE
3535 && ranges[i].in_p
3536 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3538 idx = !integer_zerop (ranges[i].low);
3539 if (idx && !integer_all_onesp (ranges[i].low))
3540 continue;
3542 else if (ranges[i].high != NULL_TREE
3543 && TREE_CODE (ranges[i].high) == INTEGER_CST
3544 && ranges[i].in_p)
3546 wide_int w = wi::to_wide (ranges[i].high);
3547 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3548 int l = wi::clz (w);
3549 idx = 2;
3550 if (l <= 0
3551 || l >= prec
3552 || w != wi::mask (prec - l, false, prec))
3553 continue;
3554 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3555 && ranges[i].low == NULL_TREE)
3556 || (ranges[i].low
3557 && integer_zerop (ranges[i].low))))
3558 continue;
3560 else if (ranges[i].high == NULL_TREE
3561 && ranges[i].low != NULL_TREE
3562 /* Perform this optimization only in the last
3563 reassoc pass, as it interferes with the reassociation
3564 itself or could also interfere with VRP etc., which might
3565 not be able to virtually undo the optimization. */
3566 && !reassoc_insert_powi_p
3567 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3568 && integer_zerop (ranges[i].low))
3569 idx = 3;
3570 else
3571 continue;
3573 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3574 if (buckets.length () <= b)
3575 buckets.safe_grow_cleared (b + 1, true);
3576 if (chains.length () <= (unsigned) i)
3577 chains.safe_grow (i + 1, true);
3578 chains[i] = buckets[b];
3579 buckets[b] = i + 1;
3582 FOR_EACH_VEC_ELT (buckets, b, i)
3583 if (i && chains[i - 1])
3585 int j, k = i;
3586 if ((b % 4) == 2)
3588 /* When ranges[X - 1].high + 1 is a power of two,
3589 we need to process the same bucket up to
3590 precision - 1 times, each time splitting the entries
3591 with the same high bound into one chain and the
3592 rest into another one to be processed later. */
3593 int this_prev = i;
3594 int other_prev = 0;
3595 for (j = chains[i - 1]; j; j = chains[j - 1])
3597 if (tree_int_cst_equal (ranges[i - 1].high,
3598 ranges[j - 1].high))
3600 chains[this_prev - 1] = j;
3601 this_prev = j;
3603 else if (other_prev == 0)
3605 buckets[b] = j;
3606 other_prev = j;
3608 else
3610 chains[other_prev - 1] = j;
3611 other_prev = j;
3614 chains[this_prev - 1] = 0;
3615 if (other_prev)
3616 chains[other_prev - 1] = 0;
3617 if (chains[i - 1] == 0)
3619 if (other_prev)
3620 b--;
3621 continue;
3624 for (j = chains[i - 1]; j; j = chains[j - 1])
3626 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3627 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3628 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3629 k = j;
3631 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3632 tree type2 = NULL_TREE;
3633 bool strict_overflow_p = false;
3634 candidates.truncate (0);
3635 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3636 type1 = pointer_sized_int_node;
3637 for (j = i; j; j = chains[j - 1])
3639 tree type = TREE_TYPE (ranges[j - 1].exp);
3640 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3641 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3642 type = pointer_sized_int_node;
3643 if ((b % 4) == 3)
3645 /* For the signed < 0 cases, the types should be
3646 really compatible (all signed with the same precision);
3647 put the ranges whose in_p differs from that of
3648 k first. */
3649 if (!useless_type_conversion_p (type1, type))
3650 continue;
3651 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3652 candidates.safe_push (&ranges[j - 1]);
3653 type2 = type1;
3654 continue;
3656 if (j == k
3657 || useless_type_conversion_p (type1, type))
3659 else if (type2 == NULL_TREE
3660 || useless_type_conversion_p (type2, type))
3662 if (type2 == NULL_TREE)
3663 type2 = type;
3664 candidates.safe_push (&ranges[j - 1]);
3667 unsigned l = candidates.length ();
3668 for (j = i; j; j = chains[j - 1])
3670 tree type = TREE_TYPE (ranges[j - 1].exp);
3671 if (j == k)
3672 continue;
3673 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3674 type = pointer_sized_int_node;
3675 if ((b % 4) == 3)
3677 if (!useless_type_conversion_p (type1, type))
3678 continue;
3679 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3680 candidates.safe_push (&ranges[j - 1]);
3681 continue;
3683 if (useless_type_conversion_p (type1, type))
3685 else if (type2 == NULL_TREE
3686 || useless_type_conversion_p (type2, type))
3687 continue;
3688 candidates.safe_push (&ranges[j - 1]);
3690 gimple_seq seq = NULL;
3691 tree op = NULL_TREE;
3692 unsigned int id;
3693 struct range_entry *r;
3694 candidates.safe_push (&ranges[k - 1]);
3695 FOR_EACH_VEC_ELT (candidates, id, r)
3697 gimple *g;
3698 enum tree_code code;
3699 if (id == 0)
3701 op = r->exp;
3702 continue;
3704 if (id == l
3705 || POINTER_TYPE_P (TREE_TYPE (op))
3706 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3708 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3709 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3710 if (code == BIT_NOT_EXPR
3711 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3713 g = gimple_build_assign (make_ssa_name (type3),
3714 NOP_EXPR, op);
3715 gimple_seq_add_stmt_without_update (&seq, g);
3716 op = gimple_assign_lhs (g);
3718 g = gimple_build_assign (make_ssa_name (type3), code, op);
3719 gimple_seq_add_stmt_without_update (&seq, g);
3720 op = gimple_assign_lhs (g);
3722 tree type = TREE_TYPE (r->exp);
3723 tree exp = r->exp;
3724 if (POINTER_TYPE_P (type)
3725 || TREE_CODE (type) == OFFSET_TYPE
3726 || (id >= l && !useless_type_conversion_p (type1, type)))
3728 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3729 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3730 gimple_seq_add_stmt_without_update (&seq, g);
3731 exp = gimple_assign_lhs (g);
3733 if ((b % 4) == 3)
3734 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3735 else
3736 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3737 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3738 code, op, exp);
3739 gimple_seq_add_stmt_without_update (&seq, g);
3740 op = gimple_assign_lhs (g);
3742 type1 = TREE_TYPE (ranges[k - 1].exp);
3743 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3745 gimple *g
3746 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3747 gimple_seq_add_stmt_without_update (&seq, g);
3748 op = gimple_assign_lhs (g);
3750 candidates.pop ();
3751 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3752 candidates.length (), opcode, ops, op,
3753 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3754 ranges[k - 1].high, strict_overflow_p))
3755 any_changes = true;
3756 else
3757 gimple_seq_discard (seq);
3758 if ((b % 4) == 2 && buckets[b] != i)
3759 /* There is more work to do for this bucket. */
3760 b--;
3763 return any_changes;
3766 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3767 a >= 0 && a < b into (unsigned) a < (unsigned) b
3768 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
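/* A hedged justification (no new cases beyond the comment above): when a
   is negative, (unsigned) a has its sign bit set and is therefore larger
   than any (unsigned) b with b >= 0, so the single unsigned comparison
   fails exactly when the original conjunction would.  */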
3770 static bool
3771 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3772 vec<operand_entry *> *ops,
3773 struct range_entry *ranges,
3774 basic_block first_bb)
3776 int i;
3777 bool any_changes = false;
3778 hash_map<tree, int> *map = NULL;
3780 for (i = first; i < length; i++)
3782 if (ranges[i].exp == NULL_TREE
3783 || TREE_CODE (ranges[i].exp) != SSA_NAME
3784 || !ranges[i].in_p)
3785 continue;
3787 tree type = TREE_TYPE (ranges[i].exp);
3788 if (!INTEGRAL_TYPE_P (type)
3789 || TYPE_UNSIGNED (type)
3790 || ranges[i].low == NULL_TREE
3791 || !integer_zerop (ranges[i].low)
3792 || ranges[i].high != NULL_TREE)
3793 continue;
3794 /* EXP >= 0 here. */
3795 if (map == NULL)
3796 map = new hash_map <tree, int>;
3797 map->put (ranges[i].exp, i);
3800 if (map == NULL)
3801 return false;
3803 for (i = 0; i < length; i++)
3805 bool in_p = ranges[i].in_p;
3806 if (ranges[i].low == NULL_TREE
3807 || ranges[i].high == NULL_TREE)
3808 continue;
3809 if (!integer_zerop (ranges[i].low)
3810 || !integer_zerop (ranges[i].high))
3812 if (ranges[i].exp
3813 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3814 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3815 && integer_onep (ranges[i].low)
3816 && integer_onep (ranges[i].high))
3817 in_p = !in_p;
3818 else
3819 continue;
3822 gimple *stmt;
3823 tree_code ccode;
3824 tree rhs1, rhs2;
3825 if (ranges[i].exp)
3827 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3828 continue;
3829 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3830 if (!is_gimple_assign (stmt))
3831 continue;
3832 ccode = gimple_assign_rhs_code (stmt);
3833 rhs1 = gimple_assign_rhs1 (stmt);
3834 rhs2 = gimple_assign_rhs2 (stmt);
3836 else
3838 operand_entry *oe = (*ops)[ranges[i].idx];
3839 stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3840 if (gimple_code (stmt) != GIMPLE_COND)
3841 continue;
3842 ccode = gimple_cond_code (stmt);
3843 rhs1 = gimple_cond_lhs (stmt);
3844 rhs2 = gimple_cond_rhs (stmt);
3847 if (TREE_CODE (rhs1) != SSA_NAME
3848 || rhs2 == NULL_TREE
3849 || TREE_CODE (rhs2) != SSA_NAME)
3850 continue;
3852 switch (ccode)
3854 case GT_EXPR:
3855 case GE_EXPR:
3856 case LT_EXPR:
3857 case LE_EXPR:
3858 break;
3859 default:
3860 continue;
3862 if (in_p)
3863 ccode = invert_tree_comparison (ccode, false);
3864 switch (ccode)
3866 case GT_EXPR:
3867 case GE_EXPR:
3868 std::swap (rhs1, rhs2);
3869 ccode = swap_tree_comparison (ccode);
3870 break;
3871 case LT_EXPR:
3872 case LE_EXPR:
3873 break;
3874 default:
3875 gcc_unreachable ();
3878 int *idx = map->get (rhs1);
3879 if (idx == NULL)
3880 continue;
3882 /* maybe_optimize_range_tests allows statements without side-effects
3883 in the basic blocks as long as they are consumed in the same bb.
3884 Make sure rhs2's def stmt is not among them, otherwise we can't
3885 safely use get_nonzero_bits on it. E.g. in:
3886 # RANGE [-83, 1] NONZERO 173
3887 # k_32 = PHI <k_47(13), k_12(9)>
3889 if (k_32 >= 0)
3890 goto <bb 5>; [26.46%]
3891 else
3892 goto <bb 9>; [73.54%]
3894 <bb 5> [local count: 140323371]:
3895 # RANGE [0, 1] NONZERO 1
3896 _5 = (int) k_32;
3897 # RANGE [0, 4] NONZERO 4
3898 _21 = _5 << 2;
3899 # RANGE [0, 4] NONZERO 4
3900 iftmp.0_44 = (char) _21;
3901 if (k_32 < iftmp.0_44)
3902 goto <bb 6>; [84.48%]
3903 else
3904 goto <bb 9>; [15.52%]
3905 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3906 k_32 >= 0. If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3907 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3908 those stmts even for negative k_32, the value ranges would no
3909 longer be guaranteed, and the optimization would be invalid. */
3910 while (opcode == ERROR_MARK)
3912 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3913 basic_block bb2 = gimple_bb (g);
3914 if (bb2
3915 && bb2 != first_bb
3916 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3918 /* As an exception, handle a few common cases. */
3919 if (gimple_assign_cast_p (g)
3920 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3922 tree op0 = gimple_assign_rhs1 (g);
3923 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3924 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3925 > TYPE_PRECISION (TREE_TYPE (op0))))
3926 /* Zero-extension is always ok. */
3927 break;
3928 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3929 == TYPE_PRECISION (TREE_TYPE (op0))
3930 && TREE_CODE (op0) == SSA_NAME)
3932 /* Cast from signed to unsigned or vice versa. Retry
3933 with the op0 as new rhs2. */
3934 rhs2 = op0;
3935 continue;
3938 else if (is_gimple_assign (g)
3939 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3940 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3941 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3942 /* Masking with INTEGER_CST with MSB clear is always ok
3943 too. */
3944 break;
3945 rhs2 = NULL_TREE;
3947 break;
3949 if (rhs2 == NULL_TREE)
3950 continue;
3952 wide_int nz = get_nonzero_bits (rhs2);
3953 if (wi::neg_p (nz))
3954 continue;
3956 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3957 and RHS2 is known to be >= 0. */
3958 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3960 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3961 if ((ranges[*idx].strict_overflow_p
3962 || ranges[i].strict_overflow_p)
3963 && issue_strict_overflow_warning (wc))
3964 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3965 "assuming signed overflow does not occur "
3966 "when simplifying range test");
3968 if (dump_file && (dump_flags & TDF_DETAILS))
3970 struct range_entry *r = &ranges[*idx];
3971 fprintf (dump_file, "Optimizing range test ");
3972 print_generic_expr (dump_file, r->exp);
3973 fprintf (dump_file, " +[");
3974 print_generic_expr (dump_file, r->low);
3975 fprintf (dump_file, ", ");
3976 print_generic_expr (dump_file, r->high);
3977 fprintf (dump_file, "] and comparison ");
3978 print_generic_expr (dump_file, rhs1);
3979 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3980 print_generic_expr (dump_file, rhs2);
3981 fprintf (dump_file, "\n into (");
3982 print_generic_expr (dump_file, utype);
3983 fprintf (dump_file, ") ");
3984 print_generic_expr (dump_file, rhs1);
3985 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3986 print_generic_expr (dump_file, utype);
3987 fprintf (dump_file, ") ");
3988 print_generic_expr (dump_file, rhs2);
3989 fprintf (dump_file, "\n");
3992 operand_entry *oe = (*ops)[ranges[i].idx];
3993 ranges[i].in_p = 0;
3994 if (opcode == BIT_IOR_EXPR
3995 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3997 ranges[i].in_p = 1;
3998 ccode = invert_tree_comparison (ccode, false);
4001 unsigned int uid = gimple_uid (stmt);
4002 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4003 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4004 gimple_set_uid (g, uid);
4005 rhs1 = gimple_assign_lhs (g);
4006 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4007 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4009 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4010 gimple_set_uid (g, uid);
4011 rhs2 = gimple_assign_lhs (g);
4012 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4014 if (tree_swap_operands_p (rhs1, rhs2))
4016 std::swap (rhs1, rhs2);
4017 ccode = swap_tree_comparison (ccode);
4019 if (gimple_code (stmt) == GIMPLE_COND)
4021 gcond *c = as_a <gcond *> (stmt);
4022 gimple_cond_set_code (c, ccode);
4023 gimple_cond_set_lhs (c, rhs1);
4024 gimple_cond_set_rhs (c, rhs2);
4025 update_stmt (stmt);
4027 else
4029 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4030 if (!INTEGRAL_TYPE_P (ctype)
4031 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4032 && TYPE_PRECISION (ctype) != 1))
4033 ctype = boolean_type_node;
4034 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4035 gimple_set_uid (g, uid);
4036 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4037 if (oe->op && ctype != TREE_TYPE (oe->op))
4039 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4040 NOP_EXPR, gimple_assign_lhs (g));
4041 gimple_set_uid (g, uid);
4042 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4044 ranges[i].exp = gimple_assign_lhs (g);
4045 oe->op = ranges[i].exp;
4046 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4047 ranges[i].high = ranges[i].low;
4049 ranges[i].strict_overflow_p = false;
4050 oe = (*ops)[ranges[*idx].idx];
4051 /* Now change all the other range test immediate uses, so that
4052 those tests will be optimized away. */
4053 if (opcode == ERROR_MARK)
4055 if (oe->op)
4056 oe->op = build_int_cst (TREE_TYPE (oe->op),
4057 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4058 else
4059 oe->op = (oe->rank == BIT_IOR_EXPR
4060 ? boolean_false_node : boolean_true_node);
4062 else
4063 oe->op = error_mark_node;
4064 ranges[*idx].exp = NULL_TREE;
4065 ranges[*idx].low = NULL_TREE;
4066 ranges[*idx].high = NULL_TREE;
4067 any_changes = true;
4070 delete map;
4071 return any_changes;
4074 /* Optimize range tests, similarly to how fold_range_test optimizes
4075 them on trees. The tree code for the binary
4076 operation between all the operands is OPCODE.
4077 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4078 maybe_optimize_range_tests for inter-bb range optimization.
4079 In that case if oe->op is NULL, oe->id is the index of the bb whose
4080 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
4081 the actual opcode.
4082 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
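/* As a simple illustration (names made up), a chain like

     a == 0 || a == 1 || a == 2

   is recorded as three single-value ranges on A, merged by merge_ranges
   into the single range [0, 2], and emitted as one test, effectively

     (unsigned) a <= 2

   with the || case handled by inverting in_p before merging and
   inverting it again when the optimized expression is emitted.  */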
4084 static bool
4085 optimize_range_tests (enum tree_code opcode,
4086 vec<operand_entry *> *ops, basic_block first_bb)
4088 unsigned int length = ops->length (), i, j, first;
4089 operand_entry *oe;
4090 struct range_entry *ranges;
4091 bool any_changes = false;
4093 if (length == 1)
4094 return false;
4096 ranges = XNEWVEC (struct range_entry, length);
4097 for (i = 0; i < length; i++)
4099 oe = (*ops)[i];
4100 ranges[i].idx = i;
4101 init_range_entry (ranges + i, oe->op,
4102 oe->op
4103 ? NULL
4104 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4105 /* For |, invert it now; we will invert it again before emitting
4106 the optimized expression. */
4107 if (opcode == BIT_IOR_EXPR
4108 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4109 ranges[i].in_p = !ranges[i].in_p;
4112 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4113 for (i = 0; i < length; i++)
4114 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4115 break;
4117 /* Try to merge ranges. */
4118 for (first = i; i < length; i++)
4120 tree low = ranges[i].low;
4121 tree high = ranges[i].high;
4122 int in_p = ranges[i].in_p;
4123 bool strict_overflow_p = ranges[i].strict_overflow_p;
4124 int update_fail_count = 0;
4126 for (j = i + 1; j < length; j++)
4128 if (ranges[i].exp != ranges[j].exp)
4129 break;
4130 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4131 ranges[j].in_p, ranges[j].low, ranges[j].high))
4132 break;
4133 strict_overflow_p |= ranges[j].strict_overflow_p;
4136 if (j == i + 1)
4137 continue;
4139 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4140 opcode, ops, ranges[i].exp, NULL, in_p,
4141 low, high, strict_overflow_p))
4143 i = j - 1;
4144 any_changes = true;
4146 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4147 while update_range_test would fail. */
4148 else if (update_fail_count == 64)
4149 i = j - 1;
4150 else
4151 ++update_fail_count;
4154 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4155 ops, ranges);
4157 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4158 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4159 ops, ranges);
4160 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4161 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4162 ops, ranges);
4163 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4164 ranges, first_bb);
4165 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4166 ops, ranges);
4168 if (any_changes && opcode != ERROR_MARK)
4170 j = 0;
4171 FOR_EACH_VEC_ELT (*ops, i, oe)
4173 if (oe->op == error_mark_node)
4174 continue;
4175 else if (i != j)
4176 (*ops)[j] = oe;
4177 j++;
4179 ops->truncate (j);
4182 XDELETEVEC (ranges);
4183 return any_changes;
4186 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4187 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4188 otherwise the comparison code. TYPE is a return value that is set
4189 to the type of the comparison. */
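/* For illustration (SSA names made up), the canonical shape matched
   here is

     c_1 = a_2 < b_3;
     v_4 = VEC_COND_EXPR <c_1, { -1, ... }, { 0, ... }>;

   for which LT_EXPR is returned with *LHS = a_2 and *RHS = b_3.  */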
4191 static tree_code
4192 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4193 tree *lhs, tree *rhs, gassign **vcond)
4195 if (TREE_CODE (var) != SSA_NAME)
4196 return ERROR_MARK;
4198 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4199 if (stmt == NULL)
4200 return ERROR_MARK;
4201 if (vcond)
4202 *vcond = stmt;
4204 /* ??? If we start creating more COND_EXPR, we could perform
4205 this same optimization with them. For now, simplify. */
4206 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4207 return ERROR_MARK;
4209 tree cond = gimple_assign_rhs1 (stmt);
4210 tree_code cmp = TREE_CODE (cond);
4211 if (cmp != SSA_NAME)
4212 return ERROR_MARK;
4214 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4215 if (assign == NULL
4216 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4217 return ERROR_MARK;
4219 cmp = gimple_assign_rhs_code (assign);
4220 if (lhs)
4221 *lhs = gimple_assign_rhs1 (assign);
4222 if (rhs)
4223 *rhs = gimple_assign_rhs2 (assign);
4225 /* ??? For now, allow only canonical true and false result vectors.
4226 We could expand this to other constants should the need arise,
4227 but at the moment we don't create them. */
4228 tree t = gimple_assign_rhs2 (stmt);
4229 tree f = gimple_assign_rhs3 (stmt);
4230 bool inv;
4231 if (integer_all_onesp (t))
4232 inv = false;
4233 else if (integer_all_onesp (f))
4235 cmp = invert_tree_comparison (cmp, false);
4236 inv = true;
4238 else
4239 return ERROR_MARK;
4240 if (!integer_zerop (f))
4241 return ERROR_MARK;
4243 /* Success! */
4244 if (rets)
4245 *rets = assign;
4246 if (reti)
4247 *reti = inv;
4248 if (type)
4249 *type = TREE_TYPE (cond);
4250 return cmp;
4253 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4254 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
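/* For illustration (SSA names made up), when two such results are
   combined with BIT_AND_EXPR, as in

     c_1 = x_2 < y_3;
     m_4 = VEC_COND_EXPR <c_1, { -1, ... }, { 0, ... }>;
     c_5 = x_2 <= y_3;
     m_6 = VEC_COND_EXPR <c_5, { -1, ... }, { 0, ... }>;
     m_7 = m_4 & m_6;

   maybe_fold_and_comparisons can fold the two comparisons into the
   single comparison x_2 < y_3, so one VEC_COND_EXPR suffices and the
   other operand is dropped from the ops vector.  */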
4256 static bool
4257 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4259 unsigned int length = ops->length (), i, j;
4260 bool any_changes = false;
4262 if (length == 1)
4263 return false;
4265 for (i = 0; i < length; ++i)
4267 tree elt0 = (*ops)[i]->op;
4269 gassign *stmt0, *vcond0;
4270 bool invert;
4271 tree type, lhs0, rhs0;
4272 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4273 &rhs0, &vcond0);
4274 if (cmp0 == ERROR_MARK)
4275 continue;
4277 for (j = i + 1; j < length; ++j)
4279 tree &elt1 = (*ops)[j]->op;
4281 gassign *stmt1, *vcond1;
4282 tree lhs1, rhs1;
4283 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4284 &rhs1, &vcond1);
4285 if (cmp1 == ERROR_MARK)
4286 continue;
4288 tree comb;
4289 if (opcode == BIT_AND_EXPR)
4290 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4291 cmp1, lhs1, rhs1);
4292 else if (opcode == BIT_IOR_EXPR)
4293 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4294 cmp1, lhs1, rhs1);
4295 else
4296 gcc_unreachable ();
4297 if (comb == NULL)
4298 continue;
4300 /* Success! */
4301 if (dump_file && (dump_flags & TDF_DETAILS))
4303 fprintf (dump_file, "Transforming ");
4304 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4305 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4306 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4307 fprintf (dump_file, " into ");
4308 print_generic_expr (dump_file, comb);
4309 fputc ('\n', dump_file);
4312 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4313 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4314 true, GSI_SAME_STMT);
4315 if (invert)
4316 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4317 gimple_assign_rhs3_ptr (vcond0));
4318 gimple_assign_set_rhs1 (vcond0, exp);
4319 update_stmt (vcond0);
4321 elt1 = error_mark_node;
4322 any_changes = true;
4326 if (any_changes)
4328 operand_entry *oe;
4329 j = 0;
4330 FOR_EACH_VEC_ELT (*ops, i, oe)
4332 if (oe->op == error_mark_node)
4333 continue;
4334 else if (i != j)
4335 (*ops)[j] = oe;
4336 j++;
4338 ops->truncate (j);
4341 return any_changes;
4344 /* Return true if STMT is a cast like:
4345 <bb N>:
4347 _123 = (int) _234;
4349 <bb M>:
4350 # _345 = PHI <_123(N), 1(...), 1(...)>
4351 where _234 has bool type, _123 has a single use and
4352 bb N has a single successor M. This is commonly used in
4353 the last block of a range test.
4355 Also return true if STMT is a tcc_comparison like:
4356 <bb N>:
4358 _234 = a_2(D) == 2;
4360 <bb M>:
4361 # _345 = PHI <_234(N), 1(...), 1(...)>
4362 _346 = (int) _345;
4363 where _234 has bool type, a single use and
4364 bb N has a single successor M. This is commonly used in
4365 the last block of a range test. */
4367 static bool
4368 final_range_test_p (gimple *stmt)
4370 basic_block bb, rhs_bb, lhs_bb;
4371 edge e;
4372 tree lhs, rhs;
4373 use_operand_p use_p;
4374 gimple *use_stmt;
4376 if (!gimple_assign_cast_p (stmt)
4377 && (!is_gimple_assign (stmt)
4378 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4379 != tcc_comparison)))
4380 return false;
4381 bb = gimple_bb (stmt);
4382 if (!single_succ_p (bb))
4383 return false;
4384 e = single_succ_edge (bb);
4385 if (e->flags & EDGE_COMPLEX)
4386 return false;
4388 lhs = gimple_assign_lhs (stmt);
4389 rhs = gimple_assign_rhs1 (stmt);
4390 if (gimple_assign_cast_p (stmt)
4391 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4392 || TREE_CODE (rhs) != SSA_NAME
4393 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4394 return false;
4396 if (!gimple_assign_cast_p (stmt)
4397 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4398 return false;
4400 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4401 if (!single_imm_use (lhs, &use_p, &use_stmt))
4402 return false;
4404 if (gimple_code (use_stmt) != GIMPLE_PHI
4405 || gimple_bb (use_stmt) != e->dest)
4406 return false;
4408 /* And that the rhs is defined in the same loop. */
4409 if (gimple_assign_cast_p (stmt))
4411 if (TREE_CODE (rhs) != SSA_NAME
4412 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4413 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4414 return false;
4416 else
4418 if (TREE_CODE (lhs) != SSA_NAME
4419 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4420 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4421 return false;
4424 return true;
4427 /* Return true if BB is a suitable basic block for inter-bb range test
4428 optimization. If BACKWARD is true, BB should be the only predecessor
4429 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4430 or compared against to find a common basic block to which all conditions
4431 branch if true resp. false. If BACKWARD is false, TEST_BB should
4432 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4433 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4434 block points to an empty block that falls through into *OTHER_BB and
4435 the phi args match that path. */
4437 static bool
4438 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4439 bool *test_swapped_p, bool backward)
4441 edge_iterator ei, ei2;
4442 edge e, e2;
4443 gimple *stmt;
4444 gphi_iterator gsi;
4445 bool other_edge_seen = false;
4446 bool is_cond;
4448 if (test_bb == bb)
4449 return false;
4450 /* Check last stmt first. */
4451 stmt = last_nondebug_stmt (bb);
4452 if (stmt == NULL
4453 || (gimple_code (stmt) != GIMPLE_COND
4454 && (backward || !final_range_test_p (stmt)))
4455 || gimple_visited_p (stmt)
4456 || stmt_could_throw_p (cfun, stmt)
4457 || *other_bb == bb)
4458 return false;
4459 is_cond = gimple_code (stmt) == GIMPLE_COND;
4460 if (is_cond)
4462 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4463 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4464 to *OTHER_BB (if not set yet, try to find it out). */
4465 if (EDGE_COUNT (bb->succs) != 2)
4466 return false;
4467 FOR_EACH_EDGE (e, ei, bb->succs)
4469 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4470 return false;
4471 if (e->dest == test_bb)
4473 if (backward)
4474 continue;
4475 else
4476 return false;
4478 if (e->dest == bb)
4479 return false;
4480 if (*other_bb == NULL)
4482 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4483 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4484 return false;
4485 else if (e->dest == e2->dest)
4486 *other_bb = e->dest;
4487 if (*other_bb == NULL)
4488 return false;
4490 if (e->dest == *other_bb)
4491 other_edge_seen = true;
4492 else if (backward)
4493 return false;
4495 if (*other_bb == NULL || !other_edge_seen)
4496 return false;
4498 else if (single_succ (bb) != *other_bb)
4499 return false;
4501 /* Now check all PHIs of *OTHER_BB. */
4502 e = find_edge (bb, *other_bb);
4503 e2 = find_edge (test_bb, *other_bb);
4504 retry:;
4505 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4507 gphi *phi = gsi.phi ();
4508 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4509 corresponding to BB and TEST_BB predecessor must be the same. */
4510 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4511 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4513 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4514 one of the PHIs should have the lhs of the last stmt in
4515 that block as PHI arg and that PHI should have 0 or 1
4516 corresponding to it in all other range test basic blocks
4517 considered. */
4518 if (!is_cond)
4520 if (gimple_phi_arg_def (phi, e->dest_idx)
4521 == gimple_assign_lhs (stmt)
4522 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4523 || integer_onep (gimple_phi_arg_def (phi,
4524 e2->dest_idx))))
4525 continue;
4527 else
4529 gimple *test_last = last_nondebug_stmt (test_bb);
4530 if (gimple_code (test_last) == GIMPLE_COND)
4532 if (backward ? e2->src != test_bb : e->src != bb)
4533 return false;
4535 /* For last_bb, handle also:
4536 if (x_3(D) == 3)
4537 goto <bb 6>; [34.00%]
4538 else
4539 goto <bb 7>; [66.00%]
4541 <bb 6> [local count: 79512730]:
4543 <bb 7> [local count: 1073741824]:
4544 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4545 where bb 7 is *OTHER_BB, but the PHI values from the
4546 earlier bbs match the path through the empty bb
4547 in between. */
4548 edge e3;
4549 if (backward)
4550 e3 = EDGE_SUCC (test_bb,
4551 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4552 else
4553 e3 = EDGE_SUCC (bb,
4554 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4555 if (empty_block_p (e3->dest)
4556 && single_succ_p (e3->dest)
4557 && single_succ (e3->dest) == *other_bb
4558 && single_pred_p (e3->dest)
4559 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4561 if (backward)
4562 e2 = single_succ_edge (e3->dest);
4563 else
4564 e = single_succ_edge (e3->dest);
4565 if (test_swapped_p)
4566 *test_swapped_p = true;
4567 goto retry;
4570 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4571 == gimple_assign_lhs (test_last)
4572 && (integer_zerop (gimple_phi_arg_def (phi,
4573 e->dest_idx))
4574 || integer_onep (gimple_phi_arg_def (phi,
4575 e->dest_idx))))
4576 continue;
4579 return false;
4582 return true;
4585 /* Return true if BB doesn't have side-effects that would disallow
4586 range test optimization, all SSA_NAMEs set in the bb are consumed
4587 in the bb and there are no PHIs. */
4589 bool
4590 no_side_effect_bb (basic_block bb)
4592 gimple_stmt_iterator gsi;
4593 gimple *last;
4595 if (!gimple_seq_empty_p (phi_nodes (bb)))
4596 return false;
4597 last = last_nondebug_stmt (bb);
4598 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4600 gimple *stmt = gsi_stmt (gsi);
4601 tree lhs;
4602 imm_use_iterator imm_iter;
4603 use_operand_p use_p;
4605 if (is_gimple_debug (stmt))
4606 continue;
4607 if (gimple_has_side_effects (stmt))
4608 return false;
4609 if (stmt == last)
4610 return true;
4611 if (!is_gimple_assign (stmt))
4612 return false;
4613 lhs = gimple_assign_lhs (stmt);
4614 if (TREE_CODE (lhs) != SSA_NAME)
4615 return false;
4616 if (gimple_assign_rhs_could_trap_p (stmt))
4617 return false;
4618 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4620 gimple *use_stmt = USE_STMT (use_p);
4621 if (is_gimple_debug (use_stmt))
4622 continue;
4623 if (gimple_bb (use_stmt) != bb)
4624 return false;
4627 return false;
4630 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4631 return true and fill in *OPS recursively. */
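/* For illustration (SSA names made up), given

     _5 = a_1 != 0;
     _6 = b_2 != 0;
     _7 = _5 | _6;
     _8 = c_3 != 0;
     _9 = _7 | _8;

   get_ops (_9, BIT_IOR_EXPR, ...) pushes _5, _6 and _8 into *OPS,
   provided each of those leaves has a single use.  */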
4633 static bool
4634 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4635 class loop *loop)
4637 gimple *stmt = SSA_NAME_DEF_STMT (var);
4638 tree rhs[2];
4639 int i;
4641 if (!is_reassociable_op (stmt, code, loop))
4642 return false;
4644 rhs[0] = gimple_assign_rhs1 (stmt);
4645 rhs[1] = gimple_assign_rhs2 (stmt);
4646 gimple_set_visited (stmt, true);
4647 for (i = 0; i < 2; i++)
4648 if (TREE_CODE (rhs[i]) == SSA_NAME
4649 && !get_ops (rhs[i], code, ops, loop)
4650 && has_single_use (rhs[i]))
4652 operand_entry *oe = operand_entry_pool.allocate ();
4654 oe->op = rhs[i];
4655 oe->rank = code;
4656 oe->id = 0;
4657 oe->count = 1;
4658 oe->stmt_to_insert = NULL;
4659 ops->safe_push (oe);
4661 return true;
4664 /* Find the ops that were added by get_ops starting from VAR; see if
4665 they were changed during update_range_test and, if so, create new
4666 stmts. */
4668 static tree
4669 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4670 unsigned int *pidx, class loop *loop)
4672 gimple *stmt = SSA_NAME_DEF_STMT (var);
4673 tree rhs[4];
4674 int i;
4676 if (!is_reassociable_op (stmt, code, loop))
4677 return NULL;
4679 rhs[0] = gimple_assign_rhs1 (stmt);
4680 rhs[1] = gimple_assign_rhs2 (stmt);
4681 rhs[2] = rhs[0];
4682 rhs[3] = rhs[1];
4683 for (i = 0; i < 2; i++)
4684 if (TREE_CODE (rhs[i]) == SSA_NAME)
4686 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4687 if (rhs[2 + i] == NULL_TREE)
4689 if (has_single_use (rhs[i]))
4690 rhs[2 + i] = ops[(*pidx)++]->op;
4691 else
4692 rhs[2 + i] = rhs[i];
4695 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4696 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4698 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4699 var = make_ssa_name (TREE_TYPE (var));
4700 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4701 rhs[2], rhs[3]);
4702 gimple_set_uid (g, gimple_uid (stmt));
4703 gimple_set_visited (g, true);
4704 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4705 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4706 if (fold_stmt_inplace (&gsi2))
4707 update_stmt (g);
4709 return var;
4712 /* Structure to track the initial value passed to get_ops and
4713 the range in the ops vector for each basic block. */
4715 struct inter_bb_range_test_entry
4717 tree op;
4718 unsigned int first_idx, last_idx;
4721 /* Inter-bb range test optimization.
4723 Returns TRUE if a gimple conditional is optimized to a constant
4724 true/false, otherwise returns FALSE.
4726 This indicates to the caller that it should run a CFG cleanup pass
4727 once reassociation is completed. */
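/* For illustration (names made up), the blocks handled here typically
   come from source like

     if (a == 3)
       goto found;
     if (a == 5)
       goto found;
     if (a == 7)
       goto found;

   where each comparison ends its own basic block with a GIMPLE_COND.
   The conditions are collected into one ops vector, handed to
   optimize_range_tests, and any GIMPLE_COND that becomes redundant is
   turned into a constant true/false condition for CFG cleanup to
   remove.  */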
4729 static bool
4730 maybe_optimize_range_tests (gimple *stmt)
4732 basic_block first_bb = gimple_bb (stmt);
4733 basic_block last_bb = first_bb;
4734 basic_block other_bb = NULL;
4735 basic_block bb;
4736 edge_iterator ei;
4737 edge e;
4738 auto_vec<operand_entry *> ops;
4739 auto_vec<inter_bb_range_test_entry> bbinfo;
4740 bool any_changes = false;
4741 bool cfg_cleanup_needed = false;
4743 /* Consider only basic blocks that end with GIMPLE_COND or
4744 a cast statement satisfying final_range_test_p. All
4745 but the last bb in the first_bb .. last_bb range
4746 should end with GIMPLE_COND. */
4747 if (gimple_code (stmt) == GIMPLE_COND)
4749 if (EDGE_COUNT (first_bb->succs) != 2)
4750 return cfg_cleanup_needed;
4752 else if (final_range_test_p (stmt))
4753 other_bb = single_succ (first_bb);
4754 else
4755 return cfg_cleanup_needed;
4757 if (stmt_could_throw_p (cfun, stmt))
4758 return cfg_cleanup_needed;
4760 /* As relative ordering of post-dominator sons isn't fixed,
4761 maybe_optimize_range_tests can be called first on any
4762 bb in the range we want to optimize. So, start searching
4763 backwards to see if first_bb can be set to a predecessor.
4764 while (single_pred_p (first_bb))
4766 basic_block pred_bb = single_pred (first_bb);
4767 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4768 break;
4769 if (!no_side_effect_bb (first_bb))
4770 break;
4771 first_bb = pred_bb;
4773 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4774 Before starting forward search in last_bb successors, find
4775 out the other_bb. */
4776 if (first_bb == last_bb)
4778 other_bb = NULL;
4779 /* As non-GIMPLE_COND last stmt always terminates the range,
4780 if forward search didn't discover anything, just give up. */
4781 if (gimple_code (stmt) != GIMPLE_COND)
4782 return cfg_cleanup_needed;
4783 /* Look at both successors. Either it ends with a GIMPLE_COND
4784 and satisfies suitable_cond_bb, or ends with a cast and
4785 other_bb is that cast's successor. */
4786 FOR_EACH_EDGE (e, ei, first_bb->succs)
4787 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4788 || e->dest == first_bb)
4789 return cfg_cleanup_needed;
4790 else if (single_pred_p (e->dest))
4792 stmt = last_nondebug_stmt (e->dest);
4793 if (stmt
4794 && gimple_code (stmt) == GIMPLE_COND
4795 && EDGE_COUNT (e->dest->succs) == 2)
4797 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4798 NULL, true))
4799 break;
4800 else
4801 other_bb = NULL;
4803 else if (stmt
4804 && final_range_test_p (stmt)
4805 && find_edge (first_bb, single_succ (e->dest)))
4807 other_bb = single_succ (e->dest);
4808 if (other_bb == first_bb)
4809 other_bb = NULL;
4812 if (other_bb == NULL)
4813 return cfg_cleanup_needed;
4815 /* Now do the forward search, moving last_bb to successor bbs
4816 that aren't other_bb. */
4817 while (EDGE_COUNT (last_bb->succs) == 2)
4819 FOR_EACH_EDGE (e, ei, last_bb->succs)
4820 if (e->dest != other_bb)
4821 break;
4822 if (e == NULL)
4823 break;
4824 if (!single_pred_p (e->dest))
4825 break;
4826 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4827 break;
4828 if (!no_side_effect_bb (e->dest))
4829 break;
4830 last_bb = e->dest;
4832 if (first_bb == last_bb)
4833 return cfg_cleanup_needed;
4834 /* Here basic blocks first_bb through last_bb's predecessor
4835 end with GIMPLE_COND, all of them have one of the edges to
4836 other_bb and another to another block in the range,
4837 all blocks except first_bb don't have side-effects and
4838 last_bb ends with either GIMPLE_COND, or cast satisfying
4839 final_range_test_p. */
4840 for (bb = last_bb; ; bb = single_pred (bb))
4842 enum tree_code code;
4843 tree lhs, rhs;
4844 inter_bb_range_test_entry bb_ent;
4846 bb_ent.op = NULL_TREE;
4847 bb_ent.first_idx = ops.length ();
4848 bb_ent.last_idx = bb_ent.first_idx;
4849 e = find_edge (bb, other_bb);
4850 stmt = last_nondebug_stmt (bb);
4851 gimple_set_visited (stmt, true);
4852 if (gimple_code (stmt) != GIMPLE_COND)
4854 use_operand_p use_p;
4855 gimple *phi;
4856 edge e2;
4857 unsigned int d;
4859 lhs = gimple_assign_lhs (stmt);
4860 rhs = gimple_assign_rhs1 (stmt);
4861 gcc_assert (bb == last_bb);
4863 /* stmt is
4864 _123 = (int) _234;
4866 _234 = a_2(D) == 2;
4868 followed by:
4869 <bb M>:
4870 # _345 = PHI <_123(N), 1(...), 1(...)>
4872 or 0 instead of 1. If it is 0, the _234
4873 range test is anded together with all the
4874 other range tests, if it is 1, it is ored with
4875 them. */
4876 single_imm_use (lhs, &use_p, &phi);
4877 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4878 e2 = find_edge (first_bb, other_bb);
4879 d = e2->dest_idx;
4880 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4881 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4882 code = BIT_AND_EXPR;
4883 else
4885 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4886 code = BIT_IOR_EXPR;
4889 /* If _234 SSA_NAME_DEF_STMT is
4890 _234 = _567 | _789;
4891 (or &, corresponding to 1/0 in the phi arguments),
4892 push into ops the individual range test arguments
4893 of the bitwise or resp. and, recursively. */
4894 if (TREE_CODE (rhs) == SSA_NAME
4895 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4896 != tcc_comparison)
4897 && !get_ops (rhs, code, &ops,
4898 loop_containing_stmt (stmt))
4899 && has_single_use (rhs))
4901 /* Otherwise, push the _234 range test itself. */
4902 operand_entry *oe = operand_entry_pool.allocate ();
4904 oe->op = rhs;
4905 oe->rank = code;
4906 oe->id = 0;
4907 oe->count = 1;
4908 oe->stmt_to_insert = NULL;
4909 ops.safe_push (oe);
4910 bb_ent.last_idx++;
4911 bb_ent.op = rhs;
4913 else if (is_gimple_assign (stmt)
4914 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4915 == tcc_comparison)
4916 && !get_ops (lhs, code, &ops,
4917 loop_containing_stmt (stmt))
4918 && has_single_use (lhs))
4920 operand_entry *oe = operand_entry_pool.allocate ();
4921 oe->op = lhs;
4922 oe->rank = code;
4923 oe->id = 0;
4924 oe->count = 1;
4925 ops.safe_push (oe);
4926 bb_ent.last_idx++;
4927 bb_ent.op = lhs;
4929 else
4931 bb_ent.last_idx = ops.length ();
4932 bb_ent.op = rhs;
4934 bbinfo.safe_push (bb_ent);
4935 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4936 ops[i]->id = bb->index;
4937 continue;
4939 else if (bb == last_bb)
4941 /* For last_bb, handle also:
4942 if (x_3(D) == 3)
4943 goto <bb 6>; [34.00%]
4944 else
4945 goto <bb 7>; [66.00%]
4947 <bb 6> [local count: 79512730]:
4949 <bb 7> [local count: 1073741824]:
4950 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4951 where bb 7 is OTHER_BB, but the PHI values from the
4952 earlier bbs match the path through the empty bb
4953 in between. */
4954 bool test_swapped_p = false;
4955 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4956 &other_bb, &test_swapped_p, true);
4957 gcc_assert (ok);
4958 if (test_swapped_p)
4959 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4961 /* Otherwise stmt is GIMPLE_COND. */
4962 code = gimple_cond_code (stmt);
4963 lhs = gimple_cond_lhs (stmt);
4964 rhs = gimple_cond_rhs (stmt);
4965 if (TREE_CODE (lhs) == SSA_NAME
4966 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4967 && ((code != EQ_EXPR && code != NE_EXPR)
4968 || rhs != boolean_false_node
4969 /* Either push into ops the individual bitwise
4970 or resp. and operands, depending on which
4971 edge is other_bb. */
4972 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4973 ^ (code == EQ_EXPR))
4974 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4975 loop_containing_stmt (stmt))))
4977 /* Or push the GIMPLE_COND stmt itself. */
4978 operand_entry *oe = operand_entry_pool.allocate ();
4980 oe->op = NULL;
4981 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4982 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4983 /* oe->op = NULL signals that there is no SSA_NAME
4984 for the range test; oe->id instead is the number
4985 of the basic block at whose end the GIMPLE_COND
4986 is found. */
4987 oe->id = bb->index;
4988 oe->count = 1;
4989 oe->stmt_to_insert = NULL;
4990 ops.safe_push (oe);
4991 bb_ent.op = NULL;
4992 bb_ent.last_idx++;
4994 else if (ops.length () > bb_ent.first_idx)
4996 bb_ent.op = lhs;
4997 bb_ent.last_idx = ops.length ();
4999 bbinfo.safe_push (bb_ent);
5000 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
5001 ops[i]->id = bb->index;
5002 if (bb == first_bb)
5003 break;
5005 if (ops.length () > 1)
5006 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5007 if (any_changes)
5009 unsigned int idx, max_idx = 0;
5010 /* update_ops relies on has_single_use predicates returning the
5011 same values as they did during get_ops earlier. Additionally it
5012 never removes statements, only adds new ones and it should walk
5013 from the single imm use and check the predicate already before
5014 making those changes.
5015 On the other side, the handling of GIMPLE_COND directly can turn
5016 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5017 it needs to be done in a separate loop afterwards. */
5018 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5020 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5021 && bbinfo[idx].op != NULL_TREE)
5023 tree new_op;
5025 max_idx = idx;
5026 stmt = last_nondebug_stmt (bb);
5027 new_op = update_ops (bbinfo[idx].op,
5028 (enum tree_code)
5029 ops[bbinfo[idx].first_idx]->rank,
5030 ops, &bbinfo[idx].first_idx,
5031 loop_containing_stmt (stmt));
5032 if (new_op == NULL_TREE)
5034 gcc_assert (bb == last_bb);
5035 new_op = ops[bbinfo[idx].first_idx++]->op;
5037 if (bbinfo[idx].op != new_op)
5039 imm_use_iterator iter;
5040 use_operand_p use_p;
5041 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5043 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5044 if (is_gimple_debug (use_stmt))
5045 continue;
5046 else if (gimple_code (use_stmt) == GIMPLE_COND
5047 || gimple_code (use_stmt) == GIMPLE_PHI)
5048 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5049 SET_USE (use_p, new_op);
5050 else if ((is_gimple_assign (use_stmt)
5051 && (TREE_CODE_CLASS
5052 (gimple_assign_rhs_code (use_stmt))
5053 == tcc_comparison)))
5054 cast_or_tcc_cmp_stmt = use_stmt;
5055 else if (gimple_assign_cast_p (use_stmt))
5056 cast_or_tcc_cmp_stmt = use_stmt;
5057 else
5058 gcc_unreachable ();
5060 if (cast_or_tcc_cmp_stmt)
5062 gcc_assert (bb == last_bb);
5063 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5064 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5065 enum tree_code rhs_code
5066 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5067 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5068 : CONVERT_EXPR;
5069 gassign *g;
5070 if (is_gimple_min_invariant (new_op))
5072 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5073 g = gimple_build_assign (new_lhs, new_op);
5075 else
5076 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5077 gimple_stmt_iterator gsi
5078 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5079 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5080 gimple_set_visited (g, true);
5081 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5082 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5083 if (is_gimple_debug (use_stmt))
5084 continue;
5085 else if (gimple_code (use_stmt) == GIMPLE_COND
5086 || gimple_code (use_stmt) == GIMPLE_PHI)
5087 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5088 SET_USE (use_p, new_lhs);
5089 else
5090 gcc_unreachable ();
5094 if (bb == first_bb)
5095 break;
5097 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5099 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5100 && bbinfo[idx].op == NULL_TREE
5101 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5103 gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5105 if (idx > max_idx)
5106 max_idx = idx;
5108 /* If we collapse the conditional to a true/false
5109 condition, then bubble that knowledge up to our caller. */
5110 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5112 gimple_cond_make_false (cond_stmt);
5113 cfg_cleanup_needed = true;
5115 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5117 gimple_cond_make_true (cond_stmt);
5118 cfg_cleanup_needed = true;
5120 else
5122 gimple_cond_set_code (cond_stmt, NE_EXPR);
5123 gimple_cond_set_lhs (cond_stmt,
5124 ops[bbinfo[idx].first_idx]->op);
5125 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5127 update_stmt (cond_stmt);
5129 if (bb == first_bb)
5130 break;
5133 /* The above changes could result in basic blocks after the first
5134 modified one, up to and including last_bb, being executed even if
5135 they would not be in the original program. If the value ranges of
5136 assignment lhs' in those bbs were dependent on the conditions
5137 guarding those basic blocks which now can change, the VRs might
5138 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5139 are only used within the same bb, it should not be a big deal if
5140 we just reset all the VRs in those bbs. See PR68671. */
5141 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5142 reset_flow_sensitive_info_in_bb (bb);
5144 return cfg_cleanup_needed;
5147 /* Remove the def stmt of VAR if VAR has zero uses, and repeat
5148 the process on its rhs1 operand. */
5150 static void
5151 remove_visited_stmt_chain (tree var)
5153 gimple *stmt;
5154 gimple_stmt_iterator gsi;
5156 while (1)
5158 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5159 return;
5160 stmt = SSA_NAME_DEF_STMT (var);
5161 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5163 var = gimple_assign_rhs1 (stmt);
5164 gsi = gsi_for_stmt (stmt);
5165 reassoc_remove_stmt (&gsi);
5166 release_defs (stmt);
5168 else
5169 return;
5173 /* This function checks three consecutive operands in
5174 the passed operands vector OPS starting from OPINDEX and
5175 swaps two operands if it is profitable for the binary operation
5176 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5178 We pair ops with the same rank if possible. */
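/* For illustration, if the three operands are a, b and c with (made up)
   ranks 2, 3 and 2, a and b are swapped, giving b, a, c, so that the
   two rank-2 operands end up in the positions consumed by one binary
   statement.  */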
5180 static void
5181 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5182 unsigned int opindex)
5184 operand_entry *oe1, *oe2, *oe3;
5186 oe1 = ops[opindex];
5187 oe2 = ops[opindex + 1];
5188 oe3 = ops[opindex + 2];
5190 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5191 std::swap (*oe1, *oe3);
5192 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5193 std::swap (*oe1, *oe2);
5196 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5197 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5198 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5199 after the returned stmt. */
5201 static inline gimple *
5202 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5204 insert_before = true;
5205 if (TREE_CODE (rhs1) == SSA_NAME
5206 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5208 stmt = SSA_NAME_DEF_STMT (rhs1);
5209 insert_before = false;
5211 if (TREE_CODE (rhs2) == SSA_NAME
5212 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5214 stmt = SSA_NAME_DEF_STMT (rhs2);
5215 insert_before = false;
5217 return stmt;
5220 /* If the stmt that defines operand has to be inserted, insert it
5221 before the use. */
5222 static void
5223 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5225 gcc_assert (is_gimple_assign (stmt_to_insert));
5226 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5227 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5228 bool insert_before;
5229 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5230 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5231 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5233 /* If the insert point is not stmt, then insert_point would be
5234 the point where operand rhs1 or rhs2 is defined. In this case,
5235 stmt_to_insert has to be inserted afterwards. This would
5236 only happen when the stmt insertion point is flexible. */
5237 if (insert_before)
5238 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5239 else
5240 insert_stmt_after (stmt_to_insert, insert_point);
5244 /* Recursively rewrite our linearized statements so that the operators
5245 match those in OPS[OPINDEX], putting the computation in rank
5246 order. Return new lhs.
5247 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5248 the current stmt and during recursive invocations.
5249 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5250 recursive invocations. */
5252 static tree
5253 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5254 const vec<operand_entry *> &ops, bool changed,
5255 bool next_changed)
5257 tree rhs1 = gimple_assign_rhs1 (stmt);
5258 tree rhs2 = gimple_assign_rhs2 (stmt);
5259 tree lhs = gimple_assign_lhs (stmt);
5260 operand_entry *oe;
5262 /* The final recursion case for this function is that you have
5263 exactly two operations left.
5264 If we had exactly one op in the entire list to start with, we
5265 would have never called this function, and the tail recursion
5266 rewrites them one at a time. */
5267 if (opindex + 2 == ops.length ())
5269 operand_entry *oe1, *oe2;
5271 oe1 = ops[opindex];
5272 oe2 = ops[opindex + 1];
5274 if (rhs1 != oe1->op || rhs2 != oe2->op)
5276 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5277 unsigned int uid = gimple_uid (stmt);
5279 if (dump_file && (dump_flags & TDF_DETAILS))
5281 fprintf (dump_file, "Transforming ");
5282 print_gimple_stmt (dump_file, stmt, 0);
5285 /* If the stmt that defines operand has to be inserted, insert it
5286 before the use. */
5287 if (oe1->stmt_to_insert)
5288 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5289 if (oe2->stmt_to_insert)
5290 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5291 /* Even when changed is false, reassociation could have e.g. removed
5292 some redundant operations, so unless we are just swapping the
5293 arguments or unless there is no change at all (then we just
5294 return lhs), force creation of a new SSA_NAME. */
5295 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5297 bool insert_before;
5298 gimple *insert_point
5299 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5300 lhs = make_ssa_name (TREE_TYPE (lhs));
5301 stmt
5302 = gimple_build_assign (lhs, rhs_code,
5303 oe1->op, oe2->op);
5304 gimple_set_uid (stmt, uid);
5305 gimple_set_visited (stmt, true);
5306 if (insert_before)
5307 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5308 else
5309 insert_stmt_after (stmt, insert_point);
5311 else
5313 bool insert_before;
5314 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5315 insert_before)
5316 == stmt);
5317 gimple_assign_set_rhs1 (stmt, oe1->op);
5318 gimple_assign_set_rhs2 (stmt, oe2->op);
5319 update_stmt (stmt);
5322 if (rhs1 != oe1->op && rhs1 != oe2->op)
5323 remove_visited_stmt_chain (rhs1);
5325 if (dump_file && (dump_flags & TDF_DETAILS))
5327 fprintf (dump_file, " into ");
5328 print_gimple_stmt (dump_file, stmt, 0);
5331 return lhs;
5334 /* If we hit here, we should have 3 or more ops left. */
5335 gcc_assert (opindex + 2 < ops.length ());
5337 /* Rewrite the next operator. */
5338 oe = ops[opindex];
5340 /* If the stmt that defines operand has to be inserted, insert it
5341 before the use. */
5342 if (oe->stmt_to_insert)
5343 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5345 /* Recurse on the LHS of the binary operator, which is guaranteed to
5346 be the non-leaf side. */
5347 tree new_rhs1
5348 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5349 changed || oe->op != rhs2 || next_changed,
5350 false);
5352 if (oe->op != rhs2 || new_rhs1 != rhs1)
5354 if (dump_file && (dump_flags & TDF_DETAILS))
5356 fprintf (dump_file, "Transforming ");
5357 print_gimple_stmt (dump_file, stmt, 0);
5360 /* If changed is false, this is either opindex == 0
5361 or all outer rhs2's were equal to corresponding oe->op,
5362 and powi_result is NULL.
5363 That means lhs is equivalent before and after reassociation.
5364 Otherwise ensure the old lhs SSA_NAME is not reused and
5365 create a new stmt as well, so that any debug stmts will be
5366 properly adjusted. */
5367 if (changed)
5369 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5370 unsigned int uid = gimple_uid (stmt);
5371 bool insert_before;
5372 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5373 insert_before);
5375 lhs = make_ssa_name (TREE_TYPE (lhs));
5376 stmt = gimple_build_assign (lhs, rhs_code,
5377 new_rhs1, oe->op);
5378 gimple_set_uid (stmt, uid);
5379 gimple_set_visited (stmt, true);
5380 if (insert_before)
5381 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5382 else
5383 insert_stmt_after (stmt, insert_point);
5385 else
5387 bool insert_before;
5388 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5389 insert_before)
5390 == stmt);
5391 gimple_assign_set_rhs1 (stmt, new_rhs1);
5392 gimple_assign_set_rhs2 (stmt, oe->op);
5393 update_stmt (stmt);
5396 if (dump_file && (dump_flags & TDF_DETAILS))
5398 fprintf (dump_file, " into ");
5399 print_gimple_stmt (dump_file, stmt, 0);
5402 return lhs;
5405 /* Find out how many cycles we need to compute a statement chain.
5406 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5407 maximum number of independent statements we may execute per cycle. */
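/* For example, get_required_cycles (10, 2): two cycles reduce the 10
   operands by cpu_width = 2 each (10 -> 8 -> 6), and the remaining 6
   operands take ceil (log2 (6)) = 3 halving cycles (6 -> 3 -> 2 -> 1),
   for 5 cycles total.  */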
5409 static int
5410 get_required_cycles (int ops_num, int cpu_width)
5412 int res;
5413 int elog;
5414 unsigned int rest;
5416 /* While we have more than 2 * cpu_width operands
5417 we may reduce the number of operands by cpu_width
5418 per cycle. */
5419 res = ops_num / (2 * cpu_width);
5421 /* The remaining operand count may be halved each cycle
5422 until we have only one operand. */
5423 rest = (unsigned)(ops_num - res * cpu_width);
5424 elog = exact_log2 (rest);
5425 if (elog >= 0)
5426 res += elog;
5427 else
5428 res += floor_log2 (rest) + 1;
5430 return res;
5433 /* Returns an optimal number of registers to use for computation of
5434 given statements. */
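/* Continuing the example above with ops_num = 10: if the target allows
   a width of 4, get_required_cycles (10, 4) and get_required_cycles (10, 3)
   are both 4 while get_required_cycles (10, 2) is 5, so the search below
   settles on a width of 3, saving registers at no cost in cycles.  */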
5436 static int
5437 get_reassociation_width (int ops_num, enum tree_code opc,
5438 machine_mode mode)
5440 int param_width = param_tree_reassoc_width;
5441 int width;
5442 int width_min;
5443 int cycles_best;
5445 if (param_width > 0)
5446 width = param_width;
5447 else
5448 width = targetm.sched.reassociation_width (opc, mode);
5450 if (width == 1)
5451 return width;
5453 /* Get the minimal time required for sequence computation. */
5454 cycles_best = get_required_cycles (ops_num, width);
5456 /* Check if we may use less width and still compute the sequence in
5457 the same time. It will allow us to reduce register usage.
5458 get_required_cycles is monotonically increasing with lower width
5459 so we can perform a binary search for the minimal width that still
5460 results in the optimal cycle count. */
5461 width_min = 1;
5462 while (width > width_min)
5464 int width_mid = (width + width_min) / 2;
5466 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5467 width = width_mid;
5468 else if (width_min < width_mid)
5469 width_min = width_mid;
5470 else
5471 break;
5474 return width;
5477 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5478 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5479 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5481 /* Rewrite statements with a dependency chain with regard to the chance
5482 to generate FMA.
5483 For a chain with FMA: try to keep fma opportunities as much as possible.
5484 For a chain without FMA: put the computation in rank order and try
5485 to allow operations to be executed in parallel.
5486 E.g.
5487 e + f + a * b + c * d;
5489 ssa1 = e + a * b;
5490 ssa2 = f + c * d;
5491 ssa3 = ssa1 + ssa2;
5493 This reassociation approach preserves the chance of fma generation as much
5494 as possible.
5496 Another thing is to avoid adding loop-carried ops to long chains; otherwise
5497 the whole chain will have dependencies across loop iterations. Just keep
5498 loop-carried ops in a separate chain.
5499 E.g.
5500 x_1 = phi (x_0, x_2)
5501 y_1 = phi (y_0, y_2)
5503 a + b + c + d + e + x1 + y1
5505 SSA1 = a + b;
5506 SSA2 = c + d;
5507 SSA3 = SSA1 + e;
5508 SSA4 = SSA3 + SSA2;
5509 SSA5 = x1 + y1;
5510 SSA6 = SSA4 + SSA5;
5512 static void
5513 rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5514 const vec<operand_entry *> &ops)
5516 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5517 int op_num = ops.length ();
5518 int op_normal_num = op_num;
5519 gcc_assert (op_num > 0);
5520 int stmt_num = op_num - 1;
5521 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5522 int i = 0, j = 0;
5523 tree tmp_op[2], op1;
5524 operand_entry *oe;
5525 gimple *stmt1 = NULL;
5526 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5527 int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5528 int width_active = 0, width_count = 0;
5529 bool has_biased = false, ops_changed = false;
5530 auto_vec<operand_entry *> ops_normal;
5531 auto_vec<operand_entry *> ops_biased;
5532 vec<operand_entry *> *ops1;
5534 /* We start expression rewriting from the top statements.
5535 So, in this loop we create a full list of statements
5536 we will work with. */
5537 stmts[stmt_num - 1] = stmt;
5538 for (i = stmt_num - 2; i >= 0; i--)
5539 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5541 /* Avoid adding loop-carried ops to long chains: first filter out the
5542 loop-carried ops. But we need to make sure that the length of the
5543 remainder is not less than 4, which is the smallest ops length for which
5544 we can break the dependency. */
5545 FOR_EACH_VEC_ELT (ops, i, oe)
5547 if (TREE_CODE (oe->op) == SSA_NAME
5548 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5549 && op_normal_num > 4)
5551 ops_biased.safe_push (oe);
5552 has_biased = true;
5553 op_normal_num --;
5555 else
5556 ops_normal.safe_push (oe);
5559 /* Width should not be larger than ops length / 2, since we cannot
5560 create more parallel dependency chains than that. */
5561 int width_normal = op_normal_num / 2;
5562 int width_biased = (op_num - op_normal_num) / 2;
5563 width_normal = width <= width_normal ? width : width_normal;
5564 width_biased = width <= width_biased ? width : width_biased;
5566 ops1 = &ops_normal;
5567 width_count = width_active = width_normal;
5569 /* Build parallel dependency chain according to width. */
5570 for (i = 0; i < stmt_num; i++)
5572 if (dump_file && (dump_flags & TDF_DETAILS))
5574 fprintf (dump_file, "Transforming ");
5575 print_gimple_stmt (dump_file, stmts[i], 0);
5578 /* When the work of normal ops is over, but the loop is not over,
5579 continue to do biased ops. */
5580 if (width_count == 0 && ops1 == &ops_normal)
5582 ops1 = &ops_biased;
5583 width_count = width_active = width_biased;
5584 ops_changed = true;
5587 /* Swap the operands if no FMA in the chain. */
5588 if (ops1->length () > 2 && !has_fma)
5589 swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5591 if (i < width_active
5592 || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5594 for (j = 0; j < 2; j++)
5596 oe = ops1->pop ();
5597 tmp_op[j] = oe->op;
5598 /* If the stmt that defines operand has to be inserted, insert it
5599 before the use. */
5600 stmt1 = oe->stmt_to_insert;
5601 if (stmt1)
5602 insert_stmt_before_use (stmts[i], stmt1);
5603 stmt1 = NULL;
5605 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5606 tmp_op[1],
5607 tmp_op[0],
5608 opcode);
5609 gimple_set_visited (stmts[i], true);
5612 else
5614 /* We keep original statement only for the last one. All others are
5615 recreated. */
5616 if (!ops1->length ())
5618 /* For biased length equal to 2. */
5619 if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5620 last_rhs2_stmt_index = i - 1;
5622 /* When width_count == 2 and there are no biased ops, just finish. */
5623 if (width_count == NORMAL_END_STMT && !has_biased)
5625 last_rhs1_stmt_index = i - 1;
5626 last_rhs2_stmt_index = i - 2;
5628 if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5630 /* We keep original statement only for the last one. All
5631 others are recreated. */
5632 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5633 (stmts[last_rhs1_stmt_index]));
5634 gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5635 (stmts[last_rhs2_stmt_index]));
5636 update_stmt (stmts[i]);
5638 else
5640 stmts[i] =
5641 build_and_add_sum (TREE_TYPE (last_rhs1),
5642 gimple_assign_lhs (stmts[i-width_count]),
5643 gimple_assign_lhs
5644 (stmts[i-width_count+1]),
5645 opcode);
5646 gimple_set_visited (stmts[i], true);
5647 width_count--;
5649 /* It is the end of normal or biased ops.
5650 last_rhs1_stmt_index is used to record the last stmt index
5651 for normal ops. last_rhs2_stmt_index is used to record the
5652 last stmt index for biased ops. */
5653 if (width_count == BIASED_END_STMT)
5655 gcc_assert (has_biased);
5656 if (ops_biased.length ())
5657 last_rhs1_stmt_index = i;
5658 else
5659 last_rhs2_stmt_index = i;
5660 width_count--;
5664 else
5666 /* Attach the remaining ops to the parallel dependency chain. */
5667 oe = ops1->pop ();
5668 op1 = oe->op;
5669 stmt1 = oe->stmt_to_insert;
5670 if (stmt1)
5671 insert_stmt_before_use (stmts[i], stmt1);
5672 stmt1 = NULL;
5674 /* For only one biased op. */
5675 if (width_count == SPECIAL_BIASED_END_STMT)
5677 /* We keep original statement only for the last one. All
5678 others are recreated. */
5679 gcc_assert (has_biased);
5680 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5681 (stmts[last_rhs1_stmt_index]));
5682 gimple_assign_set_rhs2 (stmts[i], op1);
5683 update_stmt (stmts[i]);
5685 else
5687 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5688 gimple_assign_lhs
5689 (stmts[i-width_active]),
5690 op1,
5691 opcode);
5692 gimple_set_visited (stmts[i], true);
5697 if (dump_file && (dump_flags & TDF_DETAILS))
5699 fprintf (dump_file, " into ");
5700 print_gimple_stmt (dump_file, stmts[i], 0);
5704 remove_visited_stmt_chain (last_rhs1);
5707 /* Transform STMT, which is really (A + B) + (C + D), into the left
5708 linear form, ((A+B)+C)+D.
5709 Recurse on D if necessary. */
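/* For illustration (SSA names made up), for

     _1 = a_6 + b_7;
     _2 = c_8 + d_9;
     x_5 = _1 + _2;

   a new statement _3 = _1 + d_9 is emitted, x_5 is rewritten to
   _3 + c_8 and _2's definition is removed, leaving a left-linear
   chain over a_6, b_7, c_8 and d_9.  */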
5711 static void
5712 linearize_expr (gimple *stmt)
5714 gimple_stmt_iterator gsi;
5715 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5716 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5717 gimple *oldbinrhs = binrhs;
5718 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5719 gimple *newbinrhs = NULL;
5720 class loop *loop = loop_containing_stmt (stmt);
5721 tree lhs = gimple_assign_lhs (stmt);
5723 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5724 && is_reassociable_op (binrhs, rhscode, loop));
5726 gsi = gsi_for_stmt (stmt);
5728 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5729 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5730 gimple_assign_rhs_code (binrhs),
5731 gimple_assign_lhs (binlhs),
5732 gimple_assign_rhs2 (binrhs));
5733 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5734 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5735 gimple_set_uid (binrhs, gimple_uid (stmt));
5737 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5738 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5740 if (dump_file && (dump_flags & TDF_DETAILS))
5742 fprintf (dump_file, "Linearized: ");
5743 print_gimple_stmt (dump_file, stmt, 0);
5746 reassociate_stats.linearized++;
5747 update_stmt (stmt);
5749 gsi = gsi_for_stmt (oldbinrhs);
5750 reassoc_remove_stmt (&gsi);
5751 release_defs (oldbinrhs);
5753 gimple_set_visited (stmt, true);
5754 gimple_set_visited (binlhs, true);
5755 gimple_set_visited (binrhs, true);
5757 /* Tail recurse on the new rhs if it still needs reassociation. */
5758 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5759 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5760 want to change the algorithm while converting to tuples. */
5761 linearize_expr (stmt);
5764 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5765 it. Otherwise, return NULL. */
5767 static gimple *
5768 get_single_immediate_use (tree lhs)
5770 use_operand_p immuse;
5771 gimple *immusestmt;
5773 if (TREE_CODE (lhs) == SSA_NAME
5774 && single_imm_use (lhs, &immuse, &immusestmt)
5775 && is_gimple_assign (immusestmt))
5776 return immusestmt;
5778 return NULL;
5781 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5782 representing the negated value. Insertions of any necessary
5783 instructions go before GSI.
5784 This function is recursive in that, if you hand it "a_5" as the
5785 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5786 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
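/* A hand-written example (not taken from a real dump): asked to negate
   a_5 where

     a_5 = b_3 + b_4;

   the function negates both operands recursively and emits

     t_6 = -b_3;
     t_7 = -b_4;
     a_8 = t_6 + t_7;

   returning a_8; the old definition of a_5 becomes dead and is cleaned
   up later.  */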
5788 static tree
5789 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5791 gimple *negatedefstmt = NULL;
5792 tree resultofnegate;
5793 gimple_stmt_iterator gsi;
5794 unsigned int uid;
5796 /* If we are trying to negate a name, defined by an add, negate the
5797 add operands instead. */
5798 if (TREE_CODE (tonegate) == SSA_NAME)
5799 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5800 if (TREE_CODE (tonegate) == SSA_NAME
5801 && is_gimple_assign (negatedefstmt)
5802 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5803 && has_single_use (gimple_assign_lhs (negatedefstmt))
5804 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5806 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5807 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5808 tree lhs = gimple_assign_lhs (negatedefstmt);
5809 gimple *g;
5811 gsi = gsi_for_stmt (negatedefstmt);
5812 rhs1 = negate_value (rhs1, &gsi);
5814 gsi = gsi_for_stmt (negatedefstmt);
5815 rhs2 = negate_value (rhs2, &gsi);
5817 gsi = gsi_for_stmt (negatedefstmt);
5818 lhs = make_ssa_name (TREE_TYPE (lhs));
5819 gimple_set_visited (negatedefstmt, true);
5820 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5821 gimple_set_uid (g, gimple_uid (negatedefstmt));
5822 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5823 return lhs;
5826 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5827 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5828 NULL_TREE, true, GSI_SAME_STMT);
5829 gsi = *gsip;
5830 uid = gimple_uid (gsi_stmt (gsi));
5831 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5833 gimple *stmt = gsi_stmt (gsi);
5834 if (gimple_uid (stmt) != 0)
5835 break;
5836 gimple_set_uid (stmt, uid);
5838 return resultofnegate;
5841 /* Return true if we should break up the subtract in STMT into an add
5842    with negate.  This is true when the subtract operands are really
5843 adds, or the subtract itself is used in an add expression. In
5844 either case, breaking up the subtract into an add with negate
5845 exposes the adds to reassociation. */
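/* For illustration only (invented SSA names):

     c_1 = a_2 - b_3;
     d_4 = c_1 + e_5;

   Here the subtract feeds an add, so rewriting it as c_1 = a_2 + -b_3
   lets the whole expression be reassociated as one PLUS chain.  A
   subtract whose result is only stored or compared is left alone.  */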
5847 static bool
5848 should_break_up_subtract (gimple *stmt)
5850 tree lhs = gimple_assign_lhs (stmt);
5851 tree binlhs = gimple_assign_rhs1 (stmt);
5852 tree binrhs = gimple_assign_rhs2 (stmt);
5853 gimple *immusestmt;
5854 class loop *loop = loop_containing_stmt (stmt);
5856 if (TREE_CODE (binlhs) == SSA_NAME
5857 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5858 return true;
5860 if (TREE_CODE (binrhs) == SSA_NAME
5861 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5862 return true;
5864 if (TREE_CODE (lhs) == SSA_NAME
5865 && (immusestmt = get_single_immediate_use (lhs))
5866 && is_gimple_assign (immusestmt)
5867 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5868 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5869 && gimple_assign_rhs1 (immusestmt) == lhs)
5870 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5871 return true;
5872 return false;
5875 /* Transform STMT from A - B into A + -B. */
5877 static void
5878 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5880 tree rhs1 = gimple_assign_rhs1 (stmt);
5881 tree rhs2 = gimple_assign_rhs2 (stmt);
5883 if (dump_file && (dump_flags & TDF_DETAILS))
5885 fprintf (dump_file, "Breaking up subtract ");
5886 print_gimple_stmt (dump_file, stmt, 0);
5889 rhs2 = negate_value (rhs2, gsip);
5890 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5891 update_stmt (stmt);
5894 /* Determine whether STMT is a builtin call that raises an SSA name
5895 to an integer power and has only one use. If so, and this is early
5896 reassociation and unsafe math optimizations are permitted, place
5897 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5898 If any of these conditions does not hold, return FALSE. */
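/* Hand-picked examples of what is accepted and rejected here (assuming
   unsafe math optimizations and no -fmath-errno):

     t = __builtin_pow (x_1, 4.0);    accepted, *BASE = x_1, *EXPONENT = 4
     t = __builtin_powi (x_1, 3);     accepted, *BASE = x_1, *EXPONENT = 3
     t = __builtin_pow (x_1, 2.5);    rejected, exponent not integral
     t = __builtin_pow (x_1, 1.0);    rejected, exponent < 2  */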
5900 static bool
5901 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5903 tree arg1;
5904 REAL_VALUE_TYPE c, cint;
5906 switch (gimple_call_combined_fn (stmt))
5908 CASE_CFN_POW:
5909 if (flag_errno_math)
5910 return false;
5912 *base = gimple_call_arg (stmt, 0);
5913 arg1 = gimple_call_arg (stmt, 1);
5915 if (TREE_CODE (arg1) != REAL_CST)
5916 return false;
5918 c = TREE_REAL_CST (arg1);
5920 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5921 return false;
5923 *exponent = real_to_integer (&c);
5924 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5925 if (!real_identical (&c, &cint))
5926 return false;
5928 break;
5930 CASE_CFN_POWI:
5931 *base = gimple_call_arg (stmt, 0);
5932 arg1 = gimple_call_arg (stmt, 1);
5934 if (!tree_fits_shwi_p (arg1))
5935 return false;
5937 *exponent = tree_to_shwi (arg1);
5938 break;
5940 default:
5941 return false;
5944 /* Expanding negative exponents is generally unproductive, so we don't
5945 complicate matters with those. Exponents of zero and one should
5946 have been handled by expression folding. */
5947 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5948 return false;
5950 return true;
5953 /* Try to derive and add operand entry for OP to *OPS. Return false if
5954 unsuccessful. */
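/* Two sketches of the special cases handled below (invented SSA names):

   1. For a MULT_EXPR chain containing t_1 = __builtin_pow (x_2, 3.0),
      the repeated factor x_2 with exponent 3 is recorded via
      add_repeat_to_ops_vec instead of pushing t_1 itself.

   2. For a MULT_EXPR chain containing t_1 = -x_2, the operands x_2 and
      -1 are pushed separately, so the negation participates in
      reassociation like any other factor.  */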
5956 static bool
5957 try_special_add_to_ops (vec<operand_entry *> *ops,
5958 enum tree_code code,
5959 tree op, gimple* def_stmt)
5961 tree base = NULL_TREE;
5962 HOST_WIDE_INT exponent = 0;
5964 if (TREE_CODE (op) != SSA_NAME
5965 || ! has_single_use (op))
5966 return false;
5968 if (code == MULT_EXPR
5969 && reassoc_insert_powi_p
5970 && flag_unsafe_math_optimizations
5971 && is_gimple_call (def_stmt)
5972 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5974 add_repeat_to_ops_vec (ops, base, exponent);
5975 gimple_set_visited (def_stmt, true);
5976 return true;
5978 else if (code == MULT_EXPR
5979 && is_gimple_assign (def_stmt)
5980 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5981 && !HONOR_SNANS (TREE_TYPE (op))
5982 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5983 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
5984 && (!FLOAT_TYPE_P (TREE_TYPE (op))
5985 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
5987 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5988 tree cst = build_minus_one_cst (TREE_TYPE (op));
5989 add_to_ops_vec (ops, rhs1);
5990 add_to_ops_vec (ops, cst);
5991 gimple_set_visited (def_stmt, true);
5992 return true;
5995 return false;
5998 /* Recursively linearize a binary expression that is the RHS of STMT.
5999 Place the operands of the expression tree in the vector named OPS. */
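/* Rough sketch of the result (invented names): for the PLUS chain

     t1 = a + b;
     t2 = t1 + c;
     x  = t2 + d;    <-- STMT

   calling this on STMT leaves OPS holding roughly { a, b, c, d }, which
   the caller then sorts by rank and optimizes.  */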
6001 static void
6002 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
6003 bool is_associative, bool set_visited)
6005 tree binlhs = gimple_assign_rhs1 (stmt);
6006 tree binrhs = gimple_assign_rhs2 (stmt);
6007 gimple *binlhsdef = NULL, *binrhsdef = NULL;
6008 bool binlhsisreassoc = false;
6009 bool binrhsisreassoc = false;
6010 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
6011 class loop *loop = loop_containing_stmt (stmt);
6013 if (set_visited)
6014 gimple_set_visited (stmt, true);
6016 if (TREE_CODE (binlhs) == SSA_NAME)
6018 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
6019 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
6020 && !stmt_could_throw_p (cfun, binlhsdef));
6023 if (TREE_CODE (binrhs) == SSA_NAME)
6025 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
6026 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
6027 && !stmt_could_throw_p (cfun, binrhsdef));
6030 /* If the LHS is not reassociable, but the RHS is, we need to swap
6031 them. If neither is reassociable, there is nothing we can do, so
6032 just put them in the ops vector. If the LHS is reassociable,
6033 linearize it. If both are reassociable, then linearize the RHS
6034 and the LHS. */
6036 if (!binlhsisreassoc)
6038       /* If this is not an associative operation, like division, give up.  */
6039 if (!is_associative)
6041 add_to_ops_vec (ops, binrhs);
6042 return;
6045 if (!binrhsisreassoc)
6047 bool swap = false;
6048 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6049 	  /* If we add ops for the rhs, we expect to be able to recurse
6050 	     to it via the lhs during expression rewrite, so swap the
6051 	     operands.  */
6052 swap = true;
6053 else
6054 add_to_ops_vec (ops, binrhs);
6056 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
6057 add_to_ops_vec (ops, binlhs);
6059 if (!swap)
6060 return;
6063 if (dump_file && (dump_flags & TDF_DETAILS))
6065 fprintf (dump_file, "swapping operands of ");
6066 print_gimple_stmt (dump_file, stmt, 0);
6069 swap_ssa_operands (stmt,
6070 gimple_assign_rhs1_ptr (stmt),
6071 gimple_assign_rhs2_ptr (stmt));
6072 update_stmt (stmt);
6074 if (dump_file && (dump_flags & TDF_DETAILS))
6076 fprintf (dump_file, " is now ");
6077 print_gimple_stmt (dump_file, stmt, 0);
6079 if (!binrhsisreassoc)
6080 return;
6082 /* We want to make it so the lhs is always the reassociative op,
6083 so swap. */
6084 std::swap (binlhs, binrhs);
6086 else if (binrhsisreassoc)
6088 linearize_expr (stmt);
6089 binlhs = gimple_assign_rhs1 (stmt);
6090 binrhs = gimple_assign_rhs2 (stmt);
6093 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6094 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6095 rhscode, loop));
6096 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6097 is_associative, set_visited);
6099 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6100 add_to_ops_vec (ops, binrhs);
6103 /* Repropagate the negates back into subtracts, since no other pass
6104 currently does it. */
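/* Hand-written examples of the rewrites performed below (not real dumps):

     x = -a;  y = b + x;    becomes    y = b - a;
     x = -a;  y = x - b;    becomes    t = a + b;  y = -t;
     x = -a;  y = b - x;    becomes    y = b + a;

   In the middle case the new negate is pushed back onto plus_negates so
   that it may be merged again later.  */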
6106 static void
6107 repropagate_negates (void)
6109 unsigned int i = 0;
6110 tree negate;
6112 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6114 gimple *user = get_single_immediate_use (negate);
6115 if (!user || !is_gimple_assign (user))
6116 continue;
6118 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6119 if (TREE_CODE (negateop) == SSA_NAME
6120 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6121 continue;
6123 /* The negate operand can be either operand of a PLUS_EXPR
6124 (it can be the LHS if the RHS is a constant for example).
6126 Force the negate operand to the RHS of the PLUS_EXPR, then
6127 transform the PLUS_EXPR into a MINUS_EXPR. */
6128 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6130 /* If the negated operand appears on the LHS of the
6131 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6132 to force the negated operand to the RHS of the PLUS_EXPR. */
6133 if (gimple_assign_rhs1 (user) == negate)
6135 swap_ssa_operands (user,
6136 gimple_assign_rhs1_ptr (user),
6137 gimple_assign_rhs2_ptr (user));
6140 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6141 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6142 if (gimple_assign_rhs2 (user) == negate)
6144 tree rhs1 = gimple_assign_rhs1 (user);
6145 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6146 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6147 negateop);
6148 update_stmt (user);
6151 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6153 if (gimple_assign_rhs1 (user) == negate)
6155 /* We have
6156 x = -negateop
6157 y = x - b
6158 which we transform into
6159 x = negateop + b
6160 y = -x .
6161 This pushes down the negate which we possibly can merge
6162 into some other operation, hence insert it into the
6163 plus_negates vector. */
6164 gimple *feed = SSA_NAME_DEF_STMT (negate);
6165 tree b = gimple_assign_rhs2 (user);
6166 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6167 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6168 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6169 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6170 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6171 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6172 user = gsi_stmt (gsi2);
6173 update_stmt (user);
6174 reassoc_remove_stmt (&gsi);
6175 release_defs (feed);
6176 plus_negates.safe_push (gimple_assign_lhs (user));
6178 else
6180 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6181 getting rid of one operation. */
6182 tree rhs1 = gimple_assign_rhs1 (user);
6183 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6184 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6185 update_stmt (gsi_stmt (gsi));
6191 /* Break up subtract operations in block BB.
6193 We do this top down because we don't know whether the subtract is
6194 part of a possible chain of reassociation except at the top.
6196 IE given
6197 d = f + g
6198 c = a + e
6199 b = c - d
6200 q = b - r
6201 k = t - q
6203 we want to break up k = t - q, but we won't until we've transformed q
6204 = b - r, which won't be broken up until we transform b = c - d.
6206 En passant, clear the GIMPLE visited flag on every statement
6207 and set UIDs within each basic block. */
6209 static void
6210 break_up_subtract_bb (basic_block bb)
6212 gimple_stmt_iterator gsi;
6213 basic_block son;
6214 unsigned int uid = 1;
6216 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6218 gimple *stmt = gsi_stmt (gsi);
6219 gimple_set_visited (stmt, false);
6220 gimple_set_uid (stmt, uid++);
6222 if (!is_gimple_assign (stmt)
6223 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6224 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6225 continue;
6227 /* Look for simple gimple subtract operations. */
6228 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6230 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6231 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6232 continue;
6234 /* Check for a subtract used only in an addition. If this
6235 is the case, transform it into add of a negate for better
6236 reassociation. IE transform C = A-B into C = A + -B if C
6237 is only used in an addition. */
6238 if (should_break_up_subtract (stmt))
6239 break_up_subtract (stmt, &gsi);
6241 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6242 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6243 plus_negates.safe_push (gimple_assign_lhs (stmt));
6245 for (son = first_dom_son (CDI_DOMINATORS, bb);
6246 son;
6247 son = next_dom_son (CDI_DOMINATORS, son))
6248 break_up_subtract_bb (son);
6251 /* Used for repeated factor analysis. */
6252 struct repeat_factor
6254 /* An SSA name that occurs in a multiply chain. */
6255 tree factor;
6257 /* Cached rank of the factor. */
6258 unsigned rank;
6260 /* Number of occurrences of the factor in the chain. */
6261 HOST_WIDE_INT count;
6263 /* An SSA name representing the product of this factor and
6264 all factors appearing later in the repeated factor vector. */
6265 tree repr;
6269 static vec<repeat_factor> repeat_factor_vec;
6271 /* Used for sorting the repeat factor vector. Sort primarily by
6272 ascending occurrence count, secondarily by descending rank. */
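/* For example (invented data): the factors { a: count 2, rank 5 },
   { b: count 1, rank 9 } and { c: count 2, rank 7 } would sort as
   b (count 1), then c (count 2, rank 7), then a (count 2, rank 5).  */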
6274 static int
6275 compare_repeat_factors (const void *x1, const void *x2)
6277 const repeat_factor *rf1 = (const repeat_factor *) x1;
6278 const repeat_factor *rf2 = (const repeat_factor *) x2;
6280 if (rf1->count != rf2->count)
6281 return rf1->count - rf2->count;
6283 return rf2->rank - rf1->rank;
6286 /* Look for repeated operands in OPS in the multiply tree rooted at
6287 STMT. Replace them with an optimal sequence of multiplies and powi
6288 builtin calls, and remove the used operands from OPS. Return an
6289 SSA name representing the value of the replacement sequence. */
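/* A hand-traced sketch (not verified against a real dump) for a
   floating-point chain with OPS = { x, x, x, y, y }, i.e. x*x*x * y*y:

     t1 = x * y;
     t2 = __builtin_powi (t1, 2);     (x*y)^2, type-appropriate variant
     t3 = t2 * x;                     remaining single occurrence of x

   The returned SSA name is t3 and all five entries are removed from OPS;
   for integral types the powi call is expanded into plain multiplies
   instead.  */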
6291 static tree
6292 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6294 unsigned i, j, vec_len;
6295 int ii;
6296 operand_entry *oe;
6297 repeat_factor *rf1, *rf2;
6298 repeat_factor rfnew;
6299 tree result = NULL_TREE;
6300 tree target_ssa, iter_result;
6301 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6302 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6303 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6304 gimple *mul_stmt, *pow_stmt;
6306 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6307 target, unless type is integral. */
6308 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6309 return NULL_TREE;
6311 /* Allocate the repeated factor vector. */
6312 repeat_factor_vec.create (10);
6314 /* Scan the OPS vector for all SSA names in the product and build
6315 up a vector of occurrence counts for each factor. */
6316 FOR_EACH_VEC_ELT (*ops, i, oe)
6318 if (TREE_CODE (oe->op) == SSA_NAME)
6320 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6322 if (rf1->factor == oe->op)
6324 rf1->count += oe->count;
6325 break;
6329 if (j >= repeat_factor_vec.length ())
6331 rfnew.factor = oe->op;
6332 rfnew.rank = oe->rank;
6333 rfnew.count = oe->count;
6334 rfnew.repr = NULL_TREE;
6335 repeat_factor_vec.safe_push (rfnew);
6340 /* Sort the repeated factor vector by (a) increasing occurrence count,
6341 and (b) decreasing rank. */
6342 repeat_factor_vec.qsort (compare_repeat_factors);
6344 /* It is generally best to combine as many base factors as possible
6345 into a product before applying __builtin_powi to the result.
6346 However, the sort order chosen for the repeated factor vector
6347 allows us to cache partial results for the product of the base
6348 factors for subsequent use. When we already have a cached partial
6349 result from a previous iteration, it is best to make use of it
6350 before looking for another __builtin_pow opportunity.
6352 As an example, consider x * x * y * y * y * z * z * z * z.
6353 We want to first compose the product x * y * z, raise it to the
6354 second power, then multiply this by y * z, and finally multiply
6355 by z. This can be done in 5 multiplies provided we cache y * z
6356 for use in both expressions:
6358 t1 = y * z
6359 t2 = t1 * x
6360 t3 = t2 * t2
6361 t4 = t1 * t3
6362 result = t4 * z
6364 If we instead ignored the cached y * z and first multiplied by
6365 the __builtin_pow opportunity z * z, we would get the inferior:
6367 t1 = y * z
6368 t2 = t1 * x
6369 t3 = t2 * t2
6370 t4 = z * z
6371 t5 = t3 * t4
6372 result = t5 * y */
6374 vec_len = repeat_factor_vec.length ();
6376 /* Repeatedly look for opportunities to create a builtin_powi call. */
6377 while (true)
6379 HOST_WIDE_INT power;
6381 /* First look for the largest cached product of factors from
6382 preceding iterations. If found, create a builtin_powi for
6383 it if the minimum occurrence count for its factors is at
6384 least 2, or just use this cached product as our next
6385 multiplicand if the minimum occurrence count is 1. */
6386 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6388 if (rf1->repr && rf1->count > 0)
6389 break;
6392 if (j < vec_len)
6394 power = rf1->count;
6396 if (power == 1)
6398 iter_result = rf1->repr;
6400 if (dump_file && (dump_flags & TDF_DETAILS))
6402 unsigned elt;
6403 repeat_factor *rf;
6404 fputs ("Multiplying by cached product ", dump_file);
6405 for (elt = j; elt < vec_len; elt++)
6407 rf = &repeat_factor_vec[elt];
6408 print_generic_expr (dump_file, rf->factor);
6409 if (elt < vec_len - 1)
6410 fputs (" * ", dump_file);
6412 fputs ("\n", dump_file);
6415 else
6417 if (INTEGRAL_TYPE_P (type))
6419 gcc_assert (power > 1);
6420 gimple_stmt_iterator gsip = gsi;
6421 gsi_prev (&gsip);
6422 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6423 rf1->repr, power);
6424 gimple_stmt_iterator gsic = gsi;
6425 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6427 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6428 gimple_set_visited (gsi_stmt (gsic), true);
6429 gsi_prev (&gsic);
6432 else
6434 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6435 pow_stmt
6436 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6437 build_int_cst (integer_type_node,
6438 power));
6439 gimple_call_set_lhs (pow_stmt, iter_result);
6440 gimple_set_location (pow_stmt, gimple_location (stmt));
6441 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6442 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6445 if (dump_file && (dump_flags & TDF_DETAILS))
6447 unsigned elt;
6448 repeat_factor *rf;
6449 fputs ("Building __builtin_pow call for cached product (",
6450 dump_file);
6451 for (elt = j; elt < vec_len; elt++)
6453 rf = &repeat_factor_vec[elt];
6454 print_generic_expr (dump_file, rf->factor);
6455 if (elt < vec_len - 1)
6456 fputs (" * ", dump_file);
6458 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6459 power);
6463 else
6465 /* Otherwise, find the first factor in the repeated factor
6466 vector whose occurrence count is at least 2. If no such
6467 factor exists, there are no builtin_powi opportunities
6468 remaining. */
6469 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6471 if (rf1->count >= 2)
6472 break;
6475 if (j >= vec_len)
6476 break;
6478 power = rf1->count;
6480 if (dump_file && (dump_flags & TDF_DETAILS))
6482 unsigned elt;
6483 repeat_factor *rf;
6484 fputs ("Building __builtin_pow call for (", dump_file);
6485 for (elt = j; elt < vec_len; elt++)
6487 rf = &repeat_factor_vec[elt];
6488 print_generic_expr (dump_file, rf->factor);
6489 if (elt < vec_len - 1)
6490 fputs (" * ", dump_file);
6492 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6495 reassociate_stats.pows_created++;
6497 /* Visit each element of the vector in reverse order (so that
6498 high-occurrence elements are visited first, and within the
6499 same occurrence count, lower-ranked elements are visited
6500 first). Form a linear product of all elements in this order
6501 	     whose occurrence count is at least that of element J.
6502 Record the SSA name representing the product of each element
6503 with all subsequent elements in the vector. */
6504 if (j == vec_len - 1)
6505 rf1->repr = rf1->factor;
6506 else
6508 for (ii = vec_len - 2; ii >= (int)j; ii--)
6510 tree op1, op2;
6512 rf1 = &repeat_factor_vec[ii];
6513 rf2 = &repeat_factor_vec[ii + 1];
6515 /* Init the last factor's representative to be itself. */
6516 if (!rf2->repr)
6517 rf2->repr = rf2->factor;
6519 op1 = rf1->factor;
6520 op2 = rf2->repr;
6522 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6523 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6524 op1, op2);
6525 gimple_set_location (mul_stmt, gimple_location (stmt));
6526 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6527 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6528 rf1->repr = target_ssa;
6530 /* Don't reprocess the multiply we just introduced. */
6531 gimple_set_visited (mul_stmt, true);
6535 /* Form a call to __builtin_powi for the maximum product
6536 just formed, raised to the power obtained earlier. */
6537 rf1 = &repeat_factor_vec[j];
6538 if (INTEGRAL_TYPE_P (type))
6540 gcc_assert (power > 1);
6541 gimple_stmt_iterator gsip = gsi;
6542 gsi_prev (&gsip);
6543 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6544 rf1->repr, power);
6545 gimple_stmt_iterator gsic = gsi;
6546 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6548 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6549 gimple_set_visited (gsi_stmt (gsic), true);
6550 gsi_prev (&gsic);
6553 else
6555 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6556 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6557 build_int_cst (integer_type_node,
6558 power));
6559 gimple_call_set_lhs (pow_stmt, iter_result);
6560 gimple_set_location (pow_stmt, gimple_location (stmt));
6561 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6562 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6566 /* If we previously formed at least one other builtin_powi call,
6567 form the product of this one and those others. */
6568 if (result)
6570 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6571 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6572 result, iter_result);
6573 gimple_set_location (mul_stmt, gimple_location (stmt));
6574 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6575 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6576 gimple_set_visited (mul_stmt, true);
6577 result = new_result;
6579 else
6580 result = iter_result;
6582 /* Decrement the occurrence count of each element in the product
6583 by the count found above, and remove this many copies of each
6584 factor from OPS. */
6585 for (i = j; i < vec_len; i++)
6587 unsigned k = power;
6588 unsigned n;
6590 rf1 = &repeat_factor_vec[i];
6591 rf1->count -= power;
6593 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6595 if (oe->op == rf1->factor)
6597 if (oe->count <= k)
6599 ops->ordered_remove (n);
6600 k -= oe->count;
6602 if (k == 0)
6603 break;
6605 else
6607 oe->count -= k;
6608 break;
6615 /* At this point all elements in the repeated factor vector have a
6616 remaining occurrence count of 0 or 1, and those with a count of 1
6617 don't have cached representatives. Re-sort the ops vector and
6618 clean up. */
6619 ops->qsort (sort_by_operand_rank);
6620 repeat_factor_vec.release ();
6622 /* Return the final product computed herein. Note that there may
6623 still be some elements with single occurrence count left in OPS;
6624 those will be handled by the normal reassociation logic. */
6625 return result;
6628 /* Attempt to optimize
6629 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6630 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
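/* Numeric sketch (invented values): with OPS ending in the constant -2.0
   and containing t_1 = copysign (3.0, y_2), the product
   -2.0 * copysign (3.0, y_2) is rewritten as -copysign (-6.0, y_2); the
   constant is popped from OPS and t_1's entry is replaced by the negated
   result of the new call.  */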
6632 static void
6633 attempt_builtin_copysign (vec<operand_entry *> *ops)
6635 operand_entry *oe;
6636 unsigned int i;
6637 unsigned int length = ops->length ();
6638 tree cst = ops->last ()->op;
6640 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6641 return;
6643 FOR_EACH_VEC_ELT (*ops, i, oe)
6645 if (TREE_CODE (oe->op) == SSA_NAME
6646 && has_single_use (oe->op))
6648 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6649 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6651 tree arg0, arg1;
6652 switch (gimple_call_combined_fn (old_call))
6654 CASE_CFN_COPYSIGN:
6655 CASE_CFN_COPYSIGN_FN:
6656 arg0 = gimple_call_arg (old_call, 0);
6657 arg1 = gimple_call_arg (old_call, 1);
6658 /* The first argument of copysign must be a constant,
6659 otherwise there's nothing to do. */
6660 if (TREE_CODE (arg0) == REAL_CST)
6662 tree type = TREE_TYPE (arg0);
6663 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6664 /* If we couldn't fold to a single constant, skip it.
6665 That happens e.g. for inexact multiplication when
6666 -frounding-math. */
6667 if (mul == NULL_TREE)
6668 break;
6669 /* Instead of adjusting OLD_CALL, let's build a new
6670 call to not leak the LHS and prevent keeping bogus
6671 debug statements. DCE will clean up the old call. */
6672 gcall *new_call;
6673 if (gimple_call_internal_p (old_call))
6674 new_call = gimple_build_call_internal
6675 (IFN_COPYSIGN, 2, mul, arg1);
6676 else
6677 new_call = gimple_build_call
6678 (gimple_call_fndecl (old_call), 2, mul, arg1);
6679 tree lhs = make_ssa_name (type);
6680 gimple_call_set_lhs (new_call, lhs);
6681 gimple_set_location (new_call,
6682 gimple_location (old_call));
6683 insert_stmt_after (new_call, old_call);
6684 /* We've used the constant, get rid of it. */
6685 ops->pop ();
6686 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6687 /* Handle the CST1 < 0 case by negating the result. */
6688 if (cst1_neg)
6690 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6691 gimple *negate_stmt
6692 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6693 insert_stmt_after (negate_stmt, new_call);
6694 oe->op = negrhs;
6696 else
6697 oe->op = lhs;
6698 if (dump_file && (dump_flags & TDF_DETAILS))
6700 fprintf (dump_file, "Optimizing copysign: ");
6701 print_generic_expr (dump_file, cst);
6702 fprintf (dump_file, " * COPYSIGN (");
6703 print_generic_expr (dump_file, arg0);
6704 fprintf (dump_file, ", ");
6705 print_generic_expr (dump_file, arg1);
6706 fprintf (dump_file, ") into %sCOPYSIGN (",
6707 cst1_neg ? "-" : "");
6708 print_generic_expr (dump_file, mul);
6709 fprintf (dump_file, ", ");
6710 print_generic_expr (dump_file, arg1);
6711 		      fprintf (dump_file, ")\n");
6713 return;
6715 break;
6716 default:
6717 break;
6724 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6726 static void
6727 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6729 tree rhs1;
6731 if (dump_file && (dump_flags & TDF_DETAILS))
6733 fprintf (dump_file, "Transforming ");
6734 print_gimple_stmt (dump_file, stmt, 0);
6737 rhs1 = gimple_assign_rhs1 (stmt);
6738 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6739 update_stmt (stmt);
6740 remove_visited_stmt_chain (rhs1);
6742 if (dump_file && (dump_flags & TDF_DETAILS))
6744 fprintf (dump_file, " into ");
6745 print_gimple_stmt (dump_file, stmt, 0);
6749 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6751 static void
6752 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6753 tree rhs1, tree rhs2)
6755 if (dump_file && (dump_flags & TDF_DETAILS))
6757 fprintf (dump_file, "Transforming ");
6758 print_gimple_stmt (dump_file, stmt, 0);
6761 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6762 update_stmt (gsi_stmt (*gsi));
6763 remove_visited_stmt_chain (rhs1);
6765 if (dump_file && (dump_flags & TDF_DETAILS))
6767 fprintf (dump_file, " into ");
6768 print_gimple_stmt (dump_file, stmt, 0);
6772 /* Rearrange ops so that more FMAs can be formed when the chain contains
6773    more than two FMA candidates.  Put no-mult ops and mult ops alternately
6774    at the end of the queue, which is conducive to generating more FMAs and
6775    to reducing the loss of FMAs when the chain is broken.
6776 E.g.
6777 a * b + c * d + e generates:
6779 _4 = c_9(D) * d_10(D);
6780 _12 = .FMA (a_7(D), b_8(D), _4);
6781 _11 = e_6(D) + _12;
6783 Rearrange ops to -> e + a * b + c * d generates:
6785 _4 = .FMA (c_7(D), d_8(D), _3);
6786 _11 = .FMA (a_5(D), b_6(D), _4); */
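/* Interleaving sketch (hand-traced, not from a dump): with
   ops_mult = { m1, m2 } and ops_others = { o1, o2, o3 }, the loop below
   rebuilds OPS as { o1, m1, o2, m2, o3 }, so consuming operands from the
   end of the queue alternates between mult and no-mult entries.  */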
6787 static bool
6788 rank_ops_for_fma (vec<operand_entry *> *ops)
6790 operand_entry *oe;
6791 unsigned int i;
6792 unsigned int ops_length = ops->length ();
6793 auto_vec<operand_entry *> ops_mult;
6794 auto_vec<operand_entry *> ops_others;
6796 FOR_EACH_VEC_ELT (*ops, i, oe)
6798 if (TREE_CODE (oe->op) == SSA_NAME)
6800 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6801 if (is_gimple_assign (def_stmt)
6802 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6803 ops_mult.safe_push (oe);
6804 else
6805 ops_others.safe_push (oe);
6807 else
6808 ops_others.safe_push (oe);
6810   /* 1. When there are at least two mult ops but not all ops are mults,
6812 	as in a * b + c * d + e,
6814 	we need to rearrange the ops:
6816 	putting the ops that are not defined by a mult in front can generate more FMAs.
6818      2. If all ops are defined by mults, there is no need to rearrange them.  */
6819 if (ops_mult.length () >= 2 && ops_mult.length () != ops_length)
6821       /* Put no-mult ops and mult ops alternately at the end of the
6822 	 queue, which is conducive to generating more FMAs and reducing the
6823 	 loss of FMAs when the chain is broken.  */
6824 ops->truncate (0);
6825 ops->splice (ops_mult);
6826 int j, opindex = ops->length ();
6827 int others_length = ops_others.length ();
6828 for (j = 0; j < others_length; j++)
6830 oe = ops_others.pop ();
6831 ops->quick_insert (opindex, oe);
6832 if (opindex > 0)
6833 opindex--;
6835 return true;
6837 return false;
6839 /* Reassociate expressions in basic block BB and in the blocks that are
6840    BB's children in the post-dominator tree.
6842 Bubble up return status from maybe_optimize_range_tests. */
6844 static bool
6845 reassociate_bb (basic_block bb)
6847 gimple_stmt_iterator gsi;
6848 basic_block son;
6849 gimple *stmt = last_nondebug_stmt (bb);
6850 bool cfg_cleanup_needed = false;
6852 if (stmt && !gimple_visited_p (stmt))
6853 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6855 bool do_prev = false;
6856 for (gsi = gsi_last_bb (bb);
6857 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6859 do_prev = true;
6860 stmt = gsi_stmt (gsi);
6862 if (is_gimple_assign (stmt)
6863 && !stmt_could_throw_p (cfun, stmt))
6865 tree lhs, rhs1, rhs2;
6866 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6868 /* If this was part of an already processed statement,
6869 we don't need to touch it again. */
6870 if (gimple_visited_p (stmt))
6872 /* This statement might have become dead because of previous
6873 reassociations. */
6874 if (has_zero_uses (gimple_get_lhs (stmt)))
6876 reassoc_remove_stmt (&gsi);
6877 release_defs (stmt);
6878 /* We might end up removing the last stmt above which
6879 places the iterator to the end of the sequence.
6880 Reset it to the last stmt in this case and make sure
6881 we don't do gsi_prev in that case. */
6882 if (gsi_end_p (gsi))
6884 gsi = gsi_last_bb (bb);
6885 do_prev = false;
6888 continue;
6891 /* If this is not a gimple binary expression, there is
6892 nothing for us to do with it. */
6893 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6894 continue;
6896 lhs = gimple_assign_lhs (stmt);
6897 rhs1 = gimple_assign_rhs1 (stmt);
6898 rhs2 = gimple_assign_rhs2 (stmt);
6900 /* For non-bit or min/max operations we can't associate
6901 all types. Verify that here. */
6902 if ((rhs_code != BIT_IOR_EXPR
6903 && rhs_code != BIT_AND_EXPR
6904 && rhs_code != BIT_XOR_EXPR
6905 && rhs_code != MIN_EXPR
6906 && rhs_code != MAX_EXPR
6907 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6908 || !can_reassociate_op_p (rhs1)
6909 || !can_reassociate_op_p (rhs2))
6910 continue;
6912 if (associative_tree_code (rhs_code))
6914 auto_vec<operand_entry *> ops;
6915 tree powi_result = NULL_TREE;
6916 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6918 /* There may be no immediate uses left by the time we
6919 get here because we may have eliminated them all. */
6920 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6921 continue;
6923 gimple_set_visited (stmt, true);
6924 linearize_expr_tree (&ops, stmt, true, true);
6925 ops.qsort (sort_by_operand_rank);
6926 int orig_len = ops.length ();
6927 optimize_ops_list (rhs_code, &ops);
6928 if (undistribute_ops_list (rhs_code, &ops,
6929 loop_containing_stmt (stmt)))
6931 ops.qsort (sort_by_operand_rank);
6932 optimize_ops_list (rhs_code, &ops);
6934 if (undistribute_bitref_for_vector (rhs_code, &ops,
6935 loop_containing_stmt (stmt)))
6937 ops.qsort (sort_by_operand_rank);
6938 optimize_ops_list (rhs_code, &ops);
6940 if (rhs_code == PLUS_EXPR
6941 && transform_add_to_multiply (&ops))
6942 ops.qsort (sort_by_operand_rank);
6944 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6946 if (is_vector)
6947 optimize_vec_cond_expr (rhs_code, &ops);
6948 else
6949 optimize_range_tests (rhs_code, &ops, NULL);
6952 if (rhs_code == MULT_EXPR && !is_vector)
6954 attempt_builtin_copysign (&ops);
6956 if (reassoc_insert_powi_p
6957 && (flag_unsafe_math_optimizations
6958 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6959 powi_result = attempt_builtin_powi (stmt, &ops);
6962 operand_entry *last;
6963 bool negate_result = false;
6964 if (ops.length () > 1
6965 && rhs_code == MULT_EXPR)
6967 last = ops.last ();
6968 if ((integer_minus_onep (last->op)
6969 || real_minus_onep (last->op))
6970 && !HONOR_SNANS (TREE_TYPE (lhs))
6971 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6972 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6974 ops.pop ();
6975 negate_result = true;
6979 tree new_lhs = lhs;
6980 /* If the operand vector is now empty, all operands were
6981 consumed by the __builtin_powi optimization. */
6982 if (ops.length () == 0)
6983 transform_stmt_to_copy (&gsi, stmt, powi_result);
6984 else if (ops.length () == 1)
6986 tree last_op = ops.last ()->op;
6988 /* If the stmt that defines operand has to be inserted, insert it
6989 before the use. */
6990 if (ops.last ()->stmt_to_insert)
6991 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6992 if (powi_result)
6993 transform_stmt_to_multiply (&gsi, stmt, last_op,
6994 powi_result);
6995 else
6996 transform_stmt_to_copy (&gsi, stmt, last_op);
6998 else
7000 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7001 int ops_num = ops.length ();
7002 int width;
7003 bool has_fma = false;
7005 /* For binary bit operations, if there are at least 3
7006 operands and the last operand in OPS is a constant,
7007 move it to the front. This helps ensure that we generate
7008 (X & Y) & C rather than (X & C) & Y. The former will
7009 often match a canonical bit test when we get to RTL. */
7010 if (ops.length () > 2
7011 && (rhs_code == BIT_AND_EXPR
7012 || rhs_code == BIT_IOR_EXPR
7013 || rhs_code == BIT_XOR_EXPR)
7014 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7015 std::swap (*ops[0], *ops[ops_num - 1]);
7017 optimization_type opt_type = bb_optimization_type (bb);
7019 	      /* If the target supports FMA, rank_ops_for_fma will detect whether
7020 		 the chain has FMA candidates and rearrange the ops if so.  */
7021 if (direct_internal_fn_supported_p (IFN_FMA,
7022 TREE_TYPE (lhs),
7023 opt_type)
7024 && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7026 has_fma = rank_ops_for_fma (&ops);
7029 /* Only rewrite the expression tree to parallel in the
7030 last reassoc pass to avoid useless work back-and-forth
7031 with initial linearization. */
7032 if (!reassoc_insert_powi_p
7033 && ops.length () > 3
7034 && (width = get_reassociation_width (ops_num, rhs_code,
7035 mode)) > 1)
7037 if (dump_file && (dump_flags & TDF_DETAILS))
7038 fprintf (dump_file,
7039 "Width = %d was chosen for reassociation\n",
7040 width);
7041 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7042 width,
7043 has_fma,
7044 ops);
7046 else
7048 /* When there are three operands left, we want
7049 to make sure the ones that get the double
7050 binary op are chosen wisely. */
7051 int len = ops.length ();
7052 if (len >= 3 && !has_fma)
7053 swap_ops_for_binary_stmt (ops, len - 3);
7055 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7056 powi_result != NULL
7057 || negate_result,
7058 len != orig_len);
7061 /* If we combined some repeated factors into a
7062 __builtin_powi call, multiply that result by the
7063 reassociated operands. */
7064 if (powi_result)
7066 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7067 tree type = TREE_TYPE (lhs);
7068 tree target_ssa = make_temp_ssa_name (type, NULL,
7069 "reassocpow");
7070 gimple_set_lhs (lhs_stmt, target_ssa);
7071 update_stmt (lhs_stmt);
7072 if (lhs != new_lhs)
7074 target_ssa = new_lhs;
7075 new_lhs = lhs;
7077 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7078 powi_result, target_ssa);
7079 gimple_set_location (mul_stmt, gimple_location (stmt));
7080 gimple_set_uid (mul_stmt, gimple_uid (stmt));
7081 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7085 if (negate_result)
7087 stmt = SSA_NAME_DEF_STMT (lhs);
7088 tree tmp = make_ssa_name (TREE_TYPE (lhs));
7089 gimple_set_lhs (stmt, tmp);
7090 if (lhs != new_lhs)
7091 tmp = new_lhs;
7092 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7093 tmp);
7094 gimple_set_uid (neg_stmt, gimple_uid (stmt));
7095 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7096 update_stmt (stmt);
7101 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
7102 son;
7103 son = next_dom_son (CDI_POST_DOMINATORS, son))
7104 cfg_cleanup_needed |= reassociate_bb (son);
7106 return cfg_cleanup_needed;
7109 /* Add jumps around shifts for range tests turned into bit tests.
7110 For each SSA_NAME VAR we have code like:
7111 VAR = ...; // final stmt of range comparison
7112 // bit test here...;
7113 OTHERVAR = ...; // final stmt of the bit test sequence
7114 RES = VAR | OTHERVAR;
7115 Turn the above into:
7116 VAR = ...;
7117 if (VAR != 0)
7118 goto <l3>;
7119 else
7120 goto <l2>;
7121 <l2>:
7122 // bit test here...;
7123 OTHERVAR = ...;
7124 <l3>:
7125 # RES = PHI<1(l1), OTHERVAR(l2)>; */
7127 static void
7128 branch_fixup (void)
7130 tree var;
7131 unsigned int i;
7133 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7135 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7136 gimple *use_stmt;
7137 use_operand_p use;
7138 bool ok = single_imm_use (var, &use, &use_stmt);
7139 gcc_assert (ok
7140 && is_gimple_assign (use_stmt)
7141 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7142 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7144 basic_block cond_bb = gimple_bb (def_stmt);
7145 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7146 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7148 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7149 gimple *g = gimple_build_cond (NE_EXPR, var,
7150 build_zero_cst (TREE_TYPE (var)),
7151 NULL_TREE, NULL_TREE);
7152 location_t loc = gimple_location (use_stmt);
7153 gimple_set_location (g, loc);
7154 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7156 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7157 etrue->probability = profile_probability::even ();
7158 edge efalse = find_edge (cond_bb, then_bb);
7159 efalse->flags = EDGE_FALSE_VALUE;
7160 efalse->probability -= etrue->probability;
7161 then_bb->count -= etrue->count ();
7163 tree othervar = NULL_TREE;
7164 if (gimple_assign_rhs1 (use_stmt) == var)
7165 othervar = gimple_assign_rhs2 (use_stmt);
7166 else if (gimple_assign_rhs2 (use_stmt) == var)
7167 othervar = gimple_assign_rhs1 (use_stmt);
7168 else
7169 gcc_unreachable ();
7170 tree lhs = gimple_assign_lhs (use_stmt);
7171 gphi *phi = create_phi_node (lhs, merge_bb);
7172 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7173 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7174 gsi = gsi_for_stmt (use_stmt);
7175 gsi_remove (&gsi, true);
7177 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7178 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7180 reassoc_branch_fixups.release ();
7183 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7184 void debug_ops_vector (vec<operand_entry *> ops);
7186 /* Dump the operand entry vector OPS to FILE. */
7188 void
7189 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7191 operand_entry *oe;
7192 unsigned int i;
7194 FOR_EACH_VEC_ELT (ops, i, oe)
7196 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7197 print_generic_expr (file, oe->op);
7198 fprintf (file, "\n");
7202 /* Dump the operand entry vector OPS to STDERR. */
7204 DEBUG_FUNCTION void
7205 debug_ops_vector (vec<operand_entry *> ops)
7207 dump_ops_vector (stderr, ops);
7210 /* Bubble up return status from reassociate_bb. */
7212 static bool
7213 do_reassoc (void)
7215 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7216 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7219 /* Initialize the reassociation pass. */
7221 static void
7222 init_reassoc (void)
7224 int i;
7225 int64_t rank = 2;
7226 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7228 /* Find the loops, so that we can prevent moving calculations in
7229 them. */
7230 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7232 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7234 next_operand_entry_id = 0;
7236 /* Reverse RPO (Reverse Post Order) will give us something where
7237 deeper loops come later. */
7238 pre_and_rev_post_order_compute (NULL, bbs, false);
7239 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7240 operand_rank = new hash_map<tree, int64_t>;
7242 /* Give each default definition a distinct rank. This includes
7243 parameters and the static chain. Walk backwards over all
7244 SSA names so that we get proper rank ordering according
7245 to tree_swap_operands_p. */
7246 for (i = num_ssa_names - 1; i > 0; --i)
7248 tree name = ssa_name (i);
7249 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7250 insert_operand_rank (name, ++rank);
7253 /* Set up rank for each BB */
7254 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7255 bb_rank[bbs[i]] = ++rank << 16;
7257 free (bbs);
7258 calculate_dominance_info (CDI_POST_DOMINATORS);
7259 plus_negates = vNULL;
7262 /* Cleanup after the reassociation pass, and print stats if
7263 requested. */
7265 static void
7266 fini_reassoc (void)
7268 statistics_counter_event (cfun, "Linearized",
7269 reassociate_stats.linearized);
7270 statistics_counter_event (cfun, "Constants eliminated",
7271 reassociate_stats.constants_eliminated);
7272 statistics_counter_event (cfun, "Ops eliminated",
7273 reassociate_stats.ops_eliminated);
7274 statistics_counter_event (cfun, "Statements rewritten",
7275 reassociate_stats.rewritten);
7276 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7277 reassociate_stats.pows_encountered);
7278 statistics_counter_event (cfun, "Built-in powi calls created",
7279 reassociate_stats.pows_created);
7281 delete operand_rank;
7282 bitmap_clear (biased_names);
7283 operand_entry_pool.release ();
7284 free (bb_rank);
7285 plus_negates.release ();
7286 free_dominance_info (CDI_POST_DOMINATORS);
7287 loop_optimizer_finalize ();
7290 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7291 insertion of __builtin_powi calls.
7293    Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
7294 optimization of a gimple conditional. Otherwise returns zero. */
7296 static unsigned int
7297 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7299 reassoc_insert_powi_p = insert_powi_p;
7300 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7302 init_reassoc ();
7304 bool cfg_cleanup_needed;
7305 cfg_cleanup_needed = do_reassoc ();
7306 repropagate_negates ();
7307 branch_fixup ();
7309 fini_reassoc ();
7310 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7313 namespace {
7315 const pass_data pass_data_reassoc =
7317 GIMPLE_PASS, /* type */
7318 "reassoc", /* name */
7319 OPTGROUP_NONE, /* optinfo_flags */
7320 TV_TREE_REASSOC, /* tv_id */
7321 ( PROP_cfg | PROP_ssa ), /* properties_required */
7322 0, /* properties_provided */
7323 0, /* properties_destroyed */
7324 0, /* todo_flags_start */
7325 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7328 class pass_reassoc : public gimple_opt_pass
7330 public:
7331 pass_reassoc (gcc::context *ctxt)
7332 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7335 /* opt_pass methods: */
7336 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7337 void set_pass_param (unsigned int n, bool param) final override
7339 gcc_assert (n == 0);
7340 insert_powi_p = param;
7341 bias_loop_carried_phi_ranks_p = !param;
7343 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7344 unsigned int execute (function *) final override
7346 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7349 private:
7350 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7351 point 3a in the pass header comment. */
7352 bool insert_powi_p;
7353 bool bias_loop_carried_phi_ranks_p;
7354 }; // class pass_reassoc
7356 } // anon namespace
7358 gimple_opt_pass *
7359 make_pass_reassoc (gcc::context *ctxt)
7361 return new pass_reassoc (ctxt);