gcc/tree-ssa-reassoc.cc
1 /* Reassociation for trees.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57 #include "internal-fn.h"
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
63 It consists of five steps:
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_*
73 3. Optimization of the operand lists, eliminating things like a +
74 -a, a & a, etc.
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
83 5. Repropagate negates, as nothing else will clean it up ATM.
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
104 a (1), b (1), c (1), d (2), e (2)
107 We start with our merge worklist empty, and the ops list with all of
108 those on it.
110 You want to first merge all leaves of the same rank, as much as
111 possible.
113 So first build a binary op of
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
125 Then build a binary op of d + e
126 mergetmp2 = d + e
128 and put mergetmp2 on the merge worklist.
130 so merge worklist = {mergetmp, c, mergetmp2}
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
135 So we have
137 build binary op
138 mergetmp3 = mergetmp + c
140 worklist = {mergetmp2, mergetmp3}
142 mergetmp4 = mergetmp2 + mergetmp3
144 worklist = {mergetmp4}
146 because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
155 So why don't we do this?
157 Because it's expensive, and rarely will help. Most trees we are
158 reassociating have 3 or fewer ops. If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
163 mergetmp = op1 + op2
164 newstmt = mergetmp + op3
166 instead of
167 mergetmp = op2 + op3
168 newstmt = mergetmp + op1
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
172 can do.
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
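/* For example, breaking up the subtraction in step 1 turns

     t = (a + b) - a

   into

     t = (a + b) + (-a)

   so that, after linearization in step 2, step 3 sees the operand list
   {a, b, -a} and can cancel the pair a and -a.  */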
180 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 point 3a in the pass header comment. */
182 static bool reassoc_insert_powi_p;
184 /* Enable biasing ranks of loop accumulators. We don't want this before
185 vectorization, since it interferes with reduction chains. */
186 static bool reassoc_bias_loop_carried_phi_ranks_p;
188 /* Statistics */
189 static struct
191 int linearized;
192 int constants_eliminated;
193 int ops_eliminated;
194 int rewritten;
195 int pows_encountered;
196 int pows_created;
197 } reassociate_stats;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static unsigned int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static int64_t *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, int64_t> *operand_rank;
215 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 biased. */
217 static auto_bitmap biased_names;
219 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
220 all basic blocks, the CFG should be adjusted - the basic block is
221 split right after that SSA_NAME's definition statement and before
222 its only use, which must be a bit ior.
223 static vec<tree> reassoc_branch_fixups;
225 /* Forward decls. */
226 static int64_t get_rank (tree);
227 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
229 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 possibly added by gsi_remove. */
232 static bool
233 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
235 gimple *stmt = gsi_stmt (*gsi);
237 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 return gsi_remove (gsi, true);
240 gimple_stmt_iterator prev = *gsi;
241 gsi_prev (&prev);
242 unsigned uid = gimple_uid (stmt);
243 basic_block bb = gimple_bb (stmt);
244 bool ret = gsi_remove (gsi, true);
245 if (!gsi_end_p (prev))
246 gsi_next (&prev);
247 else
248 prev = gsi_start_bb (bb);
249 gimple *end_stmt = gsi_stmt (*gsi);
250 while ((stmt = gsi_stmt (prev)) != end_stmt)
252 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 gimple_set_uid (stmt, uid);
254 gsi_next (&prev);
256 return ret;
259 /* Bias amount for loop-carried phis. We want this to be larger than
260 the depth of any reassociation tree we can see, but not larger than
261 the rank difference between two blocks. */
262 #define PHI_LOOP_BIAS (1 << 15)
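/* As a point of reference: basic block ranks are spaced 1 << 16 apart
   (note the >> 16 shift in sort_by_operand_rank), so a bias of 1 << 15 is
   comfortably larger than any reassociation tree depth we expect to see in
   a single block, while still smaller than the rank step between blocks.  */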
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
266 like this:
268 x_1 = phi(x_0, x_2)
269 a = x_1 | 1
270 b = a ^ 2
271 .MEM = b
272 c = b + d
273 x_2 = c + e
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
278 static bool
279 propagate_bias_p (gimple *stmt)
281 use_operand_p use;
282 imm_use_iterator use_iter;
283 gimple *single_use_stmt = NULL;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 return false;
288 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
290 gimple *current_use_stmt = USE_STMT (use);
292 if (is_gimple_assign (current_use_stmt)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
295 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 return false;
297 single_use_stmt = current_use_stmt;
301 if (single_use_stmt == NULL)
302 return false;
304 if (gimple_bb (stmt)->loop_father
305 != gimple_bb (single_use_stmt)->loop_father)
306 return false;
308 return true;
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
318 static int64_t
319 phi_rank (gimple *stmt)
321 basic_block bb = gimple_bb (stmt);
322 class loop *father = bb->loop_father;
323 tree res;
324 unsigned i;
325 use_operand_p use;
326 gimple *use_stmt;
328 if (!reassoc_bias_loop_carried_phi_ranks_p)
329 return bb_rank[bb->index];
331 /* We only care about real loops (those with a latch). */
332 if (!father->latch)
333 return bb_rank[bb->index];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb != father->header
337 || father->inner)
338 return bb_rank[bb->index];
340 /* Ignore virtual SSA_NAMEs. */
341 res = gimple_phi_result (stmt);
342 if (virtual_operand_p (res))
343 return bb_rank[bb->index];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res, &use, &use_stmt)
348 || gimple_bb (use_stmt)->loop_father != father)
349 return bb_rank[bb->index];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i = 0; i < gimple_phi_num_args (stmt); i++)
354 tree arg = gimple_phi_arg_def (stmt, i);
355 if (TREE_CODE (arg) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg))
358 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 if (gimple_bb (def_stmt)->loop_father == father)
360 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
364 /* Must be an uninteresting phi. */
365 return bb_rank[bb->index];
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
372 static int64_t
373 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
375 int64_t op_rank;
377 op_rank = get_rank (op);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op) == SSA_NAME
382 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
384 if (maybe_biased_p == NULL)
385 return rank;
386 *maybe_biased_p = true;
389 return MAX (rank, op_rank);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e)
397 int64_t *slot = operand_rank->get (e);
398 return slot ? *slot : -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
403 static inline void
404 insert_operand_rank (tree e, int64_t rank)
406 gcc_assert (rank > 0);
407 gcc_assert (!operand_rank->put (e, rank));
410 /* Given an expression E, return the rank of the expression. */
412 static int64_t
413 get_rank (tree e)
415 /* SSA_NAME's have the rank of the expression they are the result of.
417 For globals and uninitialized values, the rank is 0.
418 For function arguments, use the pre-setup rank.
419 For PHI nodes, stores, asm statements, etc, we use the rank of
420 the BB.
421 For simple operations, the rank is the maximum rank of any of
422 its operands, or the bb_rank, whichever is less.
423 I make no claims that this is optimal, however, it gives good
424 results. */
426 /* We make an exception to the normal ranking system to break
427 dependences of accumulator variables in loops. Suppose we
428 have a simple one-block loop containing:
430 x_1 = phi(x_0, x_2)
431 b = a + x_1
432 c = b + d
433 x_2 = c + e
435 As shown, each iteration of the calculation into x is fully
436 dependent upon the iteration before it. We would prefer to
437 see this in the form:
439 x_1 = phi(x_0, x_2)
440 b = a + d
441 c = b + e
442 x_2 = c + x_1
444 If the loop is unrolled, the calculations of b and c from
445 different iterations can be interleaved.
447 To obtain this result during reassociation, we bias the rank
448 of the phi definition x_1 upward, when it is recognized as an
449 accumulator pattern. The artificial rank causes it to be
450 added last, providing the desired independence. */
452 if (TREE_CODE (e) == SSA_NAME)
454 ssa_op_iter iter;
455 gimple *stmt;
456 int64_t rank;
457 tree op;
459 /* If we already have a rank for this expression, use that. */
460 rank = find_operand_rank (e);
461 if (rank != -1)
462 return rank;
464 stmt = SSA_NAME_DEF_STMT (e);
465 if (gimple_code (stmt) == GIMPLE_PHI)
467 rank = phi_rank (stmt);
468 if (rank != bb_rank[gimple_bb (stmt)->index])
469 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
472 else if (!is_gimple_assign (stmt))
473 rank = bb_rank[gimple_bb (stmt)->index];
475 else
477 bool biased_p = false;
478 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
480 /* Otherwise, find the maximum rank for the operands. As an
481 exception, remove the bias from loop-carried phis when propagating
482 the rank so that dependent operations are not also biased. */
483 /* Simply walk over all SSA uses - this takes advantage of the
484 fact that non-SSA operands are is_gimple_min_invariant and
485 thus have rank 0. */
486 rank = 0;
487 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
488 rank = propagate_rank (rank, op, maybe_biased_p);
490 rank += 1;
491 if (biased_p)
492 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
495 if (dump_file && (dump_flags & TDF_DETAILS))
497 fprintf (dump_file, "Rank for ");
498 print_generic_expr (dump_file, e);
499 fprintf (dump_file, " is %" PRId64 "\n", rank);
502 /* Note the rank in the hashtable so we don't recompute it. */
503 insert_operand_rank (e, rank);
504 return rank;
507 /* Constants, globals, etc., are rank 0 */
508 return 0;
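/* As an illustration (with made-up ranks): if a, b and c all happen to have
   rank 1, then for

     t1 = a + b
     t2 = t1 + c

   t1 gets rank 2 and t2 gets rank 3, which is the information the rewriting
   step later uses to pair up leaves of equal rank as described in the file
   header comment.  */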
512 /* We want integer ones to end up last no matter what, since they are
513 the ones we can do the most with. */
514 #define INTEGER_CONST_TYPE 1 << 4
515 #define FLOAT_ONE_CONST_TYPE 1 << 3
516 #define FLOAT_CONST_TYPE 1 << 2
517 #define OTHER_CONST_TYPE 1 << 1
519 /* Classify an invariant tree into integer, float, or other, so that
520 we can sort them to be near other constants of the same type. */
521 static inline int
522 constant_type (tree t)
524 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
525 return INTEGER_CONST_TYPE;
526 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
528 /* Sort -1.0 and 1.0 constants last: while in some cases
529 const_binop can't optimize some inexact operations, multiplication
530 by -1.0 or 1.0 can always be merged with others. */
531 if (real_onep (t) || real_minus_onep (t))
532 return FLOAT_ONE_CONST_TYPE;
533 return FLOAT_CONST_TYPE;
535 else
536 return OTHER_CONST_TYPE;
539 /* qsort comparison function to sort operand entries PA and PB by rank
540 so that the sorted array is ordered by rank in decreasing order. */
541 static int
542 sort_by_operand_rank (const void *pa, const void *pb)
544 const operand_entry *oea = *(const operand_entry *const *)pa;
545 const operand_entry *oeb = *(const operand_entry *const *)pb;
547 if (oeb->rank != oea->rank)
548 return oeb->rank > oea->rank ? 1 : -1;
550 /* It's nicer for optimize_expression if constants that are likely
551 to fold when added/multiplied/whatever are put next to each
552 other. Since all constants have rank 0, order them by type. */
553 if (oea->rank == 0)
555 if (constant_type (oeb->op) != constant_type (oea->op))
556 return constant_type (oea->op) - constant_type (oeb->op);
557 else
558 /* To make sorting result stable, we use unique IDs to determine
559 order. */
560 return oeb->id > oea->id ? 1 : -1;
563 if (TREE_CODE (oea->op) != SSA_NAME)
565 if (TREE_CODE (oeb->op) != SSA_NAME)
566 return oeb->id > oea->id ? 1 : -1;
567 else
568 return 1;
570 else if (TREE_CODE (oeb->op) != SSA_NAME)
571 return -1;
573 /* Lastly, make sure the versions that are the same go next to each
574 other. */
575 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
578 versions of removed SSA_NAMEs, so if possible prefer to sort
579 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
580 See PR60418. */
581 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
582 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
583 basic_block bba = gimple_bb (stmta);
584 basic_block bbb = gimple_bb (stmtb);
585 if (bbb != bba)
587 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
588 but the other might not be. */
589 if (!bba)
590 return 1;
591 if (!bbb)
592 return -1;
593 /* If neither is, compare bb_rank. */
594 if (bb_rank[bbb->index] != bb_rank[bba->index])
595 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
598 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600 if (da != db)
601 return da ? 1 : -1;
603 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
606 return oeb->id > oea->id ? 1 : -1;
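/* As an illustration (hypothetical operand list): for PLUS_EXPR operands
   { x_5 (rank 3), 2.0 (rank 0), a_2 (rank 1), 3 (rank 0) } this comparator
   sorts the vector to { x_5, a_2, 2.0, 3 }: higher ranks first, the rank-0
   constants last, and among the constants the integer one at the very end,
   as requested by constant_type.  */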
609 /* Add an operand entry to *OPS for the tree operand OP. */
611 static void
612 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
614 operand_entry *oe = operand_entry_pool.allocate ();
616 oe->op = op;
617 oe->rank = get_rank (op);
618 oe->id = next_operand_entry_id++;
619 oe->count = 1;
620 oe->stmt_to_insert = stmt_to_insert;
621 ops->safe_push (oe);
624 /* Add an operand entry to *OPS for the tree operand OP with repeat
625 count REPEAT. */
627 static void
628 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
629 HOST_WIDE_INT repeat)
631 operand_entry *oe = operand_entry_pool.allocate ();
633 oe->op = op;
634 oe->rank = get_rank (op);
635 oe->id = next_operand_entry_id++;
636 oe->count = repeat;
637 oe->stmt_to_insert = NULL;
638 ops->safe_push (oe);
640 reassociate_stats.pows_encountered++;
643 /* Returns true if we can associate the SSA def OP. */
645 static bool
646 can_reassociate_op_p (tree op)
648 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
649 return false;
650 /* Make sure asm goto outputs do not participate in reassociation since
651 we have no way to find an insertion place after asm goto. */
652 if (TREE_CODE (op) == SSA_NAME
653 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
654 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
655 return false;
656 return true;
659 /* Returns true if we can reassociate operations of TYPE.
660 That is for integral or non-saturating fixed-point types, and for
661 floating point type when associative-math is enabled. */
663 static bool
664 can_reassociate_type_p (tree type)
666 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
667 || NON_SAT_FIXED_POINT_TYPE_P (type)
668 || (flag_associative_math && FLOAT_TYPE_P (type)))
669 return true;
670 return false;
673 /* Return true if STMT is a reassociable operation containing a binary
674 operation with tree code CODE, and is inside LOOP. */
676 static bool
677 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
679 basic_block bb = gimple_bb (stmt);
681 if (gimple_bb (stmt) == NULL)
682 return false;
684 if (!flow_bb_inside_loop_p (loop, bb))
685 return false;
687 if (is_gimple_assign (stmt)
688 && gimple_assign_rhs_code (stmt) == code
689 && has_single_use (gimple_assign_lhs (stmt)))
691 tree rhs1 = gimple_assign_rhs1 (stmt);
692 tree rhs2 = gimple_assign_rhs2 (stmt);
693 if (!can_reassociate_op_p (rhs1)
694 || (rhs2 && !can_reassociate_op_p (rhs2)))
695 return false;
696 return true;
699 return false;
703 /* Return true if STMT is a nop-conversion. */
705 static bool
706 gimple_nop_conversion_p (gimple *stmt)
708 if (gassign *ass = dyn_cast <gassign *> (stmt))
710 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
711 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
712 TREE_TYPE (gimple_assign_rhs1 (ass))))
713 return true;
715 return false;
718 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
719 operand of that operation. Otherwise, return NULL. */
721 static tree
722 get_unary_op (tree name, enum tree_code opcode)
724 gimple *stmt = SSA_NAME_DEF_STMT (name);
726 /* Look through nop conversions (sign changes). */
727 if (gimple_nop_conversion_p (stmt)
728 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
729 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
731 if (!is_gimple_assign (stmt))
732 return NULL_TREE;
734 if (gimple_assign_rhs_code (stmt) == opcode)
735 return gimple_assign_rhs1 (stmt);
736 return NULL_TREE;
739 /* Return true if OP1 and OP2 have the same value if cast to either type. */
741 static bool
742 ops_equal_values_p (tree op1, tree op2)
744 if (op1 == op2)
745 return true;
747 tree orig_op1 = op1;
748 if (TREE_CODE (op1) == SSA_NAME)
750 gimple *stmt = SSA_NAME_DEF_STMT (op1);
751 if (gimple_nop_conversion_p (stmt))
753 op1 = gimple_assign_rhs1 (stmt);
754 if (op1 == op2)
755 return true;
759 if (TREE_CODE (op2) == SSA_NAME)
761 gimple *stmt = SSA_NAME_DEF_STMT (op2);
762 if (gimple_nop_conversion_p (stmt))
764 op2 = gimple_assign_rhs1 (stmt);
765 if (op1 == op2
766 || orig_op1 == op2)
767 return true;
771 return false;
775 /* If CURR and LAST are a pair of ops that OPCODE allows us to
776 eliminate through equivalences, do so, remove them from OPS, and
777 return true. Otherwise, return false. */
779 static bool
780 eliminate_duplicate_pair (enum tree_code opcode,
781 vec<operand_entry *> *ops,
782 bool *all_done,
783 unsigned int i,
784 operand_entry *curr,
785 operand_entry *last)
788 /* If we have two of the same op, and the opcode is & |, min, or max,
789 we can eliminate one of them.
790 If we have two of the same op, and the opcode is ^, we can
791 eliminate both of them. */
793 if (last && last->op == curr->op)
795 switch (opcode)
797 case MAX_EXPR:
798 case MIN_EXPR:
799 case BIT_IOR_EXPR:
800 case BIT_AND_EXPR:
801 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "Equivalence: ");
804 print_generic_expr (dump_file, curr->op);
805 fprintf (dump_file, " [&|minmax] ");
806 print_generic_expr (dump_file, last->op);
807 fprintf (dump_file, " -> ");
808 print_generic_stmt (dump_file, last->op);
811 ops->ordered_remove (i);
812 reassociate_stats.ops_eliminated ++;
814 return true;
816 case BIT_XOR_EXPR:
817 if (dump_file && (dump_flags & TDF_DETAILS))
819 fprintf (dump_file, "Equivalence: ");
820 print_generic_expr (dump_file, curr->op);
821 fprintf (dump_file, " ^ ");
822 print_generic_expr (dump_file, last->op);
823 fprintf (dump_file, " -> nothing\n");
826 reassociate_stats.ops_eliminated += 2;
828 if (ops->length () == 2)
830 ops->truncate (0);
831 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
832 *all_done = true;
834 else
836 ops->ordered_remove (i-1);
837 ops->ordered_remove (i-1);
840 return true;
842 default:
843 break;
846 return false;
849 static vec<tree> plus_negates;
851 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
852 expression, look in OPS for a corresponding positive operation to cancel
853 it out. If we find one, remove the other from OPS, replace
854 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
855 return false. */
857 static bool
858 eliminate_plus_minus_pair (enum tree_code opcode,
859 vec<operand_entry *> *ops,
860 unsigned int currindex,
861 operand_entry *curr)
863 tree negateop;
864 tree notop;
865 unsigned int i;
866 operand_entry *oe;
868 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
869 return false;
871 negateop = get_unary_op (curr->op, NEGATE_EXPR);
872 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
873 if (negateop == NULL_TREE && notop == NULL_TREE)
874 return false;
876 /* Any non-negated version will have a rank that is one less than
877 the current rank. So once we hit those ranks, if we don't find
878 one, we can stop. */
880 for (i = currindex + 1;
881 ops->iterate (i, &oe)
882 && oe->rank >= curr->rank - 1 ;
883 i++)
885 if (negateop
886 && ops_equal_values_p (oe->op, negateop))
888 if (dump_file && (dump_flags & TDF_DETAILS))
890 fprintf (dump_file, "Equivalence: ");
891 print_generic_expr (dump_file, negateop);
892 fprintf (dump_file, " + -");
893 print_generic_expr (dump_file, oe->op);
894 fprintf (dump_file, " -> 0\n");
897 ops->ordered_remove (i);
898 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
899 ops->ordered_remove (currindex);
900 reassociate_stats.ops_eliminated ++;
902 return true;
904 else if (notop
905 && ops_equal_values_p (oe->op, notop))
907 tree op_type = TREE_TYPE (oe->op);
909 if (dump_file && (dump_flags & TDF_DETAILS))
911 fprintf (dump_file, "Equivalence: ");
912 print_generic_expr (dump_file, notop);
913 fprintf (dump_file, " + ~");
914 print_generic_expr (dump_file, oe->op);
915 fprintf (dump_file, " -> -1\n");
918 ops->ordered_remove (i);
919 add_to_ops_vec (ops, build_all_ones_cst (op_type));
920 ops->ordered_remove (currindex);
921 reassociate_stats.ops_eliminated ++;
923 return true;
927 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
928 save it for later inspection in repropagate_negates(). */
929 if (negateop != NULL_TREE
930 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
931 plus_negates.safe_push (curr->op);
933 return false;
936 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
937 bitwise not expression, look in OPS for a corresponding operand to
938 cancel it out. If we find one, remove the other from OPS, replace
939 OPS[CURRINDEX] with 0, and return true. Otherwise, return
940 false. */
942 static bool
943 eliminate_not_pairs (enum tree_code opcode,
944 vec<operand_entry *> *ops,
945 unsigned int currindex,
946 operand_entry *curr)
948 tree notop;
949 unsigned int i;
950 operand_entry *oe;
952 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
953 || TREE_CODE (curr->op) != SSA_NAME)
954 return false;
956 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
957 if (notop == NULL_TREE)
958 return false;
960 /* Any non-not version will have a rank that is one less than
961 the current rank. So once we hit those ranks, if we don't find
962 one, we can stop. */
964 for (i = currindex + 1;
965 ops->iterate (i, &oe)
966 && oe->rank >= curr->rank - 1;
967 i++)
969 if (oe->op == notop)
971 if (dump_file && (dump_flags & TDF_DETAILS))
973 fprintf (dump_file, "Equivalence: ");
974 print_generic_expr (dump_file, notop);
975 if (opcode == BIT_AND_EXPR)
976 fprintf (dump_file, " & ~");
977 else if (opcode == BIT_IOR_EXPR)
978 fprintf (dump_file, " | ~");
979 print_generic_expr (dump_file, oe->op);
980 if (opcode == BIT_AND_EXPR)
981 fprintf (dump_file, " -> 0\n");
982 else if (opcode == BIT_IOR_EXPR)
983 fprintf (dump_file, " -> -1\n");
986 if (opcode == BIT_AND_EXPR)
987 oe->op = build_zero_cst (TREE_TYPE (oe->op));
988 else if (opcode == BIT_IOR_EXPR)
989 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
991 reassociate_stats.ops_eliminated += ops->length () - 1;
992 ops->truncate (0);
993 ops->quick_push (oe);
994 return true;
998 return false;
1001 /* Use a constant value that may be present in OPS to try to eliminate
1002 operands. Note that this function is only really used when we've
1003 eliminated ops for other reasons, or merged constants. Across
1004 single statements, fold already does all of this, plus more. There
1005 is little point in duplicating logic, so I've only included the
1006 identities that I could ever construct testcases to trigger. */
1008 static void
1009 eliminate_using_constants (enum tree_code opcode,
1010 vec<operand_entry *> *ops)
1012 operand_entry *oelast = ops->last ();
1013 tree type = TREE_TYPE (oelast->op);
1015 if (oelast->rank == 0
1016 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1018 switch (opcode)
1020 case BIT_AND_EXPR:
1021 if (integer_zerop (oelast->op))
1023 if (ops->length () != 1)
1025 if (dump_file && (dump_flags & TDF_DETAILS))
1026 fprintf (dump_file, "Found & 0, removing all other ops\n");
1028 reassociate_stats.ops_eliminated += ops->length () - 1;
1030 ops->truncate (0);
1031 ops->quick_push (oelast);
1032 return;
1035 else if (integer_all_onesp (oelast->op))
1037 if (ops->length () != 1)
1039 if (dump_file && (dump_flags & TDF_DETAILS))
1040 fprintf (dump_file, "Found & -1, removing\n");
1041 ops->pop ();
1042 reassociate_stats.ops_eliminated++;
1045 break;
1046 case BIT_IOR_EXPR:
1047 if (integer_all_onesp (oelast->op))
1049 if (ops->length () != 1)
1051 if (dump_file && (dump_flags & TDF_DETAILS))
1052 fprintf (dump_file, "Found | -1, removing all other ops\n");
1054 reassociate_stats.ops_eliminated += ops->length () - 1;
1056 ops->truncate (0);
1057 ops->quick_push (oelast);
1058 return;
1061 else if (integer_zerop (oelast->op))
1063 if (ops->length () != 1)
1065 if (dump_file && (dump_flags & TDF_DETAILS))
1066 fprintf (dump_file, "Found | 0, removing\n");
1067 ops->pop ();
1068 reassociate_stats.ops_eliminated++;
1071 break;
1072 case MULT_EXPR:
1073 if (integer_zerop (oelast->op)
1074 || (FLOAT_TYPE_P (type)
1075 && !HONOR_NANS (type)
1076 && !HONOR_SIGNED_ZEROS (type)
1077 && real_zerop (oelast->op)))
1079 if (ops->length () != 1)
1081 if (dump_file && (dump_flags & TDF_DETAILS))
1082 fprintf (dump_file, "Found * 0, removing all other ops\n");
1084 reassociate_stats.ops_eliminated += ops->length () - 1;
1085 ops->truncate (0);
1086 ops->quick_push (oelast);
1087 return;
1090 else if (integer_onep (oelast->op)
1091 || (FLOAT_TYPE_P (type)
1092 && !HONOR_SNANS (type)
1093 && real_onep (oelast->op)))
1095 if (ops->length () != 1)
1097 if (dump_file && (dump_flags & TDF_DETAILS))
1098 fprintf (dump_file, "Found * 1, removing\n");
1099 ops->pop ();
1100 reassociate_stats.ops_eliminated++;
1101 return;
1104 break;
1105 case BIT_XOR_EXPR:
1106 case PLUS_EXPR:
1107 case MINUS_EXPR:
1108 if (integer_zerop (oelast->op)
1109 || (FLOAT_TYPE_P (type)
1110 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1111 && fold_real_zero_addition_p (type, 0, oelast->op,
1112 opcode == MINUS_EXPR)))
1114 if (ops->length () != 1)
1116 if (dump_file && (dump_flags & TDF_DETAILS))
1117 fprintf (dump_file, "Found [|^+] 0, removing\n");
1118 ops->pop ();
1119 reassociate_stats.ops_eliminated++;
1120 return;
1123 break;
1124 default:
1125 break;
1131 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1132 bool, bool);
1134 /* Structure for tracking and counting operands. */
1135 struct oecount {
1136 unsigned int cnt;
1137 unsigned int id;
1138 enum tree_code oecode;
1139 tree op;
1143 /* The heap for the oecount hashtable and the sorted list of operands. */
1144 static vec<oecount> cvec;
1147 /* Oecount hashtable helpers. */
1149 struct oecount_hasher : int_hash <int, 0, 1>
1151 static inline hashval_t hash (int);
1152 static inline bool equal (int, int);
1155 /* Hash function for oecount. */
1157 inline hashval_t
1158 oecount_hasher::hash (int p)
1160 const oecount *c = &cvec[p - 42];
1161 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1164 /* Comparison function for oecount. */
1166 inline bool
1167 oecount_hasher::equal (int p1, int p2)
1169 const oecount *c1 = &cvec[p1 - 42];
1170 const oecount *c2 = &cvec[p2 - 42];
1171 return c1->oecode == c2->oecode && c1->op == c2->op;
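/* The +42 / -42 offsets above and in undistribute_ops_list exist because the
   table is an int_hash <int, 0, 1>, which reserves 0 for empty and 1 for
   deleted slots; storing cvec indices biased by 42 keeps every real entry
   away from those reserved values.  */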
1174 /* Comparison function for qsort sorting oecount elements by count. */
1176 static int
1177 oecount_cmp (const void *p1, const void *p2)
1179 const oecount *c1 = (const oecount *)p1;
1180 const oecount *c2 = (const oecount *)p2;
1181 if (c1->cnt != c2->cnt)
1182 return c1->cnt > c2->cnt ? 1 : -1;
1183 else
1184 /* If counts are identical, use unique IDs to stabilize qsort. */
1185 return c1->id > c2->id ? 1 : -1;
1188 /* Return TRUE iff STMT represents a builtin call that raises OP
1189 to some exponent. */
1191 static bool
1192 stmt_is_power_of_op (gimple *stmt, tree op)
1194 if (!is_gimple_call (stmt))
1195 return false;
1197 switch (gimple_call_combined_fn (stmt))
1199 CASE_CFN_POW:
1200 CASE_CFN_POWI:
1201 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1203 default:
1204 return false;
1208 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1209 in place and return the result. Assumes that stmt_is_power_of_op
1210 was previously called for STMT and returned TRUE. */
1212 static HOST_WIDE_INT
1213 decrement_power (gimple *stmt)
1215 REAL_VALUE_TYPE c, cint;
1216 HOST_WIDE_INT power;
1217 tree arg1;
1219 switch (gimple_call_combined_fn (stmt))
1221 CASE_CFN_POW:
1222 arg1 = gimple_call_arg (stmt, 1);
1223 c = TREE_REAL_CST (arg1);
1224 power = real_to_integer (&c) - 1;
1225 real_from_integer (&cint, VOIDmode, power, SIGNED);
1226 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1227 return power;
1229 CASE_CFN_POWI:
1230 arg1 = gimple_call_arg (stmt, 1);
1231 power = TREE_INT_CST_LOW (arg1) - 1;
1232 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1233 return power;
1235 default:
1236 gcc_unreachable ();
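/* For example, a call __builtin_powi (x, 5) is rewritten in place to
   __builtin_powi (x, 4) and 4 is returned; callers such as
   zero_one_operation treat a return value of 1 as "only the base is left".  */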
1240 /* Replace the SSA name defined by STMT with a new SSA name and replace
1241 all uses of the old name with the new one. Return the new SSA name. */
1243 static tree
1244 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1246 gimple *use_stmt;
1247 use_operand_p use;
1248 imm_use_iterator iter;
1249 tree new_lhs, new_debug_lhs = NULL_TREE;
1250 tree lhs = gimple_get_lhs (stmt);
1252 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1253 gimple_set_lhs (stmt, new_lhs);
1255 /* Also need to update GIMPLE_DEBUGs. */
1256 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1258 tree repl = new_lhs;
1259 if (is_gimple_debug (use_stmt))
1261 if (new_debug_lhs == NULL_TREE)
1263 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1264 gdebug *def_temp
1265 = gimple_build_debug_bind (new_debug_lhs,
1266 build2 (opcode, TREE_TYPE (lhs),
1267 new_lhs, op),
1268 stmt);
1269 gimple_set_uid (def_temp, gimple_uid (stmt));
1270 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1271 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1273 repl = new_debug_lhs;
1275 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1276 SET_USE (use, repl);
1277 update_stmt (use_stmt);
1279 return new_lhs;
1282 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1283 uses with new SSA names. Also do this for the stmt that defines DEF
1284 if *DEF is not OP. */
1286 static void
1287 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1288 vec<gimple *> &stmts_to_fix)
1290 unsigned i;
1291 gimple *stmt;
1293 if (*def != op
1294 && TREE_CODE (*def) == SSA_NAME
1295 && (stmt = SSA_NAME_DEF_STMT (*def))
1296 && gimple_code (stmt) != GIMPLE_NOP)
1297 *def = make_new_ssa_for_def (stmt, opcode, op);
1299 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1300 make_new_ssa_for_def (stmt, opcode, op);
1303 /* Find the single immediate use of STMT's LHS, and replace it
1304 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1305 replace *DEF with OP as well. */
1307 static void
1308 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1310 tree lhs;
1311 gimple *use_stmt;
1312 use_operand_p use;
1313 gimple_stmt_iterator gsi;
1315 if (is_gimple_call (stmt))
1316 lhs = gimple_call_lhs (stmt);
1317 else
1318 lhs = gimple_assign_lhs (stmt);
1320 gcc_assert (has_single_use (lhs));
1321 single_imm_use (lhs, &use, &use_stmt);
1322 if (lhs == *def)
1323 *def = op;
1324 SET_USE (use, op);
1325 if (TREE_CODE (op) != SSA_NAME)
1326 update_stmt (use_stmt);
1327 gsi = gsi_for_stmt (stmt);
1328 unlink_stmt_vdef (stmt);
1329 reassoc_remove_stmt (&gsi);
1330 release_defs (stmt);
1333 /* Walks the linear chain with result *DEF searching for an operation
1334 with operand OP and code OPCODE, removing it from the chain. *DEF
1335 is updated if there is only one operand but no operation left. */
1337 static void
1338 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1340 tree orig_def = *def;
1341 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1342 /* PR72835 - Record the stmt chain that has to be updated such that
1343 we don't use the same LHS when the values computed are different. */
1344 auto_vec<gimple *, 64> stmts_to_fix;
1348 tree name;
1350 if (opcode == MULT_EXPR)
1352 if (stmt_is_power_of_op (stmt, op))
1354 if (decrement_power (stmt) == 1)
1356 if (stmts_to_fix.length () > 0)
1357 stmts_to_fix.pop ();
1358 propagate_op_to_single_use (op, stmt, def);
1360 break;
1362 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1364 if (gimple_assign_rhs1 (stmt) == op)
1366 tree cst = build_minus_one_cst (TREE_TYPE (op));
1367 if (stmts_to_fix.length () > 0)
1368 stmts_to_fix.pop ();
1369 propagate_op_to_single_use (cst, stmt, def);
1370 break;
1372 else if (integer_minus_onep (op)
1373 || real_minus_onep (op))
1375 gimple_assign_set_rhs_code
1376 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1377 break;
1382 name = gimple_assign_rhs1 (stmt);
1384 /* If this is the operation we look for and one of the operands
1385 is ours, simply propagate the other operand into the stmt's
1386 single use. */
1387 if (gimple_assign_rhs_code (stmt) == opcode
1388 && (name == op
1389 || gimple_assign_rhs2 (stmt) == op))
1391 if (name == op)
1392 name = gimple_assign_rhs2 (stmt);
1393 if (stmts_to_fix.length () > 0)
1394 stmts_to_fix.pop ();
1395 propagate_op_to_single_use (name, stmt, def);
1396 break;
1399 /* We might have a multiply of two __builtin_pow* calls, and
1400 the operand might be hiding in the rightmost one. Likewise
1401 this can happen for a negate. */
1402 if (opcode == MULT_EXPR
1403 && gimple_assign_rhs_code (stmt) == opcode
1404 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1405 && has_single_use (gimple_assign_rhs2 (stmt)))
1407 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1408 if (stmt_is_power_of_op (stmt2, op))
1410 if (decrement_power (stmt2) == 1)
1411 propagate_op_to_single_use (op, stmt2, def);
1412 else
1413 stmts_to_fix.safe_push (stmt2);
1414 break;
1416 else if (is_gimple_assign (stmt2)
1417 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1419 if (gimple_assign_rhs1 (stmt2) == op)
1421 tree cst = build_minus_one_cst (TREE_TYPE (op));
1422 propagate_op_to_single_use (cst, stmt2, def);
1423 break;
1425 else if (integer_minus_onep (op)
1426 || real_minus_onep (op))
1428 stmts_to_fix.safe_push (stmt2);
1429 gimple_assign_set_rhs_code
1430 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1431 break;
1436 /* Continue walking the chain. */
1437 gcc_assert (name != op
1438 && TREE_CODE (name) == SSA_NAME);
1439 stmt = SSA_NAME_DEF_STMT (name);
1440 stmts_to_fix.safe_push (stmt);
1442 while (1);
1444 if (stmts_to_fix.length () > 0 || *def == orig_def)
1445 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1448 /* Returns true if statement S1 dominates statement S2. Like
1449 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1451 static bool
1452 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1454 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1456 /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a (D)
1457 SSA_NAME, i.e. a default definition. Assume it lives at the beginning
1458 of the function and thus dominates everything. */
1459 if (!bb1 || s1 == s2)
1460 return true;
1462 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1463 if (!bb2)
1464 return false;
1466 if (bb1 == bb2)
1468 /* PHIs in the same basic block are assumed to be
1469 executed all in parallel; if only one stmt is a PHI,
1470 it dominates the other stmt in the same basic block. */
1471 if (gimple_code (s1) == GIMPLE_PHI)
1472 return true;
1474 if (gimple_code (s2) == GIMPLE_PHI)
1475 return false;
1477 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1479 if (gimple_uid (s1) < gimple_uid (s2))
1480 return true;
1482 if (gimple_uid (s1) > gimple_uid (s2))
1483 return false;
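/* The uids are equal.  This happens for debug stmts that
   reassoc_remove_stmt re-tags with the uid of a removed stmt and for
   stmts inserted by insert_stmt_after, which copy the uid of their
   insertion point.  Scan forward from S1 through the run of stmts
   sharing this uid and see whether S2 appears among them.  */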
1485 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1486 unsigned int uid = gimple_uid (s1);
1487 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1489 gimple *s = gsi_stmt (gsi);
1490 if (gimple_uid (s) != uid)
1491 break;
1492 if (s == s2)
1493 return true;
1496 return false;
1499 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1502 /* Insert STMT after INSERT_POINT. */
1504 static void
1505 insert_stmt_after (gimple *stmt, gimple *insert_point)
1507 gimple_stmt_iterator gsi;
1508 basic_block bb;
1510 if (gimple_code (insert_point) == GIMPLE_PHI)
1511 bb = gimple_bb (insert_point);
1512 else if (!stmt_ends_bb_p (insert_point))
1514 gsi = gsi_for_stmt (insert_point);
1515 gimple_set_uid (stmt, gimple_uid (insert_point));
1516 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1517 return;
1519 else if (gimple_code (insert_point) == GIMPLE_ASM
1520 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1521 /* We have no idea where to insert - it depends on where the
1522 uses will be placed. */
1523 gcc_unreachable ();
1524 else
1525 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1526 thus if it must end a basic block, it should be a call that can
1527 throw, or some assignment that can throw. If it throws, the LHS
1528 of it will not be initialized though, so only valid places using
1529 the SSA_NAME should be dominated by the fallthru edge. */
1530 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1531 gsi = gsi_after_labels (bb);
1532 if (gsi_end_p (gsi))
1534 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1535 gimple_set_uid (stmt,
1536 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1538 else
1539 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1540 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1543 /* Builds one statement performing OP1 OPCODE OP2, using a new SSA name
1544 of TYPE for the result. Places the statement after the definition of either
1545 OP1 or OP2. Returns the new statement. */
1547 static gimple *
1548 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1550 gimple *op1def = NULL, *op2def = NULL;
1551 gimple_stmt_iterator gsi;
1552 tree op;
1553 gassign *sum;
1555 /* Create the addition statement. */
1556 op = make_ssa_name (type);
1557 sum = gimple_build_assign (op, opcode, op1, op2);
1559 /* Find an insertion place and insert. */
1560 if (TREE_CODE (op1) == SSA_NAME)
1561 op1def = SSA_NAME_DEF_STMT (op1);
1562 if (TREE_CODE (op2) == SSA_NAME)
1563 op2def = SSA_NAME_DEF_STMT (op2);
1564 if ((!op1def || gimple_nop_p (op1def))
1565 && (!op2def || gimple_nop_p (op2def)))
1567 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1568 if (!gsi_end_p (gsi)
1569 && is_gimple_call (gsi_stmt (gsi))
1570 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1572 /* Don't add statements before a returns_twice call at the start
1573 of a function. */
1574 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1575 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1577 if (gsi_end_p (gsi))
1579 gimple_stmt_iterator gsi2
1580 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1581 gimple_set_uid (sum,
1582 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1584 else
1585 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1586 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1588 else
1590 gimple *insert_point;
1591 if ((!op1def || gimple_nop_p (op1def))
1592 || (op2def && !gimple_nop_p (op2def)
1593 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1594 insert_point = op2def;
1595 else
1596 insert_point = op1def;
1597 insert_stmt_after (sum, insert_point);
1599 update_stmt (sum);
1601 return sum;
1604 /* Perform un-distribution of divisions and multiplications.
1605 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1606 to (A + B) / X for real X.
1608 The algorithm is organized as follows.
1610 - First we walk the addition chain *OPS looking for summands that
1611 are defined by a multiplication or a real division. This results
1612 in the candidates bitmap with relevant indices into *OPS.
1614 - Second we build the chains of multiplications or divisions for
1615 these candidates, counting the number of occurrences of (operand, code)
1616 pairs in all of the candidates chains.
1618 - Third we sort the (operand, code) pairs by number of occurrence and
1619 process them starting with the pair with the most uses.
1621 * For each such pair we walk the candidates again to build a
1622 second candidate bitmap noting all multiplication/division chains
1623 that have at least one occurrence of (operand, code).
1625 * We build an alternate addition chain only covering these
1626 candidates with one (operand, code) operation removed from their
1627 multiplication/division chain.
1629 * The first candidate gets replaced by the alternate addition chain
1630 multiplied/divided by the operand.
1632 * All candidate chains get disabled for further processing and
1633 processing of (operand, code) pairs continues.
1635 The alternate addition chains built are re-processed by the main
1636 reassociation algorithm which allows optimizing a * x * y + b * y * x
1637 to (a + b ) * x * y in one invocation of the reassociation pass. */
1639 static bool
1640 undistribute_ops_list (enum tree_code opcode,
1641 vec<operand_entry *> *ops, class loop *loop)
1643 unsigned int length = ops->length ();
1644 operand_entry *oe1;
1645 unsigned i, j;
1646 unsigned nr_candidates, nr_candidates2;
1647 sbitmap_iterator sbi0;
1648 vec<operand_entry *> *subops;
1649 bool changed = false;
1650 unsigned int next_oecount_id = 0;
1652 if (length <= 1
1653 || opcode != PLUS_EXPR)
1654 return false;
1656 /* Build a list of candidates to process. */
1657 auto_sbitmap candidates (length);
1658 bitmap_clear (candidates);
1659 nr_candidates = 0;
1660 FOR_EACH_VEC_ELT (*ops, i, oe1)
1662 enum tree_code dcode;
1663 gimple *oe1def;
1665 if (TREE_CODE (oe1->op) != SSA_NAME)
1666 continue;
1667 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1668 if (!is_gimple_assign (oe1def))
1669 continue;
1670 dcode = gimple_assign_rhs_code (oe1def);
1671 if ((dcode != MULT_EXPR
1672 && dcode != RDIV_EXPR)
1673 || !is_reassociable_op (oe1def, dcode, loop))
1674 continue;
1676 bitmap_set_bit (candidates, i);
1677 nr_candidates++;
1680 if (nr_candidates < 2)
1681 return false;
1683 if (dump_file && (dump_flags & TDF_DETAILS))
1685 fprintf (dump_file, "searching for un-distribute opportunities ");
1686 print_generic_expr (dump_file,
1687 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1688 fprintf (dump_file, " %d\n", nr_candidates);
1691 /* Build linearized sub-operand lists and the counting table. */
1692 cvec.create (0);
1694 hash_table<oecount_hasher> ctable (15);
1696 /* ??? Macro arguments cannot have multi-argument template types in
1697 them. This typedef is needed to work around that limitation. */
1698 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1699 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1700 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1702 gimple *oedef;
1703 enum tree_code oecode;
1704 unsigned j;
1706 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1707 oecode = gimple_assign_rhs_code (oedef);
1708 linearize_expr_tree (&subops[i], oedef,
1709 associative_tree_code (oecode), false);
1711 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1713 oecount c;
1714 int *slot;
1715 int idx;
1716 c.oecode = oecode;
1717 c.cnt = 1;
1718 c.id = next_oecount_id++;
1719 c.op = oe1->op;
1720 cvec.safe_push (c);
1721 idx = cvec.length () + 41;
1722 slot = ctable.find_slot (idx, INSERT);
1723 if (!*slot)
1725 *slot = idx;
1727 else
1729 cvec.pop ();
1730 cvec[*slot - 42].cnt++;
1735 /* Sort the counting table. */
1736 cvec.qsort (oecount_cmp);
1738 if (dump_file && (dump_flags & TDF_DETAILS))
1740 oecount *c;
1741 fprintf (dump_file, "Candidates:\n");
1742 FOR_EACH_VEC_ELT (cvec, j, c)
1744 fprintf (dump_file, " %u %s: ", c->cnt,
1745 c->oecode == MULT_EXPR
1746 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1747 print_generic_expr (dump_file, c->op);
1748 fprintf (dump_file, "\n");
1752 /* Process the (operand, code) pairs in order of most occurrence. */
1753 auto_sbitmap candidates2 (length);
1754 while (!cvec.is_empty ())
1756 oecount *c = &cvec.last ();
1757 if (c->cnt < 2)
1758 break;
1760 /* Now collect the operands in the outer chain that contain
1761 the common operand in their inner chain. */
1762 bitmap_clear (candidates2);
1763 nr_candidates2 = 0;
1764 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1766 gimple *oedef;
1767 enum tree_code oecode;
1768 unsigned j;
1769 tree op = (*ops)[i]->op;
1771 /* If we undistributed in this chain already this may be
1772 a constant. */
1773 if (TREE_CODE (op) != SSA_NAME)
1774 continue;
1776 oedef = SSA_NAME_DEF_STMT (op);
1777 oecode = gimple_assign_rhs_code (oedef);
1778 if (oecode != c->oecode)
1779 continue;
1781 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1783 if (oe1->op == c->op)
1785 bitmap_set_bit (candidates2, i);
1786 ++nr_candidates2;
1787 break;
1792 if (nr_candidates2 >= 2)
1794 operand_entry *oe1, *oe2;
1795 gimple *prod;
1796 int first = bitmap_first_set_bit (candidates2);
1798 /* Build the new addition chain. */
1799 oe1 = (*ops)[first];
1800 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, "Building (");
1803 print_generic_expr (dump_file, oe1->op);
1805 zero_one_operation (&oe1->op, c->oecode, c->op);
1806 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1808 gimple *sum;
1809 oe2 = (*ops)[i];
1810 if (dump_file && (dump_flags & TDF_DETAILS))
1812 fprintf (dump_file, " + ");
1813 print_generic_expr (dump_file, oe2->op);
1815 zero_one_operation (&oe2->op, c->oecode, c->op);
1816 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1817 oe1->op, oe2->op, opcode);
1818 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1819 oe2->rank = 0;
1820 oe1->op = gimple_get_lhs (sum);
1823 /* Apply the multiplication/division. */
1824 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1825 oe1->op, c->op, c->oecode);
1826 if (dump_file && (dump_flags & TDF_DETAILS))
1828 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1829 print_generic_expr (dump_file, c->op);
1830 fprintf (dump_file, "\n");
1833 /* Record it in the addition chain and disable further
1834 undistribution with this op. */
1835 oe1->op = gimple_assign_lhs (prod);
1836 oe1->rank = get_rank (oe1->op);
1837 subops[first].release ();
1839 changed = true;
1842 cvec.pop ();
1845 for (i = 0; i < ops->length (); ++i)
1846 subops[i].release ();
1847 free (subops);
1848 cvec.release ();
1850 return changed;
1853 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1854 first: element index for each relevant BIT_FIELD_REF.
1855 second: the index into the operand vec *OPS for each relevant BIT_FIELD_REF. */
1856 typedef std::pair<unsigned, unsigned> v_info_elem;
1857 struct v_info {
1858 tree vec_type;
1859 auto_vec<v_info_elem, 32> vec;
1861 typedef v_info *v_info_ptr;
1863 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1864 static int
1865 sort_by_mach_mode (const void *p_i, const void *p_j)
1867 const tree tr1 = *((const tree *) p_i);
1868 const tree tr2 = *((const tree *) p_j);
1869 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1870 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1871 if (mode1 > mode2)
1872 return 1;
1873 else if (mode1 < mode2)
1874 return -1;
1875 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1876 return -1;
1877 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1878 return 1;
1879 return 0;
1882 /* Cleanup hash map for VECTOR information. */
1883 static void
1884 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1886 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1887 it != info_map.end (); ++it)
1889 v_info_ptr info = (*it).second;
1890 delete info;
1891 (*it).second = NULL;
1895 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1896 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1897 is transformed to
1898 Vs = (V1 + V2 + ... + Vn)
1899 Vs[0] + Vs[1] + ... + Vs[k]
1901 The basic steps are listed below:
1903 1) Check the addition chain *OPS, looking for summands that come from a
1904 VECTOR BIT_FIELD_REF on a VECTOR type. Put the information into
1905 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1907 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1908 contiguous and cover the whole VECTOR perfectly, without any holes.
1909 Obtain one VECTOR list containing the candidates to be transformed.
1911 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1912 group of candidates with the same mode, build the addition statements
1913 for them and generate BIT_FIELD_REFs accordingly.
1915 TODO:
1916 The current implementation requires that each VECTOR be fully
1917 covered. It could be extended to support partial coverage (adjacent
1918 elements that do not fill the whole VECTOR), but that would need some
1919 cost model to decide where to draw the boundary. */
1921 static bool
1922 undistribute_bitref_for_vector (enum tree_code opcode,
1923 vec<operand_entry *> *ops, struct loop *loop)
1925 if (ops->length () <= 1)
1926 return false;
1928 if (opcode != PLUS_EXPR
1929 && opcode != MULT_EXPR
1930 && opcode != BIT_XOR_EXPR
1931 && opcode != BIT_IOR_EXPR
1932 && opcode != BIT_AND_EXPR)
1933 return false;
1935 hash_map<tree, v_info_ptr> v_info_map;
1936 operand_entry *oe1;
1937 unsigned i;
1939 /* Find the summands coming from a VECTOR BIT_FIELD_REF in the addition
1940 chain and put the information into the map. */
1941 FOR_EACH_VEC_ELT (*ops, i, oe1)
1943 enum tree_code dcode;
1944 gimple *oe1def;
1946 if (TREE_CODE (oe1->op) != SSA_NAME)
1947 continue;
1948 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1949 if (!is_gimple_assign (oe1def))
1950 continue;
1951 dcode = gimple_assign_rhs_code (oe1def);
1952 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1953 continue;
1955 tree rhs = gimple_assign_rhs1 (oe1def);
1956 tree vec = TREE_OPERAND (rhs, 0);
1957 tree vec_type = TREE_TYPE (vec);
1959 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1960 continue;
1962 /* Ignore it if target machine can't support this VECTOR type. */
1963 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1964 continue;
1966 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1967 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1968 continue;
1970 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1971 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1972 continue;
1974 /* The type of BIT_FIELD_REF might not be equal to the element type of
1975 the vector. We want to use a vector type with element type the
1976 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1977 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1979 machine_mode simd_mode;
1980 unsigned HOST_WIDE_INT size, nunits;
1981 unsigned HOST_WIDE_INT elem_size
1982 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1983 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1984 continue;
1985 if (size <= elem_size || (size % elem_size) != 0)
1986 continue;
1987 nunits = size / elem_size;
1988 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1989 nunits).exists (&simd_mode))
1990 continue;
1991 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1993 /* Ignore it if target machine can't support this VECTOR type. */
1994 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1995 continue;
1997 /* Check const vector type, constrain BIT_FIELD_REF offset and
1998 size. */
1999 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2000 continue;
2002 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2003 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2004 continue;
2007 tree elem_type = TREE_TYPE (vec_type);
2008 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2009 if (maybe_ne (bit_field_size (rhs), elem_size))
2010 continue;
2012 unsigned idx;
2013 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2014 continue;
2016 /* Ignore it if target machine can't support this type of VECTOR
2017 operation. */
2018 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2019 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2020 continue;
2022 bool existed;
2023 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2024 if (!existed)
2026 info = new v_info;
2027 info->vec_type = vec_type;
2029 else if (!types_compatible_p (vec_type, info->vec_type))
2030 continue;
2031 info->vec.safe_push (std::make_pair (idx, i));
2034 /* We need at least two VECTORs to combine. */
2035 if (v_info_map.elements () <= 1)
2037 cleanup_vinfo_map (v_info_map);
2038 return false;
2041 /* Verify all VECTOR candidates by checking two conditions:
2042 1) sorted offsets are adjacent, no holes.
2043 2) can fill the whole VECTOR perfectly.
2044 And add the valid candidates to a vector for further handling. */
2045 auto_vec<tree> valid_vecs (v_info_map.elements ());
2046 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2047 it != v_info_map.end (); ++it)
2049 tree cand_vec = (*it).first;
2050 v_info_ptr cand_info = (*it).second;
2051 unsigned int num_elems
2052 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2053 if (cand_info->vec.length () != num_elems)
2054 continue;
2055 sbitmap holes = sbitmap_alloc (num_elems);
2056 bitmap_ones (holes);
2057 bool valid = true;
2058 v_info_elem *curr;
2059 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2061 if (!bitmap_bit_p (holes, curr->first))
2063 valid = false;
2064 break;
2066 else
2067 bitmap_clear_bit (holes, curr->first);
2069 if (valid && bitmap_empty_p (holes))
2070 valid_vecs.quick_push (cand_vec);
2071 sbitmap_free (holes);
2074 /* We need at least two VECTORs to combine. */
2075 if (valid_vecs.length () <= 1)
2077 cleanup_vinfo_map (v_info_map);
2078 return false;
2081 valid_vecs.qsort (sort_by_mach_mode);
2082 /* Go through all candidates in machine mode order; candidates with
2083 the same mode are combined together, single ones are skipped. */
2084 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2086 tree tvec = valid_vecs[i];
2087 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2089 /* Skip modes with only a single candidate. */
2090 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2091 continue;
2093 unsigned int idx, j;
2094 gimple *sum = NULL;
2095 tree sum_vec = tvec;
2096 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2097 v_info_elem *elem;
2098 tree vec_type = info_ptr->vec_type;
2100 /* Build the sum for all candidates with the same mode. */
2103 sum = build_and_add_sum (vec_type, sum_vec,
2104 valid_vecs[i + 1], opcode);
2105 if (!useless_type_conversion_p (vec_type,
2106 TREE_TYPE (valid_vecs[i + 1])))
2108 /* Update the operands only after build_and_add_sum,
2109 so that we don't have to repeat the placement algorithm
2110 of build_and_add_sum. */
2111 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2112 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2113 valid_vecs[i + 1]);
2114 tree lhs = make_ssa_name (vec_type);
2115 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2116 gimple_set_uid (g, gimple_uid (sum));
2117 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2118 gimple_assign_set_rhs2 (sum, lhs);
2119 if (sum_vec == tvec)
2121 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2122 lhs = make_ssa_name (vec_type);
2123 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2124 gimple_set_uid (g, gimple_uid (sum));
2125 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2126 gimple_assign_set_rhs1 (sum, lhs);
2128 update_stmt (sum);
2130 sum_vec = gimple_get_lhs (sum);
2131 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2132 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2133 /* Update the related ops of the current candidate VECTOR. */
2134 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2136 idx = elem->second;
2137 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2138 /* Set this so that the op definition will get DCEd later. */
2139 gimple_set_visited (def, true);
2140 if (opcode == PLUS_EXPR
2141 || opcode == BIT_XOR_EXPR
2142 || opcode == BIT_IOR_EXPR)
2143 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2144 else if (opcode == MULT_EXPR)
2145 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2146 else
2148 gcc_assert (opcode == BIT_AND_EXPR);
2149 (*ops)[idx]->op
2150 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2152 (*ops)[idx]->rank = 0;
2154 if (dump_file && (dump_flags & TDF_DETAILS))
2156 fprintf (dump_file, "Generating addition -> ");
2157 print_gimple_stmt (dump_file, sum, 0);
2159 i++;
2161 while ((i < valid_vecs.length () - 1)
2162 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2164 /* Referring to the first valid VECTOR with this mode, generate the
2165 BIT_FIELD_REF statements accordingly. */
2166 info_ptr = *(v_info_map.get (tvec));
2167 gcc_assert (sum);
2168 tree elem_type = TREE_TYPE (vec_type);
2169 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2171 idx = elem->second;
2172 tree dst = make_ssa_name (elem_type);
2173 tree pos = bitsize_int (elem->first
2174 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2175 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2176 TYPE_SIZE (elem_type), pos);
2177 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2178 insert_stmt_after (gs, sum);
2179 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2180 /* Set this so that the op definition will get DCEd later. */
2181 gimple_set_visited (def, true);
2182 (*ops)[idx]->op = gimple_assign_lhs (gs);
2183 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2186 fprintf (dump_file, "Generating bit_field_ref -> ");
2187 print_gimple_stmt (dump_file, gs, 0);
2192 if (dump_file && (dump_flags & TDF_DETAILS))
2193 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2195 cleanup_vinfo_map (v_info_map);
2197 return true;
2200 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2201 expression, examine the other OPS to see if any of them are comparisons
2202 of the same values, which we may be able to combine or eliminate.
2203 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
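/* Another illustrative case (a sketch, using hypothetical operands a and b):
   under BIT_AND_EXPR, (a < b) & (a > b) folds to the constant 0; as handled
   below, a constant result is added to the end of the operand list instead
   of replacing the current entry in place.  */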
2205 static bool
2206 eliminate_redundant_comparison (enum tree_code opcode,
2207 vec<operand_entry *> *ops,
2208 unsigned int currindex,
2209 operand_entry *curr)
2211 tree op1, op2;
2212 enum tree_code lcode, rcode;
2213 gimple *def1, *def2;
2214 int i;
2215 operand_entry *oe;
2217 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2218 return false;
2220 /* Check that CURR is a comparison. */
2221 if (TREE_CODE (curr->op) != SSA_NAME)
2222 return false;
2223 def1 = SSA_NAME_DEF_STMT (curr->op);
2224 if (!is_gimple_assign (def1))
2225 return false;
2226 lcode = gimple_assign_rhs_code (def1);
2227 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2228 return false;
2229 op1 = gimple_assign_rhs1 (def1);
2230 op2 = gimple_assign_rhs2 (def1);
2232 /* Now look for a similar comparison in the remaining OPS. */
2233 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2235 tree t;
2237 if (TREE_CODE (oe->op) != SSA_NAME)
2238 continue;
2239 def2 = SSA_NAME_DEF_STMT (oe->op);
2240 if (!is_gimple_assign (def2))
2241 continue;
2242 rcode = gimple_assign_rhs_code (def2);
2243 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2244 continue;
2246 /* If we got here, we have a match. See if we can combine the
2247 two comparisons. */
2248 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2249 if (opcode == BIT_IOR_EXPR)
2250 t = maybe_fold_or_comparisons (type,
2251 lcode, op1, op2,
2252 rcode, gimple_assign_rhs1 (def2),
2253 gimple_assign_rhs2 (def2));
2254 else
2255 t = maybe_fold_and_comparisons (type,
2256 lcode, op1, op2,
2257 rcode, gimple_assign_rhs1 (def2),
2258 gimple_assign_rhs2 (def2));
2259 if (!t)
2260 continue;
2262 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2263 always give us a boolean_type_node value back. If the original
2264 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2265 we need to convert. */
2266 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2268 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2269 continue;
2270 t = fold_convert (TREE_TYPE (curr->op), t);
2273 if (TREE_CODE (t) != INTEGER_CST
2274 && !operand_equal_p (t, curr->op, 0))
2276 enum tree_code subcode;
2277 tree newop1, newop2;
2278 if (!COMPARISON_CLASS_P (t))
2279 continue;
2280 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2281 STRIP_USELESS_TYPE_CONVERSION (newop1);
2282 STRIP_USELESS_TYPE_CONVERSION (newop2);
2283 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2284 continue;
2285 if (lcode == TREE_CODE (t)
2286 && operand_equal_p (op1, newop1, 0)
2287 && operand_equal_p (op2, newop2, 0))
2288 t = curr->op;
2289 else if ((TREE_CODE (newop1) == SSA_NAME
2290 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2291 || (TREE_CODE (newop2) == SSA_NAME
2292 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2293 continue;
2296 if (dump_file && (dump_flags & TDF_DETAILS))
2298 fprintf (dump_file, "Equivalence: ");
2299 print_generic_expr (dump_file, curr->op);
2300 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2301 print_generic_expr (dump_file, oe->op);
2302 fprintf (dump_file, " -> ");
2303 print_generic_expr (dump_file, t);
2304 fprintf (dump_file, "\n");
2307 /* Now we can delete oe, as it has been subsumed by the new combined
2308 expression t. */
2309 ops->ordered_remove (i);
2310 reassociate_stats.ops_eliminated ++;
2312 /* If t is the same as curr->op, we're done. Otherwise we must
2313 replace curr->op with t. A special case is when we get a constant
2314 back, in which case we add it to the end instead of in place of
2315 the current entry. */
2316 if (TREE_CODE (t) == INTEGER_CST)
2318 ops->ordered_remove (currindex);
2319 add_to_ops_vec (ops, t);
2321 else if (!operand_equal_p (t, curr->op, 0))
2323 gimple *sum;
2324 enum tree_code subcode;
2325 tree newop1;
2326 tree newop2;
2327 gcc_assert (COMPARISON_CLASS_P (t));
2328 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2329 STRIP_USELESS_TYPE_CONVERSION (newop1);
2330 STRIP_USELESS_TYPE_CONVERSION (newop2);
2331 gcc_checking_assert (is_gimple_val (newop1)
2332 && is_gimple_val (newop2));
2333 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2334 curr->op = gimple_get_lhs (sum);
2336 return true;
2339 return false;
2343 /* Transform repeated addition of the same values into a multiplication
2344 by a constant. */
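/* For example (an illustrative sketch, assuming the repeated operands are
   adjacent in OPS, as they are after sorting): the operand list for
     x + x + x + y + y
   is rewritten as
     x * 3 + y * 2
   for integral types; for scalar floats this is done only under
   -funsafe-math-optimizations, as checked below.  */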
2345 static bool
2346 transform_add_to_multiply (vec<operand_entry *> *ops)
2348 operand_entry *oe;
2349 tree op = NULL_TREE;
2350 int j;
2351 int i, start = -1, end = 0, count = 0;
2352 auto_vec<std::pair <int, int> > indxs;
2353 bool changed = false;
2355 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2356 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2357 || !flag_unsafe_math_optimizations))
2358 return false;
2360 /* Look for repeated operands. */
2361 FOR_EACH_VEC_ELT (*ops, i, oe)
2363 if (start == -1)
2365 count = 1;
2366 op = oe->op;
2367 start = i;
2369 else if (operand_equal_p (oe->op, op, 0))
2371 count++;
2372 end = i;
2374 else
2376 if (count > 1)
2377 indxs.safe_push (std::make_pair (start, end));
2378 count = 1;
2379 op = oe->op;
2380 start = i;
2384 if (count > 1)
2385 indxs.safe_push (std::make_pair (start, end));
2387 for (j = indxs.length () - 1; j >= 0; --j)
2389 /* Convert repeated operand addition to multiplication. */
2390 start = indxs[j].first;
2391 end = indxs[j].second;
2392 op = (*ops)[start]->op;
2393 count = end - start + 1;
2394 for (i = end; i >= start; --i)
2395 ops->unordered_remove (i);
2396 tree tmp = make_ssa_name (TREE_TYPE (op));
2397 tree cst = build_int_cst (integer_type_node, count);
2398 gassign *mul_stmt
2399 = gimple_build_assign (tmp, MULT_EXPR,
2400 op, fold_convert (TREE_TYPE (op), cst));
2401 gimple_set_visited (mul_stmt, true);
2402 add_to_ops_vec (ops, tmp, mul_stmt);
2403 changed = true;
2406 return changed;
2410 /* Perform various identities and other optimizations on the list of
2411 operand entries, stored in OPS. The tree code for the binary
2412 operation between all the operands is OPCODE. */
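/* For instance (an illustrative sketch): with OPCODE == PLUS_EXPR and the
   operand list {a, b, 3, 5}, the two trailing constants are folded into a
   single constant 8, giving {a, b, 8}, and the list is then processed
   again in case further eliminations become possible.  */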
2414 static void
2415 optimize_ops_list (enum tree_code opcode,
2416 vec<operand_entry *> *ops)
2418 unsigned int length = ops->length ();
2419 unsigned int i;
2420 operand_entry *oe;
2421 operand_entry *oelast = NULL;
2422 bool iterate = false;
2424 if (length == 1)
2425 return;
2427 oelast = ops->last ();
2429 /* If the last two are constants, pop the constants off, merge them
2430 and try the next two. */
2431 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2433 operand_entry *oelm1 = (*ops)[length - 2];
2435 if (oelm1->rank == 0
2436 && is_gimple_min_invariant (oelm1->op)
2437 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2438 TREE_TYPE (oelast->op)))
2440 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2441 oelm1->op, oelast->op);
2443 if (folded && is_gimple_min_invariant (folded))
2445 if (dump_file && (dump_flags & TDF_DETAILS))
2446 fprintf (dump_file, "Merging constants\n");
2448 ops->pop ();
2449 ops->pop ();
2451 add_to_ops_vec (ops, folded);
2452 reassociate_stats.constants_eliminated++;
2454 optimize_ops_list (opcode, ops);
2455 return;
2460 eliminate_using_constants (opcode, ops);
2461 oelast = NULL;
2463 for (i = 0; ops->iterate (i, &oe);)
2465 bool done = false;
2467 if (eliminate_not_pairs (opcode, ops, i, oe))
2468 return;
2469 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2470 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2471 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2473 if (done)
2474 return;
2475 iterate = true;
2476 oelast = NULL;
2477 continue;
2479 oelast = oe;
2480 i++;
2483 if (iterate)
2484 optimize_ops_list (opcode, ops);
2487 /* The following functions are subroutines to optimize_range_tests and allow
2488 it to try to change a logical combination of comparisons into a range
2489 test.
2491 For example, both
2492 X == 2 || X == 5 || X == 3 || X == 4
2494 X >= 2 && X <= 5
2495 are converted to
2496 (unsigned) (X - 2) <= 3
2498 For more information see the comments above fold_range_test in
2499 fold-const.cc; this implementation is for GIMPLE. */
2503 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2505 void
2506 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2508 if (!skip_exp)
2509 print_generic_expr (file, r->exp);
2510 fprintf (file, " %c[", r->in_p ? '+' : '-');
2511 print_generic_expr (file, r->low);
2512 fputs (", ", file);
2513 print_generic_expr (file, r->high);
2514 fputc (']', file);
2517 /* Dump the range entry R to STDERR. */
2519 DEBUG_FUNCTION void
2520 debug_range_entry (struct range_entry *r)
2522 dump_range_entry (stderr, r, false);
2523 fputc ('\n', stderr);
2526 /* This is similar to make_range in fold-const.cc, but on top of
2527 GIMPLE instead of trees. If EXP is non-NULL, it should be
2528 an SSA_NAME and STMT argument is ignored, otherwise STMT
2529 argument should be a GIMPLE_COND. */
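/* An illustrative sketch (hypothetical SSA names): if EXP is _1 and its
   defining statement is
     _1 = x_2(D) == 5;
   the resulting entry describes the range  x_2 +[5, 5],
   i.e. R->exp = x_2, R->in_p = true and R->low == R->high == 5.  */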
2531 void
2532 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2534 int in_p;
2535 tree low, high;
2536 bool is_bool, strict_overflow_p;
2538 r->exp = NULL_TREE;
2539 r->in_p = false;
2540 r->strict_overflow_p = false;
2541 r->low = NULL_TREE;
2542 r->high = NULL_TREE;
2543 if (exp != NULL_TREE
2544 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2545 return;
2547 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2548 and see if we can refine the range. Some of the cases below may not
2549 happen, but it doesn't seem worth worrying about this. We "continue"
2550 the outer loop when we've changed something; otherwise we "break"
2551 the switch, which will "break" the while. */
2552 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2553 high = low;
2554 in_p = 0;
2555 strict_overflow_p = false;
2556 is_bool = false;
2557 if (exp == NULL_TREE)
2558 is_bool = true;
2559 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2561 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2562 is_bool = true;
2563 else
2564 return;
2566 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2567 is_bool = true;
2569 while (1)
2571 enum tree_code code;
2572 tree arg0, arg1, exp_type;
2573 tree nexp;
2574 location_t loc;
2576 if (exp != NULL_TREE)
2578 if (TREE_CODE (exp) != SSA_NAME
2579 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2580 break;
2582 stmt = SSA_NAME_DEF_STMT (exp);
2583 if (!is_gimple_assign (stmt))
2584 break;
2586 code = gimple_assign_rhs_code (stmt);
2587 arg0 = gimple_assign_rhs1 (stmt);
2588 arg1 = gimple_assign_rhs2 (stmt);
2589 exp_type = TREE_TYPE (exp);
2591 else
2593 code = gimple_cond_code (stmt);
2594 arg0 = gimple_cond_lhs (stmt);
2595 arg1 = gimple_cond_rhs (stmt);
2596 exp_type = boolean_type_node;
2599 if (TREE_CODE (arg0) != SSA_NAME
2600 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2601 break;
2602 loc = gimple_location (stmt);
2603 switch (code)
2605 case BIT_NOT_EXPR:
2606 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2607 /* Ensure the range is either +[-,0], +[0,0],
2608 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2609 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2610 or similar expression of unconditional true or
2611 false, it should not be negated. */
2612 && ((high && integer_zerop (high))
2613 || (low && integer_onep (low))))
2615 in_p = !in_p;
2616 exp = arg0;
2617 continue;
2619 break;
2620 case SSA_NAME:
2621 exp = arg0;
2622 continue;
2623 CASE_CONVERT:
2624 if (is_bool)
2626 if ((TYPE_PRECISION (exp_type) == 1
2627 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2628 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2629 return;
2631 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2633 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2634 is_bool = true;
2635 else
2636 return;
2638 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2639 is_bool = true;
2640 goto do_default;
2641 case EQ_EXPR:
2642 case NE_EXPR:
2643 case LT_EXPR:
2644 case LE_EXPR:
2645 case GE_EXPR:
2646 case GT_EXPR:
2647 is_bool = true;
2648 /* FALLTHRU */
2649 default:
2650 if (!is_bool)
2651 return;
2652 do_default:
2653 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2654 &low, &high, &in_p,
2655 &strict_overflow_p);
2656 if (nexp != NULL_TREE)
2658 exp = nexp;
2659 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2660 continue;
2662 break;
2664 break;
2666 if (is_bool)
2668 r->exp = exp;
2669 r->in_p = in_p;
2670 r->low = low;
2671 r->high = high;
2672 r->strict_overflow_p = strict_overflow_p;
2676 /* Comparison function for qsort. Sort entries
2677 without SSA_NAME exp first, then with SSA_NAMEs sorted
2678 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2679 by increasing ->low and if ->low is the same, by increasing
2680 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2681 maximum. */
2683 static int
2684 range_entry_cmp (const void *a, const void *b)
2686 const struct range_entry *p = (const struct range_entry *) a;
2687 const struct range_entry *q = (const struct range_entry *) b;
2689 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2691 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2693 /* Group range_entries for the same SSA_NAME together. */
2694 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2695 return -1;
2696 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2697 return 1;
2698 /* If ->low is different, NULL low goes first, then by
2699 ascending low. */
2700 if (p->low != NULL_TREE)
2702 if (q->low != NULL_TREE)
2704 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2705 p->low, q->low);
2706 if (tem && integer_onep (tem))
2707 return -1;
2708 tem = fold_binary (GT_EXPR, boolean_type_node,
2709 p->low, q->low);
2710 if (tem && integer_onep (tem))
2711 return 1;
2713 else
2714 return 1;
2716 else if (q->low != NULL_TREE)
2717 return -1;
2718 /* If ->high is different, NULL high goes last, before that by
2719 ascending high. */
2720 if (p->high != NULL_TREE)
2722 if (q->high != NULL_TREE)
2724 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2725 p->high, q->high);
2726 if (tem && integer_onep (tem))
2727 return -1;
2728 tem = fold_binary (GT_EXPR, boolean_type_node,
2729 p->high, q->high);
2730 if (tem && integer_onep (tem))
2731 return 1;
2733 else
2734 return -1;
2736 else if (q->high != NULL_TREE)
2737 return 1;
2738 /* If both ranges are the same, sort below by ascending idx. */
2740 else
2741 return 1;
2743 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2744 return -1;
2746 if (p->idx < q->idx)
2747 return -1;
2748 else
2750 gcc_checking_assert (p->idx > q->idx);
2751 return 1;
2755 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2756 insert the needed statements before or after GSI according to BEFORE. */
2758 static tree
2759 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2761 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2762 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2763 if (TREE_CODE (ret) != SSA_NAME)
2765 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2766 if (before)
2767 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2768 else
2769 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2770 ret = gimple_assign_lhs (g);
2772 return ret;
2775 /* Helper routine of optimize_range_tests.
2776 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2777 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2778 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2779 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2780 an array of COUNT pointers to other ranges. Return
2781 true if the range merge has been successful.
2782 If OPCODE is ERROR_MARK, this is called from within
2783 maybe_optimize_range_tests and is performing inter-bb range optimization.
2784 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2785 oe->rank. */
2787 static bool
2788 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2789 struct range_entry **otherrangep,
2790 unsigned int count, enum tree_code opcode,
2791 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2792 bool in_p, tree low, tree high, bool strict_overflow_p)
2794 unsigned int idx = range->idx;
2795 struct range_entry *swap_with = NULL;
2796 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2797 if (opcode == ERROR_MARK)
2799 /* For inter-bb range test optimization, pick from the range tests
2800 the one which is tested in the earliest condition (one dominating
2801 the others), because otherwise there could be some UB (e.g. signed
2802 overflow) in following bbs that we'd expose which wasn't there in
2803 the original program. See PR104196. */
2804 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2805 basic_block range_bb = orig_range_bb;
2806 for (unsigned int i = 0; i < count; i++)
2808 struct range_entry *this_range;
2809 if (otherrange)
2810 this_range = otherrange + i;
2811 else
2812 this_range = otherrangep[i];
2813 operand_entry *oe = (*ops)[this_range->idx];
2814 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2815 if (range_bb != this_bb
2816 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2818 swap_with = this_range;
2819 range_bb = this_bb;
2820 idx = this_range->idx;
2823 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2824 only defined in later blocks. In this case we can't move the
2825 merged comparison earlier, so instead check if there are any stmts
2826 that might trigger signed integer overflow in between and rewrite
2827 them. But only after we check if the optimization is possible. */
2828 if (seq && swap_with)
2830 rewrite_bb_first = range_bb;
2831 rewrite_bb_last = orig_range_bb;
2832 idx = range->idx;
2833 swap_with = NULL;
2836 operand_entry *oe = (*ops)[idx];
2837 tree op = oe->op;
2838 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2839 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2840 location_t loc = gimple_location (stmt);
2841 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2842 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2843 in_p, low, high);
2844 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2845 gimple_stmt_iterator gsi;
2846 unsigned int i, uid;
2848 if (tem == NULL_TREE)
2849 return false;
2851 /* If op is a default def SSA_NAME, there is no place to insert the
2852 new comparison. Give up, unless we can use OP itself as the
2853 range test. */
2854 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2856 if (op == range->exp
2857 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2858 || TREE_CODE (optype) == BOOLEAN_TYPE)
2859 && (op == tem
2860 || (TREE_CODE (tem) == EQ_EXPR
2861 && TREE_OPERAND (tem, 0) == op
2862 && integer_onep (TREE_OPERAND (tem, 1))))
2863 && opcode != BIT_IOR_EXPR
2864 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2866 stmt = NULL;
2867 tem = op;
2869 else
2870 return false;
2873 if (swap_with)
2874 std::swap (range->idx, swap_with->idx);
2876 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2877 warning_at (loc, OPT_Wstrict_overflow,
2878 "assuming signed overflow does not occur "
2879 "when simplifying range test");
2881 if (dump_file && (dump_flags & TDF_DETAILS))
2883 struct range_entry *r;
2884 fprintf (dump_file, "Optimizing range tests ");
2885 dump_range_entry (dump_file, range, false);
2886 for (i = 0; i < count; i++)
2888 if (otherrange)
2889 r = otherrange + i;
2890 else
2891 r = otherrangep[i];
2892 if (r->exp
2893 && r->exp != range->exp
2894 && TREE_CODE (r->exp) == SSA_NAME)
2896 fprintf (dump_file, " and ");
2897 dump_range_entry (dump_file, r, false);
2899 else
2901 fprintf (dump_file, " and");
2902 dump_range_entry (dump_file, r, true);
2905 fprintf (dump_file, "\n into ");
2906 print_generic_expr (dump_file, tem);
2907 fprintf (dump_file, "\n");
2910 /* In inter-bb range optimization mode, if we have a seq, we can't
2911 move the merged comparison to the earliest bb from the comparisons
2912 being replaced, so instead rewrite stmts that could trigger signed
2913 integer overflow. */
2914 for (basic_block bb = rewrite_bb_last;
2915 bb != rewrite_bb_first; bb = single_pred (bb))
2916 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2917 !gsi_end_p (gsi); gsi_next (&gsi))
2919 gimple *stmt = gsi_stmt (gsi);
2920 if (is_gimple_assign (stmt))
2921 if (tree lhs = gimple_assign_lhs (stmt))
2922 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2923 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2924 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2926 enum tree_code code = gimple_assign_rhs_code (stmt);
2927 if (arith_code_with_undefined_signed_overflow (code))
2929 gimple_stmt_iterator gsip = gsi;
2930 gimple_stmt_iterator gsin = gsi;
2931 gsi_prev (&gsip);
2932 gsi_next (&gsin);
2933 rewrite_to_defined_overflow (stmt, true);
2934 unsigned uid = gimple_uid (stmt);
2935 if (gsi_end_p (gsip))
2936 gsip = gsi_after_labels (bb);
2937 else
2938 gsi_next (&gsip);
2939 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2940 gsi_next (&gsip))
2941 gimple_set_uid (gsi_stmt (gsip), uid);
2946 if (opcode == BIT_IOR_EXPR
2947 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2948 tem = invert_truthvalue_loc (loc, tem);
2950 tem = fold_convert_loc (loc, optype, tem);
2951 if (stmt)
2953 gsi = gsi_for_stmt (stmt);
2954 uid = gimple_uid (stmt);
2956 else
2958 gsi = gsi_none ();
2959 uid = 0;
2961 if (stmt == NULL)
2962 gcc_checking_assert (tem == op);
2963 /* In rare cases range->exp can be equal to lhs of stmt.
2964 In that case we have to insert after the stmt rather than before
2965 it. If stmt is a PHI, insert it at the start of the basic block. */
2966 else if (op != range->exp)
2968 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2969 tem = force_into_ssa_name (&gsi, tem, true);
2970 gsi_prev (&gsi);
2972 else if (gimple_code (stmt) != GIMPLE_PHI)
2974 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2975 tem = force_into_ssa_name (&gsi, tem, false);
2977 else
2979 gsi = gsi_after_labels (gimple_bb (stmt));
2980 if (!gsi_end_p (gsi))
2981 uid = gimple_uid (gsi_stmt (gsi));
2982 else
2984 gsi = gsi_start_bb (gimple_bb (stmt));
2985 uid = 1;
2986 while (!gsi_end_p (gsi))
2988 uid = gimple_uid (gsi_stmt (gsi));
2989 gsi_next (&gsi);
2992 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2993 tem = force_into_ssa_name (&gsi, tem, true);
2994 if (gsi_end_p (gsi))
2995 gsi = gsi_last_bb (gimple_bb (stmt));
2996 else
2997 gsi_prev (&gsi);
2999 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3000 if (gimple_uid (gsi_stmt (gsi)))
3001 break;
3002 else
3003 gimple_set_uid (gsi_stmt (gsi), uid);
3005 oe->op = tem;
3006 range->exp = exp;
3007 range->low = low;
3008 range->high = high;
3009 range->in_p = in_p;
3010 range->strict_overflow_p = false;
3012 for (i = 0; i < count; i++)
3014 if (otherrange)
3015 range = otherrange + i;
3016 else
3017 range = otherrangep[i];
3018 oe = (*ops)[range->idx];
3019 /* Now change all the other range test immediate uses, so that
3020 those tests will be optimized away. */
3021 if (opcode == ERROR_MARK)
3023 if (oe->op)
3024 oe->op = build_int_cst (TREE_TYPE (oe->op),
3025 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3026 else
3027 oe->op = (oe->rank == BIT_IOR_EXPR
3028 ? boolean_false_node : boolean_true_node);
3030 else
3031 oe->op = error_mark_node;
3032 range->exp = NULL_TREE;
3033 range->low = NULL_TREE;
3034 range->high = NULL_TREE;
3036 return true;
3039 /* Optimize X == CST1 || X == CST2
3040 if popcount (CST1 ^ CST2) == 1 into
3041 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3042 Similarly for ranges. E.g.
3043 X != 2 && X != 3 && X != 10 && X != 11
3044 will be transformed by the previous optimization into
3045 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3046 and this loop can transform that into
3047 !(((X & ~8) - 2U) <= 1U). */
3049 static bool
3050 optimize_range_tests_xor (enum tree_code opcode, tree type,
3051 tree lowi, tree lowj, tree highi, tree highj,
3052 vec<operand_entry *> *ops,
3053 struct range_entry *rangei,
3054 struct range_entry *rangej)
3056 tree lowxor, highxor, tem, exp;
3057 /* Check lowi ^ lowj == highi ^ highj and
3058 popcount (lowi ^ lowj) == 1. */
3059 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3060 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3061 return false;
3062 if (!integer_pow2p (lowxor))
3063 return false;
3064 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3065 if (!tree_int_cst_equal (lowxor, highxor))
3066 return false;
3068 exp = rangei->exp;
3069 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3070 int prec = GET_MODE_PRECISION (mode);
3071 if (TYPE_PRECISION (type) < prec
3072 || (wi::to_wide (TYPE_MIN_VALUE (type))
3073 != wi::min_value (prec, TYPE_SIGN (type)))
3074 || (wi::to_wide (TYPE_MAX_VALUE (type))
3075 != wi::max_value (prec, TYPE_SIGN (type))))
3077 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3078 exp = fold_convert (type, exp);
3079 lowxor = fold_convert (type, lowxor);
3080 lowi = fold_convert (type, lowi);
3081 highi = fold_convert (type, highi);
3083 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3084 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3085 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3086 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3087 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3088 NULL, rangei->in_p, lowj, highj,
3089 rangei->strict_overflow_p
3090 || rangej->strict_overflow_p))
3091 return true;
3092 return false;
3095 /* Optimize X == CST1 || X == CST2
3096 if popcount (CST2 - CST1) == 1 into
3097 ((X - CST1) & ~(CST2 - CST1)) == 0.
3098 Similarly for ranges. E.g.
3099 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3100 || X == 75 || X == 45
3101 will be transformed by the previous optimization into
3102 (X - 43U) <= 3U || (X - 75U) <= 3U
3103 and this loop can transform that into
3104 ((X - 43U) & ~(75U - 43U)) <= 3U. */
3105 static bool
3106 optimize_range_tests_diff (enum tree_code opcode, tree type,
3107 tree lowi, tree lowj, tree highi, tree highj,
3108 vec<operand_entry *> *ops,
3109 struct range_entry *rangei,
3110 struct range_entry *rangej)
3112 tree tem1, tem2, mask;
3113 /* Check highi - lowi == highj - lowj. */
3114 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3115 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3116 return false;
3117 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3118 if (!tree_int_cst_equal (tem1, tem2))
3119 return false;
3120 /* Check popcount (lowj - lowi) == 1. */
3121 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3122 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3123 return false;
3124 if (!integer_pow2p (tem1))
3125 return false;
3127 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3128 int prec = GET_MODE_PRECISION (mode);
3129 if (TYPE_PRECISION (type) < prec
3130 || (wi::to_wide (TYPE_MIN_VALUE (type))
3131 != wi::min_value (prec, TYPE_SIGN (type)))
3132 || (wi::to_wide (TYPE_MAX_VALUE (type))
3133 != wi::max_value (prec, TYPE_SIGN (type))))
3134 type = build_nonstandard_integer_type (prec, 1);
3135 else
3136 type = unsigned_type_for (type);
3137 tem1 = fold_convert (type, tem1);
3138 tem2 = fold_convert (type, tem2);
3139 lowi = fold_convert (type, lowi);
3140 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3141 tem1 = fold_build2 (MINUS_EXPR, type,
3142 fold_convert (type, rangei->exp), lowi);
3143 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3144 lowj = build_int_cst (type, 0);
3145 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3146 NULL, rangei->in_p, lowj, tem2,
3147 rangei->strict_overflow_p
3148 || rangej->strict_overflow_p))
3149 return true;
3150 return false;
3153 /* Perform the checks that are common to optimize_range_tests_xor and
3154 optimize_range_tests_diff.
3155 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
3156 else call optimize_range_tests_diff. */
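/* Illustrative note: only pairs of ranges where the second one starts
   strictly above the first (LOWJ > HIGHI) are considered, e.g. [2, 3]
   together with [10, 11], but not [2, 5] with [4, 7].  */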
3158 static bool
3159 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3160 bool optimize_xor, vec<operand_entry *> *ops,
3161 struct range_entry *ranges)
3163 int i, j;
3164 bool any_changes = false;
3165 for (i = first; i < length; i++)
3167 tree lowi, highi, lowj, highj, type, tem;
3169 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3170 continue;
3171 type = TREE_TYPE (ranges[i].exp);
3172 if (!INTEGRAL_TYPE_P (type))
3173 continue;
3174 lowi = ranges[i].low;
3175 if (lowi == NULL_TREE)
3176 lowi = TYPE_MIN_VALUE (type);
3177 highi = ranges[i].high;
3178 if (highi == NULL_TREE)
3179 continue;
3180 for (j = i + 1; j < length && j < i + 64; j++)
3182 bool changes;
3183 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3184 continue;
3185 lowj = ranges[j].low;
3186 if (lowj == NULL_TREE)
3187 continue;
3188 highj = ranges[j].high;
3189 if (highj == NULL_TREE)
3190 highj = TYPE_MAX_VALUE (type);
3191 /* Check lowj > highi. */
3192 tem = fold_binary (GT_EXPR, boolean_type_node,
3193 lowj, highi);
3194 if (tem == NULL_TREE || !integer_onep (tem))
3195 continue;
3196 if (optimize_xor)
3197 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3198 highi, highj, ops,
3199 ranges + i, ranges + j);
3200 else
3201 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3202 highi, highj, ops,
3203 ranges + i, ranges + j);
3204 if (changes)
3206 any_changes = true;
3207 break;
3211 return any_changes;
3214 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3215 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3216 EXP on success, NULL otherwise. */
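/* A worked example (illustrative sketch): with PREC == 64,
   TOTALLOW == LOW == 43 and HIGH == 46, the range covers 4 values, so
   *MASK becomes 0xf (bits 0..3).  For a later range with LOW == 75,
   HIGH == 78 and the same TOTALLOW == 43, the same 4-bit mask is
   shifted left by 75 - 43 == 32 bit positions instead.  */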
3218 static tree
3219 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3220 wide_int *mask, tree *totallowp)
3222 tree tem = int_const_binop (MINUS_EXPR, high, low);
3223 if (tem == NULL_TREE
3224 || TREE_CODE (tem) != INTEGER_CST
3225 || TREE_OVERFLOW (tem)
3226 || tree_int_cst_sgn (tem) == -1
3227 || compare_tree_int (tem, prec) != -1)
3228 return NULL_TREE;
3230 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3231 *mask = wi::shifted_mask (0, max, false, prec);
3232 if (TREE_CODE (exp) == BIT_AND_EXPR
3233 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3235 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3236 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3237 if (wi::popcount (msk) == 1
3238 && wi::ltu_p (msk, prec - max))
3240 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3241 max += msk.to_uhwi ();
3242 exp = TREE_OPERAND (exp, 0);
3243 if (integer_zerop (low)
3244 && TREE_CODE (exp) == PLUS_EXPR
3245 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3247 tree ret = TREE_OPERAND (exp, 0);
3248 STRIP_NOPS (ret);
3249 widest_int bias
3250 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3251 TYPE_PRECISION (TREE_TYPE (low))));
3252 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3253 if (totallowp)
3255 *totallowp = tbias;
3256 return ret;
3258 else if (!tree_int_cst_lt (totallow, tbias))
3259 return NULL_TREE;
3260 bias = wi::to_widest (tbias);
3261 bias -= wi::to_widest (totallow);
3262 if (bias >= 0 && bias < prec - max)
3264 *mask = wi::lshift (*mask, bias);
3265 return ret;
3270 if (totallowp)
3271 return exp;
3272 if (!tree_int_cst_lt (totallow, low))
3273 return exp;
3274 tem = int_const_binop (MINUS_EXPR, low, totallow);
3275 if (tem == NULL_TREE
3276 || TREE_CODE (tem) != INTEGER_CST
3277 || TREE_OVERFLOW (tem)
3278 || compare_tree_int (tem, prec - max) == 1)
3279 return NULL_TREE;
3281 *mask = wi::lshift (*mask, wi::to_widest (tem));
3282 return exp;
3285 /* Attempt to optimize small range tests using a bit test.
3286 E.g.
3287 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3288 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3289 has already been optimized by the earlier optimizations into:
3290 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3291 As the whole 43 through 82 range spans fewer than 64 values,
3292 for 64-bit word targets optimize that into:
3293 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3295 static bool
3296 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3297 vec<operand_entry *> *ops,
3298 struct range_entry *ranges)
3300 int i, j;
3301 bool any_changes = false;
3302 int prec = GET_MODE_BITSIZE (word_mode);
3303 auto_vec<struct range_entry *, 64> candidates;
3305 for (i = first; i < length - 1; i++)
3307 tree lowi, highi, lowj, highj, type;
3309 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3310 continue;
3311 type = TREE_TYPE (ranges[i].exp);
3312 if (!INTEGRAL_TYPE_P (type))
3313 continue;
3314 lowi = ranges[i].low;
3315 if (lowi == NULL_TREE)
3316 lowi = TYPE_MIN_VALUE (type);
3317 highi = ranges[i].high;
3318 if (highi == NULL_TREE)
3319 continue;
3320 wide_int mask;
3321 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3322 highi, &mask, &lowi);
3323 if (exp == NULL_TREE)
3324 continue;
3325 bool strict_overflow_p = ranges[i].strict_overflow_p;
3326 candidates.truncate (0);
3327 int end = MIN (i + 64, length);
3328 for (j = i + 1; j < end; j++)
3330 tree exp2;
3331 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3332 continue;
3333 if (ranges[j].exp == exp)
3335 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3337 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3338 if (exp2 == exp)
3340 else if (TREE_CODE (exp2) == PLUS_EXPR)
3342 exp2 = TREE_OPERAND (exp2, 0);
3343 STRIP_NOPS (exp2);
3344 if (exp2 != exp)
3345 continue;
3347 else
3348 continue;
3350 else
3351 continue;
3352 lowj = ranges[j].low;
3353 if (lowj == NULL_TREE)
3354 continue;
3355 highj = ranges[j].high;
3356 if (highj == NULL_TREE)
3357 highj = TYPE_MAX_VALUE (type);
3358 wide_int mask2;
3359 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3360 highj, &mask2, NULL);
3361 if (exp2 != exp)
3362 continue;
3363 mask |= mask2;
3364 strict_overflow_p |= ranges[j].strict_overflow_p;
3365 candidates.safe_push (&ranges[j]);
3368 /* If every possible relative value of the expression is a valid shift
3369 amount, then we can merge the entry test into the bit test. In that
3370 case, if we would otherwise need 2 or more comparisons, use the
3371 bit test; in the other cases, the threshold is 3 comparisons. */
3372 bool entry_test_needed;
3373 value_range r;
3374 if (TREE_CODE (exp) == SSA_NAME
3375 && get_range_query (cfun)->range_of_expr (r, exp)
3376 && !r.undefined_p ()
3377 && !r.varying_p ()
3378 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3380 wide_int min = r.lower_bound ();
3381 wide_int ilowi = wi::to_wide (lowi);
3382 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3384 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3385 mask = wi::lshift (mask, ilowi - min);
3387 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3389 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3390 mask = wi::lrshift (mask, min - ilowi);
3392 entry_test_needed = false;
3394 else
3395 entry_test_needed = true;
3396 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3398 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3399 wi::to_widest (lowi)
3400 + prec - 1 - wi::clz (mask));
3401 operand_entry *oe = (*ops)[ranges[i].idx];
3402 tree op = oe->op;
3403 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3404 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3405 (cfun, oe->id));
3406 location_t loc = gimple_location (stmt);
3407 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3409 /* See if it isn't cheaper to pretend the minimum value of the
3410 range is 0, if the maximum value is small enough.
3411 We can then avoid the subtraction of the minimum value, but the
3412 mask constant could perhaps be more expensive. */
3413 if (compare_tree_int (lowi, 0) > 0
3414 && compare_tree_int (high, prec) < 0)
3416 int cost_diff;
3417 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3418 rtx reg = gen_raw_REG (word_mode, 10000);
3419 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3420 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3421 GEN_INT (-m)),
3422 word_mode, speed_p);
3423 rtx r = immed_wide_int_const (mask, word_mode);
3424 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3425 word_mode, speed_p);
3426 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3427 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3428 word_mode, speed_p);
3429 if (cost_diff > 0)
3431 mask = wi::lshift (mask, m);
3432 lowi = build_zero_cst (TREE_TYPE (lowi));
3436 tree tem;
3437 if (entry_test_needed)
3439 tem = build_range_check (loc, optype, unshare_expr (exp),
3440 false, lowi, high);
3441 if (tem == NULL_TREE || is_gimple_val (tem))
3442 continue;
3444 else
3445 tem = NULL_TREE;
3446 tree etype = unsigned_type_for (TREE_TYPE (exp));
3447 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3448 fold_convert_loc (loc, etype, exp),
3449 fold_convert_loc (loc, etype, lowi));
3450 exp = fold_convert_loc (loc, integer_type_node, exp);
3451 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3452 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3453 build_int_cst (word_type, 1), exp);
3454 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3455 wide_int_to_tree (word_type, mask));
3456 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3457 build_zero_cst (word_type));
3458 if (is_gimple_val (exp))
3459 continue;
3461 /* The shift might have undefined behavior if TEM is true,
3462 but reassociate_bb isn't prepared to have basic blocks
3463 split when it is running. So, temporarily emit code
3464 with BIT_IOR_EXPR instead of &&, and fix it up in
3465 branch_fixup. */
3466 gimple_seq seq = NULL;
3467 if (tem)
3469 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3470 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3471 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3473 gimple_seq seq2;
3474 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3475 gimple_seq_add_seq_without_update (&seq, seq2);
3476 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3477 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3478 if (tem)
3480 gimple *g = gimple_build_assign (make_ssa_name (optype),
3481 BIT_IOR_EXPR, tem, exp);
3482 gimple_set_location (g, loc);
3483 gimple_seq_add_stmt_without_update (&seq, g);
3484 exp = gimple_assign_lhs (g);
3486 tree val = build_zero_cst (optype);
3487 if (update_range_test (&ranges[i], NULL, candidates.address (),
3488 candidates.length (), opcode, ops, exp,
3489 seq, false, val, val, strict_overflow_p))
3491 any_changes = true;
3492 if (tem)
3493 reassoc_branch_fixups.safe_push (tem);
3495 else
3496 gimple_seq_discard (seq);
3499 return any_changes;
3502 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3503 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3504 Also, handle x < C && y < C && z < C where C is power of two as
3505 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3506 as (x | y | z) < 0. */
3508 static bool
3509 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3510 vec<operand_entry *> *ops,
3511 struct range_entry *ranges)
3513 int i;
3514 unsigned int b;
3515 bool any_changes = false;
3516 auto_vec<int, 128> buckets;
3517 auto_vec<int, 32> chains;
3518 auto_vec<struct range_entry *, 32> candidates;
3520 for (i = first; i < length; i++)
3522 int idx;
3524 if (ranges[i].exp == NULL_TREE
3525 || TREE_CODE (ranges[i].exp) != SSA_NAME
3526 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3527 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3528 continue;
3530 if (ranges[i].low != NULL_TREE
3531 && ranges[i].high != NULL_TREE
3532 && ranges[i].in_p
3533 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3535 idx = !integer_zerop (ranges[i].low);
3536 if (idx && !integer_all_onesp (ranges[i].low))
3537 continue;
3539 else if (ranges[i].high != NULL_TREE
3540 && TREE_CODE (ranges[i].high) == INTEGER_CST
3541 && ranges[i].in_p)
3543 wide_int w = wi::to_wide (ranges[i].high);
3544 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3545 int l = wi::clz (w);
3546 idx = 2;
3547 if (l <= 0
3548 || l >= prec
3549 || w != wi::mask (prec - l, false, prec))
3550 continue;
3551 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3552 && ranges[i].low == NULL_TREE)
3553 || (ranges[i].low
3554 && integer_zerop (ranges[i].low))))
3555 continue;
3557 else if (ranges[i].high == NULL_TREE
3558 && ranges[i].low != NULL_TREE
3559 /* Perform this optimization only in the last
3560 reassoc pass, as it interferes with the reassociation
3561 itself, or could also interfere with VRP etc., which might
3562 not be able to virtually undo the optimization. */
3563 && !reassoc_insert_powi_p
3564 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3565 && integer_zerop (ranges[i].low))
3566 idx = 3;
3567 else
3568 continue;
3570 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3571 if (buckets.length () <= b)
3572 buckets.safe_grow_cleared (b + 1, true);
3573 if (chains.length () <= (unsigned) i)
3574 chains.safe_grow (i + 1, true);
3575 chains[i] = buckets[b];
3576 buckets[b] = i + 1;
3579 FOR_EACH_VEC_ELT (buckets, b, i)
3580 if (i && chains[i - 1])
3582 int j, k = i;
3583 if ((b % 4) == 2)
3585 /* When ranges[X - 1].high + 1 is a power of two,
3586 we need to process the same bucket up to
3587 precision - 1 times, each time splitting the entries
3588 with the same high bound into one chain and the
3589 rest into another one to be processed later. */
3590 int this_prev = i;
3591 int other_prev = 0;
3592 for (j = chains[i - 1]; j; j = chains[j - 1])
3594 if (tree_int_cst_equal (ranges[i - 1].high,
3595 ranges[j - 1].high))
3597 chains[this_prev - 1] = j;
3598 this_prev = j;
3600 else if (other_prev == 0)
3602 buckets[b] = j;
3603 other_prev = j;
3605 else
3607 chains[other_prev - 1] = j;
3608 other_prev = j;
3611 chains[this_prev - 1] = 0;
3612 if (other_prev)
3613 chains[other_prev - 1] = 0;
3614 if (chains[i - 1] == 0)
3616 if (other_prev)
3617 b--;
3618 continue;
3621 for (j = chains[i - 1]; j; j = chains[j - 1])
3623 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3624 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3625 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3626 k = j;
3628 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3629 tree type2 = NULL_TREE;
3630 bool strict_overflow_p = false;
3631 candidates.truncate (0);
3632 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3633 type1 = pointer_sized_int_node;
3634 for (j = i; j; j = chains[j - 1])
3636 tree type = TREE_TYPE (ranges[j - 1].exp);
3637 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3638 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3639 type = pointer_sized_int_node;
3640 if ((b % 4) == 3)
3642 /* For the signed < 0 cases, the types should be
3643 really compatible (all signed with the same precision);
3644 put the ranges that have a different in_p from
3645 k first. */
3646 if (!useless_type_conversion_p (type1, type))
3647 continue;
3648 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3649 candidates.safe_push (&ranges[j - 1]);
3650 type2 = type1;
3651 continue;
3653 if (j == k
3654 || useless_type_conversion_p (type1, type))
3656 else if (type2 == NULL_TREE
3657 || useless_type_conversion_p (type2, type))
3659 if (type2 == NULL_TREE)
3660 type2 = type;
3661 candidates.safe_push (&ranges[j - 1]);
3664 unsigned l = candidates.length ();
3665 for (j = i; j; j = chains[j - 1])
3667 tree type = TREE_TYPE (ranges[j - 1].exp);
3668 if (j == k)
3669 continue;
3670 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3671 type = pointer_sized_int_node;
3672 if ((b % 4) == 3)
3674 if (!useless_type_conversion_p (type1, type))
3675 continue;
3676 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3677 candidates.safe_push (&ranges[j - 1]);
3678 continue;
3680 if (useless_type_conversion_p (type1, type))
3682 else if (type2 == NULL_TREE
3683 || useless_type_conversion_p (type2, type))
3684 continue;
3685 candidates.safe_push (&ranges[j - 1]);
3687 gimple_seq seq = NULL;
3688 tree op = NULL_TREE;
3689 unsigned int id;
3690 struct range_entry *r;
3691 candidates.safe_push (&ranges[k - 1]);
3692 FOR_EACH_VEC_ELT (candidates, id, r)
3694 gimple *g;
3695 enum tree_code code;
3696 if (id == 0)
3698 op = r->exp;
3699 continue;
3701 if (id == l
3702 || POINTER_TYPE_P (TREE_TYPE (op))
3703 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3705 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3706 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3707 if (code == BIT_NOT_EXPR
3708 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3710 g = gimple_build_assign (make_ssa_name (type3),
3711 NOP_EXPR, op);
3712 gimple_seq_add_stmt_without_update (&seq, g);
3713 op = gimple_assign_lhs (g);
3715 g = gimple_build_assign (make_ssa_name (type3), code, op);
3716 gimple_seq_add_stmt_without_update (&seq, g);
3717 op = gimple_assign_lhs (g);
3719 tree type = TREE_TYPE (r->exp);
3720 tree exp = r->exp;
3721 if (POINTER_TYPE_P (type)
3722 || TREE_CODE (type) == OFFSET_TYPE
3723 || (id >= l && !useless_type_conversion_p (type1, type)))
3725 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3726 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3727 gimple_seq_add_stmt_without_update (&seq, g);
3728 exp = gimple_assign_lhs (g);
3730 if ((b % 4) == 3)
3731 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3732 else
3733 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3734 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3735 code, op, exp);
3736 gimple_seq_add_stmt_without_update (&seq, g);
3737 op = gimple_assign_lhs (g);
3739 type1 = TREE_TYPE (ranges[k - 1].exp);
3740 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3742 gimple *g
3743 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3744 gimple_seq_add_stmt_without_update (&seq, g);
3745 op = gimple_assign_lhs (g);
3747 candidates.pop ();
3748 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3749 candidates.length (), opcode, ops, op,
3750 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3751 ranges[k - 1].high, strict_overflow_p))
3752 any_changes = true;
3753 else
3754 gimple_seq_discard (seq);
3755 if ((b % 4) == 2 && buckets[b] != i)
3756 /* There is more work to do for this bucket. */
3757 b--;
3760 return any_changes;
3763 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3764 a >= 0 && a < b into (unsigned) a < (unsigned) b
3765 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3767 static bool
3768 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3769 vec<operand_entry *> *ops,
3770 struct range_entry *ranges,
3771 basic_block first_bb)
3773 int i;
3774 bool any_changes = false;
3775 hash_map<tree, int> *map = NULL;
3777 for (i = first; i < length; i++)
3779 if (ranges[i].exp == NULL_TREE
3780 || TREE_CODE (ranges[i].exp) != SSA_NAME
3781 || !ranges[i].in_p)
3782 continue;
3784 tree type = TREE_TYPE (ranges[i].exp);
3785 if (!INTEGRAL_TYPE_P (type)
3786 || TYPE_UNSIGNED (type)
3787 || ranges[i].low == NULL_TREE
3788 || !integer_zerop (ranges[i].low)
3789 || ranges[i].high != NULL_TREE)
3790 continue;
3791 /* EXP >= 0 here. */
3792 if (map == NULL)
3793 map = new hash_map <tree, int>;
3794 map->put (ranges[i].exp, i);
3797 if (map == NULL)
3798 return false;
3800 for (i = 0; i < length; i++)
3802 bool in_p = ranges[i].in_p;
3803 if (ranges[i].low == NULL_TREE
3804 || ranges[i].high == NULL_TREE)
3805 continue;
3806 if (!integer_zerop (ranges[i].low)
3807 || !integer_zerop (ranges[i].high))
3809 if (ranges[i].exp
3810 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3811 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3812 && integer_onep (ranges[i].low)
3813 && integer_onep (ranges[i].high))
3814 in_p = !in_p;
3815 else
3816 continue;
3819 gimple *stmt;
3820 tree_code ccode;
3821 tree rhs1, rhs2;
3822 if (ranges[i].exp)
3824 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3825 continue;
3826 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3827 if (!is_gimple_assign (stmt))
3828 continue;
3829 ccode = gimple_assign_rhs_code (stmt);
3830 rhs1 = gimple_assign_rhs1 (stmt);
3831 rhs2 = gimple_assign_rhs2 (stmt);
3833 else
3835 operand_entry *oe = (*ops)[ranges[i].idx];
3836 stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3837 if (gimple_code (stmt) != GIMPLE_COND)
3838 continue;
3839 ccode = gimple_cond_code (stmt);
3840 rhs1 = gimple_cond_lhs (stmt);
3841 rhs2 = gimple_cond_rhs (stmt);
3844 if (TREE_CODE (rhs1) != SSA_NAME
3845 || rhs2 == NULL_TREE
3846 || TREE_CODE (rhs2) != SSA_NAME)
3847 continue;
3849 switch (ccode)
3851 case GT_EXPR:
3852 case GE_EXPR:
3853 case LT_EXPR:
3854 case LE_EXPR:
3855 break;
3856 default:
3857 continue;
3859 if (in_p)
3860 ccode = invert_tree_comparison (ccode, false);
3861 switch (ccode)
3863 case GT_EXPR:
3864 case GE_EXPR:
3865 std::swap (rhs1, rhs2);
3866 ccode = swap_tree_comparison (ccode);
3867 break;
3868 case LT_EXPR:
3869 case LE_EXPR:
3870 break;
3871 default:
3872 gcc_unreachable ();
3875 int *idx = map->get (rhs1);
3876 if (idx == NULL)
3877 continue;
3879 /* maybe_optimize_range_tests allows statements without side-effects
3880 in the basic blocks as long as they are consumed in the same bb.
3881 Make sure rhs2's def stmt is not among them, otherwise we can't
3882 safely use get_nonzero_bits on it. E.g. in:
3883 # RANGE [-83, 1] NONZERO 173
3884 # k_32 = PHI <k_47(13), k_12(9)>
3886 if (k_32 >= 0)
3887 goto <bb 5>; [26.46%]
3888 else
3889 goto <bb 9>; [73.54%]
3891 <bb 5> [local count: 140323371]:
3892 # RANGE [0, 1] NONZERO 1
3893 _5 = (int) k_32;
3894 # RANGE [0, 4] NONZERO 4
3895 _21 = _5 << 2;
3896 # RANGE [0, 4] NONZERO 4
3897 iftmp.0_44 = (char) _21;
3898 if (k_32 < iftmp.0_44)
3899 goto <bb 6>; [84.48%]
3900 else
3901 goto <bb 9>; [15.52%]
3902 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3903 k_32 >= 0. If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3904 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3905 those stmts even for negative k_32 and the value ranges would no
3906 longer be guaranteed, so the optimization would be invalid. */
3907 while (opcode == ERROR_MARK)
3909 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3910 basic_block bb2 = gimple_bb (g);
3911 if (bb2
3912 && bb2 != first_bb
3913 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3915 /* As an exception, handle a few common cases. */
3916 if (gimple_assign_cast_p (g)
3917 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3919 tree op0 = gimple_assign_rhs1 (g);
3920 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3921 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3922 > TYPE_PRECISION (TREE_TYPE (op0))))
3923 /* Zero-extension is always ok. */
3924 break;
3925 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3926 == TYPE_PRECISION (TREE_TYPE (op0))
3927 && TREE_CODE (op0) == SSA_NAME)
3929 /* Cast from signed to unsigned or vice versa. Retry
3930 with the op0 as new rhs2. */
3931 rhs2 = op0;
3932 continue;
3935 else if (is_gimple_assign (g)
3936 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3937 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3938 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3939 /* Masking with INTEGER_CST with MSB clear is always ok
3940 too. */
3941 break;
3942 rhs2 = NULL_TREE;
3944 break;
3946 if (rhs2 == NULL_TREE)
3947 continue;
3949 wide_int nz = get_nonzero_bits (rhs2);
3950 if (wi::neg_p (nz))
3951 continue;
3953 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3954 and RHS2 is known to be >= 0. */
3955 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3957 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3958 if ((ranges[*idx].strict_overflow_p
3959 || ranges[i].strict_overflow_p)
3960 && issue_strict_overflow_warning (wc))
3961 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3962 "assuming signed overflow does not occur "
3963 "when simplifying range test");
3965 if (dump_file && (dump_flags & TDF_DETAILS))
3967 struct range_entry *r = &ranges[*idx];
3968 fprintf (dump_file, "Optimizing range test ");
3969 print_generic_expr (dump_file, r->exp);
3970 fprintf (dump_file, " +[");
3971 print_generic_expr (dump_file, r->low);
3972 fprintf (dump_file, ", ");
3973 print_generic_expr (dump_file, r->high);
3974 fprintf (dump_file, "] and comparison ");
3975 print_generic_expr (dump_file, rhs1);
3976 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3977 print_generic_expr (dump_file, rhs2);
3978 fprintf (dump_file, "\n into (");
3979 print_generic_expr (dump_file, utype);
3980 fprintf (dump_file, ") ");
3981 print_generic_expr (dump_file, rhs1);
3982 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3983 print_generic_expr (dump_file, utype);
3984 fprintf (dump_file, ") ");
3985 print_generic_expr (dump_file, rhs2);
3986 fprintf (dump_file, "\n");
3989 operand_entry *oe = (*ops)[ranges[i].idx];
3990 ranges[i].in_p = 0;
3991 if (opcode == BIT_IOR_EXPR
3992 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3994 ranges[i].in_p = 1;
3995 ccode = invert_tree_comparison (ccode, false);
3998 unsigned int uid = gimple_uid (stmt);
3999 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4000 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4001 gimple_set_uid (g, uid);
4002 rhs1 = gimple_assign_lhs (g);
4003 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4004 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4006 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4007 gimple_set_uid (g, uid);
4008 rhs2 = gimple_assign_lhs (g);
4009 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4011 if (tree_swap_operands_p (rhs1, rhs2))
4013 std::swap (rhs1, rhs2);
4014 ccode = swap_tree_comparison (ccode);
4016 if (gimple_code (stmt) == GIMPLE_COND)
4018 gcond *c = as_a <gcond *> (stmt);
4019 gimple_cond_set_code (c, ccode);
4020 gimple_cond_set_lhs (c, rhs1);
4021 gimple_cond_set_rhs (c, rhs2);
4022 update_stmt (stmt);
4024 else
4026 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4027 if (!INTEGRAL_TYPE_P (ctype)
4028 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4029 && TYPE_PRECISION (ctype) != 1))
4030 ctype = boolean_type_node;
4031 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4032 gimple_set_uid (g, uid);
4033 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4034 if (oe->op && ctype != TREE_TYPE (oe->op))
4036 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4037 NOP_EXPR, gimple_assign_lhs (g));
4038 gimple_set_uid (g, uid);
4039 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4041 ranges[i].exp = gimple_assign_lhs (g);
4042 oe->op = ranges[i].exp;
4043 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4044 ranges[i].high = ranges[i].low;
4046 ranges[i].strict_overflow_p = false;
4047 oe = (*ops)[ranges[*idx].idx];
4048 /* Now change all the other range test immediate uses, so that
4049 those tests will be optimized away. */
4050 if (opcode == ERROR_MARK)
4052 if (oe->op)
4053 oe->op = build_int_cst (TREE_TYPE (oe->op),
4054 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4055 else
4056 oe->op = (oe->rank == BIT_IOR_EXPR
4057 ? boolean_false_node : boolean_true_node);
4059 else
4060 oe->op = error_mark_node;
4061 ranges[*idx].exp = NULL_TREE;
4062 ranges[*idx].low = NULL_TREE;
4063 ranges[*idx].high = NULL_TREE;
4064 any_changes = true;
4067 delete map;
4068 return any_changes;
4071 /* Optimize range tests, similarly to how fold_range_test optimizes
4072 them on trees. The tree code for the binary
4073 operation between all the operands is OPCODE.
4074 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4075 maybe_optimize_range_tests for inter-bb range optimization.
4076 In that case, if oe->op is NULL, oe->id is the index of the bb whose
4077 GIMPLE_COND is && or ||ed into the test, and oe->rank says
4078 the actual opcode.
4079 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
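/* For instance (an illustrative sketch, not a testcase from this file),
   the two range tests in
     x == 2 || x == 3
   describe the single range [2, 3] and can be merged into one check
   along the lines of (unsigned) (x - 2) <= 1, leaving fewer operands
   in OPS.  */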
4081 static bool
4082 optimize_range_tests (enum tree_code opcode,
4083 vec<operand_entry *> *ops, basic_block first_bb)
4085 unsigned int length = ops->length (), i, j, first;
4086 operand_entry *oe;
4087 struct range_entry *ranges;
4088 bool any_changes = false;
4090 if (length == 1)
4091 return false;
4093 ranges = XNEWVEC (struct range_entry, length);
4094 for (i = 0; i < length; i++)
4096 oe = (*ops)[i];
4097 ranges[i].idx = i;
4098 init_range_entry (ranges + i, oe->op,
4099 oe->op
4100 ? NULL
4101 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4102 /* For |, invert it now; we will invert it again before emitting
4103 the optimized expression. */
4104 if (opcode == BIT_IOR_EXPR
4105 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4106 ranges[i].in_p = !ranges[i].in_p;
4109 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4110 for (i = 0; i < length; i++)
4111 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4112 break;
4114 /* Try to merge ranges. */
4115 for (first = i; i < length; i++)
4117 tree low = ranges[i].low;
4118 tree high = ranges[i].high;
4119 int in_p = ranges[i].in_p;
4120 bool strict_overflow_p = ranges[i].strict_overflow_p;
4121 int update_fail_count = 0;
4123 for (j = i + 1; j < length; j++)
4125 if (ranges[i].exp != ranges[j].exp)
4126 break;
4127 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4128 ranges[j].in_p, ranges[j].low, ranges[j].high))
4129 break;
4130 strict_overflow_p |= ranges[j].strict_overflow_p;
4133 if (j == i + 1)
4134 continue;
4136 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4137 opcode, ops, ranges[i].exp, NULL, in_p,
4138 low, high, strict_overflow_p))
4140 i = j - 1;
4141 any_changes = true;
4143 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4144 while update_range_test would fail. */
4145 else if (update_fail_count == 64)
4146 i = j - 1;
4147 else
4148 ++update_fail_count;
4151 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4152 ops, ranges);
4154 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4155 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4156 ops, ranges);
4157 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4158 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4159 ops, ranges);
4160 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4161 ranges, first_bb);
4162 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4163 ops, ranges);
4165 if (any_changes && opcode != ERROR_MARK)
4167 j = 0;
4168 FOR_EACH_VEC_ELT (*ops, i, oe)
4170 if (oe->op == error_mark_node)
4171 continue;
4172 else if (i != j)
4173 (*ops)[j] = oe;
4174 j++;
4176 ops->truncate (j);
4179 XDELETEVEC (ranges);
4180 return any_changes;
4183 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4184 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4185 otherwise the comparison code. TYPE is a return value that is set
4186 to the type of the comparison. */
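/* A sketch of the canonical shape this matches (SSA names invented for
   illustration):
     cond_5 = a_1 < b_2;
     v_6 = VEC_COND_EXPR <cond_5, { -1, ... }, { 0, ... }>;
   for VAR = v_6 this returns LT_EXPR with *LHS = a_1, *RHS = b_2 and
   *VCOND pointing at the VEC_COND_EXPR statement.  */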
4188 static tree_code
4189 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4190 tree *lhs, tree *rhs, gassign **vcond)
4192 if (TREE_CODE (var) != SSA_NAME)
4193 return ERROR_MARK;
4195 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4196 if (stmt == NULL)
4197 return ERROR_MARK;
4198 if (vcond)
4199 *vcond = stmt;
4201 /* ??? If we start creating more COND_EXPR, we could perform
4202 this same optimization with them. For now, simplify. */
4203 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4204 return ERROR_MARK;
4206 tree cond = gimple_assign_rhs1 (stmt);
4207 tree_code cmp = TREE_CODE (cond);
4208 if (cmp != SSA_NAME)
4209 return ERROR_MARK;
4211 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4212 if (assign == NULL
4213 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4214 return ERROR_MARK;
4216 cmp = gimple_assign_rhs_code (assign);
4217 if (lhs)
4218 *lhs = gimple_assign_rhs1 (assign);
4219 if (rhs)
4220 *rhs = gimple_assign_rhs2 (assign);
4222 /* ??? For now, allow only canonical true and false result vectors.
4223 We could expand this to other constants should the need arise,
4224 but at the moment we don't create them. */
4225 tree t = gimple_assign_rhs2 (stmt);
4226 tree f = gimple_assign_rhs3 (stmt);
4227 bool inv;
4228 if (integer_all_onesp (t))
4229 inv = false;
4230 else if (integer_all_onesp (f))
4232 cmp = invert_tree_comparison (cmp, false);
4233 inv = true;
4235 else
4236 return ERROR_MARK;
4237 if (!integer_zerop (f))
4238 return ERROR_MARK;
4240 /* Success! */
4241 if (rets)
4242 *rets = assign;
4243 if (reti)
4244 *reti = inv;
4245 if (type)
4246 *type = TREE_TYPE (cond);
4247 return cmp;
4250 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4251 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
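/* For example (a hypothetical illustration): if two operands were
   defined as
     v1 = VEC_COND_EXPR <a < b, { -1, ... }, { 0, ... }>
     v2 = VEC_COND_EXPR <a == b, { -1, ... }, { 0, ... }>
   and are combined with BIT_IOR_EXPR, maybe_fold_or_comparisons can
   fold the two comparisons into a <= b, so a single VEC_COND_EXPR
   suffices and the other operand is dropped from OPS.  */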
4253 static bool
4254 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4256 unsigned int length = ops->length (), i, j;
4257 bool any_changes = false;
4259 if (length == 1)
4260 return false;
4262 for (i = 0; i < length; ++i)
4264 tree elt0 = (*ops)[i]->op;
4266 gassign *stmt0, *vcond0;
4267 bool invert;
4268 tree type, lhs0, rhs0;
4269 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4270 &rhs0, &vcond0);
4271 if (cmp0 == ERROR_MARK)
4272 continue;
4274 for (j = i + 1; j < length; ++j)
4276 tree &elt1 = (*ops)[j]->op;
4278 gassign *stmt1, *vcond1;
4279 tree lhs1, rhs1;
4280 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4281 &rhs1, &vcond1);
4282 if (cmp1 == ERROR_MARK)
4283 continue;
4285 tree comb;
4286 if (opcode == BIT_AND_EXPR)
4287 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4288 cmp1, lhs1, rhs1);
4289 else if (opcode == BIT_IOR_EXPR)
4290 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4291 cmp1, lhs1, rhs1);
4292 else
4293 gcc_unreachable ();
4294 if (comb == NULL)
4295 continue;
4297 /* Success! */
4298 if (dump_file && (dump_flags & TDF_DETAILS))
4300 fprintf (dump_file, "Transforming ");
4301 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4302 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4303 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4304 fprintf (dump_file, " into ");
4305 print_generic_expr (dump_file, comb);
4306 fputc ('\n', dump_file);
4309 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4310 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4311 true, GSI_SAME_STMT);
4312 if (invert)
4313 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4314 gimple_assign_rhs3_ptr (vcond0));
4315 gimple_assign_set_rhs1 (vcond0, exp);
4316 update_stmt (vcond0);
4318 elt1 = error_mark_node;
4319 any_changes = true;
4323 if (any_changes)
4325 operand_entry *oe;
4326 j = 0;
4327 FOR_EACH_VEC_ELT (*ops, i, oe)
4329 if (oe->op == error_mark_node)
4330 continue;
4331 else if (i != j)
4332 (*ops)[j] = oe;
4333 j++;
4335 ops->truncate (j);
4338 return any_changes;
4341 /* Return true if STMT is a cast like:
4342 <bb N>:
4344 _123 = (int) _234;
4346 <bb M>:
4347 # _345 = PHI <_123(N), 1(...), 1(...)>
4348 where _234 has bool type, _123 has a single use and
4349 bb N has a single successor M. This is commonly used in
4350 the last block of a range test.
4352 Also return true if STMT is a tcc_comparison like:
4353 <bb N>:
4355 _234 = a_2(D) == 2;
4357 <bb M>:
4358 # _345 = PHI <_234(N), 1(...), 1(...)>
4359 _346 = (int) _345;
4360 where _234 has bool type, a single use and
4361 bb N has a single successor M. This is commonly used in
4362 the last block of a range test. */
4364 static bool
4365 final_range_test_p (gimple *stmt)
4367 basic_block bb, rhs_bb, lhs_bb;
4368 edge e;
4369 tree lhs, rhs;
4370 use_operand_p use_p;
4371 gimple *use_stmt;
4373 if (!gimple_assign_cast_p (stmt)
4374 && (!is_gimple_assign (stmt)
4375 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4376 != tcc_comparison)))
4377 return false;
4378 bb = gimple_bb (stmt);
4379 if (!single_succ_p (bb))
4380 return false;
4381 e = single_succ_edge (bb);
4382 if (e->flags & EDGE_COMPLEX)
4383 return false;
4385 lhs = gimple_assign_lhs (stmt);
4386 rhs = gimple_assign_rhs1 (stmt);
4387 if (gimple_assign_cast_p (stmt)
4388 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4389 || TREE_CODE (rhs) != SSA_NAME
4390 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4391 return false;
4393 if (!gimple_assign_cast_p (stmt)
4394 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4395 return false;
4397 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4398 if (!single_imm_use (lhs, &use_p, &use_stmt))
4399 return false;
4401 if (gimple_code (use_stmt) != GIMPLE_PHI
4402 || gimple_bb (use_stmt) != e->dest)
4403 return false;
4405 /* And that the rhs is defined in the same loop. */
4406 if (gimple_assign_cast_p (stmt))
4408 if (TREE_CODE (rhs) != SSA_NAME
4409 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4410 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4411 return false;
4413 else
4415 if (TREE_CODE (lhs) != SSA_NAME
4416 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4417 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4418 return false;
4421 return true;
4424 /* Return true if BB is a suitable basic block for inter-bb range test
4425 optimization. If BACKWARD is true, BB should be the only predecessor
4426 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4427 or compared against to find a common basic block to which all conditions
4428 branch if true resp. false. If BACKWARD is false, TEST_BB should
4429 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4430 TEST_BB is a bb ending in a condition where the edge to the non-*OTHER_BB
4431 block points to an empty block that falls through into *OTHER_BB and
4432 the phi args match that path. */
4434 static bool
4435 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4436 bool *test_swapped_p, bool backward)
4438 edge_iterator ei, ei2;
4439 edge e, e2;
4440 gimple *stmt;
4441 gphi_iterator gsi;
4442 bool other_edge_seen = false;
4443 bool is_cond;
4445 if (test_bb == bb)
4446 return false;
4447 /* Check last stmt first. */
4448 stmt = last_nondebug_stmt (bb);
4449 if (stmt == NULL
4450 || (gimple_code (stmt) != GIMPLE_COND
4451 && (backward || !final_range_test_p (stmt)))
4452 || gimple_visited_p (stmt)
4453 || stmt_could_throw_p (cfun, stmt)
4454 || *other_bb == bb)
4455 return false;
4456 is_cond = gimple_code (stmt) == GIMPLE_COND;
4457 if (is_cond)
4459 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4460 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4461 to *OTHER_BB (if not set yet, try to find it out). */
4462 if (EDGE_COUNT (bb->succs) != 2)
4463 return false;
4464 FOR_EACH_EDGE (e, ei, bb->succs)
4466 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4467 return false;
4468 if (e->dest == test_bb)
4470 if (backward)
4471 continue;
4472 else
4473 return false;
4475 if (e->dest == bb)
4476 return false;
4477 if (*other_bb == NULL)
4479 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4480 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4481 return false;
4482 else if (e->dest == e2->dest)
4483 *other_bb = e->dest;
4484 if (*other_bb == NULL)
4485 return false;
4487 if (e->dest == *other_bb)
4488 other_edge_seen = true;
4489 else if (backward)
4490 return false;
4492 if (*other_bb == NULL || !other_edge_seen)
4493 return false;
4495 else if (single_succ (bb) != *other_bb)
4496 return false;
4498 /* Now check all PHIs of *OTHER_BB. */
4499 e = find_edge (bb, *other_bb);
4500 e2 = find_edge (test_bb, *other_bb);
4501 retry:;
4502 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4504 gphi *phi = gsi.phi ();
4505 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4506 corresponding to the BB and TEST_BB predecessors must be the same. */
4507 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4508 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4510 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4511 one of the PHIs should have the lhs of the last stmt in
4512 that block as PHI arg and that PHI should have 0 or 1
4513 corresponding to it in all other range test basic blocks
4514 considered. */
4515 if (!is_cond)
4517 if (gimple_phi_arg_def (phi, e->dest_idx)
4518 == gimple_assign_lhs (stmt)
4519 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4520 || integer_onep (gimple_phi_arg_def (phi,
4521 e2->dest_idx))))
4522 continue;
4524 else
4526 gimple *test_last = last_nondebug_stmt (test_bb);
4527 if (gimple_code (test_last) == GIMPLE_COND)
4529 if (backward ? e2->src != test_bb : e->src != bb)
4530 return false;
4532 /* For last_bb, handle also:
4533 if (x_3(D) == 3)
4534 goto <bb 6>; [34.00%]
4535 else
4536 goto <bb 7>; [66.00%]
4538 <bb 6> [local count: 79512730]:
4540 <bb 7> [local count: 1073741824]:
4541 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4542 where bb 7 is *OTHER_BB, but the PHI values from the
4543 earlier bbs match the path through the empty bb
4544 in between. */
4545 edge e3;
4546 if (backward)
4547 e3 = EDGE_SUCC (test_bb,
4548 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4549 else
4550 e3 = EDGE_SUCC (bb,
4551 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4552 if (empty_block_p (e3->dest)
4553 && single_succ_p (e3->dest)
4554 && single_succ (e3->dest) == *other_bb
4555 && single_pred_p (e3->dest)
4556 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4558 if (backward)
4559 e2 = single_succ_edge (e3->dest);
4560 else
4561 e = single_succ_edge (e3->dest);
4562 if (test_swapped_p)
4563 *test_swapped_p = true;
4564 goto retry;
4567 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4568 == gimple_assign_lhs (test_last)
4569 && (integer_zerop (gimple_phi_arg_def (phi,
4570 e->dest_idx))
4571 || integer_onep (gimple_phi_arg_def (phi,
4572 e->dest_idx))))
4573 continue;
4576 return false;
4579 return true;
4582 /* Return true if BB doesn't have side-effects that would disallow
4583 range test optimization, all SSA_NAMEs set in the bb are consumed
4584 in the bb and there are no PHIs. */
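/* E.g. (a sketch): a block containing just
     _1 = x_2(D) + 1;
     if (_1 > 5)
   qualifies as long as _1 has no uses outside the block, whereas a
   block containing a call, a store, or an SSA_NAME consumed in another
   block does not.  */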
4586 bool
4587 no_side_effect_bb (basic_block bb)
4589 gimple_stmt_iterator gsi;
4590 gimple *last;
4592 if (!gimple_seq_empty_p (phi_nodes (bb)))
4593 return false;
4594 last = last_nondebug_stmt (bb);
4595 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4597 gimple *stmt = gsi_stmt (gsi);
4598 tree lhs;
4599 imm_use_iterator imm_iter;
4600 use_operand_p use_p;
4602 if (is_gimple_debug (stmt))
4603 continue;
4604 if (gimple_has_side_effects (stmt))
4605 return false;
4606 if (stmt == last)
4607 return true;
4608 if (!is_gimple_assign (stmt))
4609 return false;
4610 lhs = gimple_assign_lhs (stmt);
4611 if (TREE_CODE (lhs) != SSA_NAME)
4612 return false;
4613 if (gimple_assign_rhs_could_trap_p (stmt))
4614 return false;
4615 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4617 gimple *use_stmt = USE_STMT (use_p);
4618 if (is_gimple_debug (use_stmt))
4619 continue;
4620 if (gimple_bb (use_stmt) != bb)
4621 return false;
4624 return false;
4627 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4628 return true and fill in *OPS recursively. */
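/* E.g. (SSA names invented for illustration): for
     _5 = a_1 | b_2;
     _9 = _5 | c_3;
   calling get_ops (_9, BIT_IOR_EXPR, &ops, loop) pushes a_1, b_2 and
   c_3 (assuming each has a single use) rather than the intermediate
   _5.  */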
4630 static bool
4631 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4632 class loop *loop)
4634 gimple *stmt = SSA_NAME_DEF_STMT (var);
4635 tree rhs[2];
4636 int i;
4638 if (!is_reassociable_op (stmt, code, loop))
4639 return false;
4641 rhs[0] = gimple_assign_rhs1 (stmt);
4642 rhs[1] = gimple_assign_rhs2 (stmt);
4643 gimple_set_visited (stmt, true);
4644 for (i = 0; i < 2; i++)
4645 if (TREE_CODE (rhs[i]) == SSA_NAME
4646 && !get_ops (rhs[i], code, ops, loop)
4647 && has_single_use (rhs[i]))
4649 operand_entry *oe = operand_entry_pool.allocate ();
4651 oe->op = rhs[i];
4652 oe->rank = code;
4653 oe->id = 0;
4654 oe->count = 1;
4655 oe->stmt_to_insert = NULL;
4656 ops->safe_push (oe);
4658 return true;
4661 /* Find the ops that were added by get_ops starting from VAR, see if
4662 they were changed during update_range_test and if yes, create new
4663 stmts. */
4665 static tree
4666 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4667 unsigned int *pidx, class loop *loop)
4669 gimple *stmt = SSA_NAME_DEF_STMT (var);
4670 tree rhs[4];
4671 int i;
4673 if (!is_reassociable_op (stmt, code, loop))
4674 return NULL;
4676 rhs[0] = gimple_assign_rhs1 (stmt);
4677 rhs[1] = gimple_assign_rhs2 (stmt);
4678 rhs[2] = rhs[0];
4679 rhs[3] = rhs[1];
4680 for (i = 0; i < 2; i++)
4681 if (TREE_CODE (rhs[i]) == SSA_NAME)
4683 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4684 if (rhs[2 + i] == NULL_TREE)
4686 if (has_single_use (rhs[i]))
4687 rhs[2 + i] = ops[(*pidx)++]->op;
4688 else
4689 rhs[2 + i] = rhs[i];
4692 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4693 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4695 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4696 var = make_ssa_name (TREE_TYPE (var));
4697 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4698 rhs[2], rhs[3]);
4699 gimple_set_uid (g, gimple_uid (stmt));
4700 gimple_set_visited (g, true);
4701 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4702 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4703 if (fold_stmt_inplace (&gsi2))
4704 update_stmt (g);
4706 return var;
4709 /* Structure to track the initial value passed to get_ops and
4710 the range in the ops vector for each basic block. */
4712 struct inter_bb_range_test_entry
4714 tree op;
4715 unsigned int first_idx, last_idx;
4718 /* Inter-bb range test optimization.
4720 Return TRUE if a gimple conditional is optimized to a true/false,
4721 otherwise return FALSE.
4723 This indicates to the caller that it should run a CFG cleanup pass
4724 once reassociation is completed. */
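/* A simplified illustration (not a testcase from this file): for source
   like
     if (a == 2) goto merge;
     if (a == 3) goto merge;
   spread over two basic blocks, the two GIMPLE_CONDs form one inter-bb
   range test; after the merge one condition is rewritten to the
   combined check while the other collapses to a constant true/false,
   which is why a CFG cleanup may be requested.  */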
4726 static bool
4727 maybe_optimize_range_tests (gimple *stmt)
4729 basic_block first_bb = gimple_bb (stmt);
4730 basic_block last_bb = first_bb;
4731 basic_block other_bb = NULL;
4732 basic_block bb;
4733 edge_iterator ei;
4734 edge e;
4735 auto_vec<operand_entry *> ops;
4736 auto_vec<inter_bb_range_test_entry> bbinfo;
4737 bool any_changes = false;
4738 bool cfg_cleanup_needed = false;
4740 /* Consider only basic blocks that end with GIMPLE_COND or
4741 a cast statement satisfying final_range_test_p. All
4742 but the last bb in the first_bb .. last_bb range
4743 should end with GIMPLE_COND. */
4744 if (gimple_code (stmt) == GIMPLE_COND)
4746 if (EDGE_COUNT (first_bb->succs) != 2)
4747 return cfg_cleanup_needed;
4749 else if (final_range_test_p (stmt))
4750 other_bb = single_succ (first_bb);
4751 else
4752 return cfg_cleanup_needed;
4754 if (stmt_could_throw_p (cfun, stmt))
4755 return cfg_cleanup_needed;
4757 /* As relative ordering of post-dominator sons isn't fixed,
4758 maybe_optimize_range_tests can be called first on any
4759 bb in the range we want to optimize. So, start searching
4760 backwards, if first_bb can be set to a predecessor. */
4761 while (single_pred_p (first_bb))
4763 basic_block pred_bb = single_pred (first_bb);
4764 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4765 break;
4766 if (!no_side_effect_bb (first_bb))
4767 break;
4768 first_bb = pred_bb;
4770 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4771 Before starting forward search in last_bb successors, find
4772 out the other_bb. */
4773 if (first_bb == last_bb)
4775 other_bb = NULL;
4776 /* As non-GIMPLE_COND last stmt always terminates the range,
4777 if forward search didn't discover anything, just give up. */
4778 if (gimple_code (stmt) != GIMPLE_COND)
4779 return cfg_cleanup_needed;
4780 /* Look at both successors. Either it ends with a GIMPLE_COND
4781 and satisfies suitable_cond_bb, or ends with a cast and
4782 other_bb is that cast's successor. */
4783 FOR_EACH_EDGE (e, ei, first_bb->succs)
4784 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4785 || e->dest == first_bb)
4786 return cfg_cleanup_needed;
4787 else if (single_pred_p (e->dest))
4789 stmt = last_nondebug_stmt (e->dest);
4790 if (stmt
4791 && gimple_code (stmt) == GIMPLE_COND
4792 && EDGE_COUNT (e->dest->succs) == 2)
4794 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4795 NULL, true))
4796 break;
4797 else
4798 other_bb = NULL;
4800 else if (stmt
4801 && final_range_test_p (stmt)
4802 && find_edge (first_bb, single_succ (e->dest)))
4804 other_bb = single_succ (e->dest);
4805 if (other_bb == first_bb)
4806 other_bb = NULL;
4809 if (other_bb == NULL)
4810 return cfg_cleanup_needed;
4812 /* Now do the forward search, moving last_bb to successor bbs
4813 that aren't other_bb. */
4814 while (EDGE_COUNT (last_bb->succs) == 2)
4816 FOR_EACH_EDGE (e, ei, last_bb->succs)
4817 if (e->dest != other_bb)
4818 break;
4819 if (e == NULL)
4820 break;
4821 if (!single_pred_p (e->dest))
4822 break;
4823 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4824 break;
4825 if (!no_side_effect_bb (e->dest))
4826 break;
4827 last_bb = e->dest;
4829 if (first_bb == last_bb)
4830 return cfg_cleanup_needed;
4831 /* Here basic blocks first_bb through last_bb's predecessor
4832 end with GIMPLE_COND, all of them have one of the edges to
4833 other_bb and another to another block in the range,
4834 all blocks except first_bb don't have side-effects and
4835 last_bb ends with either GIMPLE_COND, or cast satisfying
4836 final_range_test_p. */
4837 for (bb = last_bb; ; bb = single_pred (bb))
4839 enum tree_code code;
4840 tree lhs, rhs;
4841 inter_bb_range_test_entry bb_ent;
4843 bb_ent.op = NULL_TREE;
4844 bb_ent.first_idx = ops.length ();
4845 bb_ent.last_idx = bb_ent.first_idx;
4846 e = find_edge (bb, other_bb);
4847 stmt = last_nondebug_stmt (bb);
4848 gimple_set_visited (stmt, true);
4849 if (gimple_code (stmt) != GIMPLE_COND)
4851 use_operand_p use_p;
4852 gimple *phi;
4853 edge e2;
4854 unsigned int d;
4856 lhs = gimple_assign_lhs (stmt);
4857 rhs = gimple_assign_rhs1 (stmt);
4858 gcc_assert (bb == last_bb);
4860 /* stmt is
4861 _123 = (int) _234;
4863 _234 = a_2(D) == 2;
4865 followed by:
4866 <bb M>:
4867 # _345 = PHI <_123(N), 1(...), 1(...)>
4869 or 0 instead of 1. If it is 0, the _234
4870 range test is anded together with all the
4871 other range tests; if it is 1, it is ored with
4872 them. */
4873 single_imm_use (lhs, &use_p, &phi);
4874 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4875 e2 = find_edge (first_bb, other_bb);
4876 d = e2->dest_idx;
4877 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4878 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4879 code = BIT_AND_EXPR;
4880 else
4882 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4883 code = BIT_IOR_EXPR;
4886 /* If _234 SSA_NAME_DEF_STMT is
4887 _234 = _567 | _789;
4888 (or &, corresponding to 1/0 in the phi arguments),
4889 push into ops the individual range test arguments
4890 of the bitwise or resp. and, recursively. */
4891 if (TREE_CODE (rhs) == SSA_NAME
4892 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4893 != tcc_comparison)
4894 && !get_ops (rhs, code, &ops,
4895 loop_containing_stmt (stmt))
4896 && has_single_use (rhs))
4898 /* Otherwise, push the _234 range test itself. */
4899 operand_entry *oe = operand_entry_pool.allocate ();
4901 oe->op = rhs;
4902 oe->rank = code;
4903 oe->id = 0;
4904 oe->count = 1;
4905 oe->stmt_to_insert = NULL;
4906 ops.safe_push (oe);
4907 bb_ent.last_idx++;
4908 bb_ent.op = rhs;
4910 else if (is_gimple_assign (stmt)
4911 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4912 == tcc_comparison)
4913 && !get_ops (lhs, code, &ops,
4914 loop_containing_stmt (stmt))
4915 && has_single_use (lhs))
4917 operand_entry *oe = operand_entry_pool.allocate ();
4918 oe->op = lhs;
4919 oe->rank = code;
4920 oe->id = 0;
4921 oe->count = 1;
4922 ops.safe_push (oe);
4923 bb_ent.last_idx++;
4924 bb_ent.op = lhs;
4926 else
4928 bb_ent.last_idx = ops.length ();
4929 bb_ent.op = rhs;
4931 bbinfo.safe_push (bb_ent);
4932 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4933 ops[i]->id = bb->index;
4934 continue;
4936 else if (bb == last_bb)
4938 /* For last_bb, handle also:
4939 if (x_3(D) == 3)
4940 goto <bb 6>; [34.00%]
4941 else
4942 goto <bb 7>; [66.00%]
4944 <bb 6> [local count: 79512730]:
4946 <bb 7> [local count: 1073741824]:
4947 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4948 where bb 7 is OTHER_BB, but the PHI values from the
4949 earlier bbs match the path through the empty bb
4950 in between. */
4951 bool test_swapped_p = false;
4952 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4953 &other_bb, &test_swapped_p, true);
4954 gcc_assert (ok);
4955 if (test_swapped_p)
4956 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4958 /* Otherwise stmt is GIMPLE_COND. */
4959 code = gimple_cond_code (stmt);
4960 lhs = gimple_cond_lhs (stmt);
4961 rhs = gimple_cond_rhs (stmt);
4962 if (TREE_CODE (lhs) == SSA_NAME
4963 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4964 && ((code != EQ_EXPR && code != NE_EXPR)
4965 || rhs != boolean_false_node
4966 /* Either push into ops the individual bitwise
4967 or resp. and operands, depending on which
4968 edge is other_bb. */
4969 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4970 ^ (code == EQ_EXPR))
4971 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4972 loop_containing_stmt (stmt))))
4974 /* Or push the GIMPLE_COND stmt itself. */
4975 operand_entry *oe = operand_entry_pool.allocate ();
4977 oe->op = NULL;
4978 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4979 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4980 /* oe->op = NULL signals that there is no SSA_NAME
4981 for the range test, and oe->id instead is the
4982 basic block number, at whose end the GIMPLE_COND
4983 is. */
4984 oe->id = bb->index;
4985 oe->count = 1;
4986 oe->stmt_to_insert = NULL;
4987 ops.safe_push (oe);
4988 bb_ent.op = NULL;
4989 bb_ent.last_idx++;
4991 else if (ops.length () > bb_ent.first_idx)
4993 bb_ent.op = lhs;
4994 bb_ent.last_idx = ops.length ();
4996 bbinfo.safe_push (bb_ent);
4997 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4998 ops[i]->id = bb->index;
4999 if (bb == first_bb)
5000 break;
5002 if (ops.length () > 1)
5003 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5004 if (any_changes)
5006 unsigned int idx, max_idx = 0;
5007 /* update_ops relies on the has_single_use predicates returning the
5008 same values as they did during get_ops earlier. Additionally it
5009 never removes statements, only adds new ones, and it should walk
5010 from the single imm use and check the predicate already before
5011 making those changes.
5012 On the other hand, handling GIMPLE_COND directly can turn
5013 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5014 it needs to be done in a separate loop afterwards. */
5015 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5017 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5018 && bbinfo[idx].op != NULL_TREE)
5020 tree new_op;
5022 max_idx = idx;
5023 stmt = last_nondebug_stmt (bb);
5024 new_op = update_ops (bbinfo[idx].op,
5025 (enum tree_code)
5026 ops[bbinfo[idx].first_idx]->rank,
5027 ops, &bbinfo[idx].first_idx,
5028 loop_containing_stmt (stmt));
5029 if (new_op == NULL_TREE)
5031 gcc_assert (bb == last_bb);
5032 new_op = ops[bbinfo[idx].first_idx++]->op;
5034 if (bbinfo[idx].op != new_op)
5036 imm_use_iterator iter;
5037 use_operand_p use_p;
5038 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5040 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5041 if (is_gimple_debug (use_stmt))
5042 continue;
5043 else if (gimple_code (use_stmt) == GIMPLE_COND
5044 || gimple_code (use_stmt) == GIMPLE_PHI)
5045 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5046 SET_USE (use_p, new_op);
5047 else if ((is_gimple_assign (use_stmt)
5048 && (TREE_CODE_CLASS
5049 (gimple_assign_rhs_code (use_stmt))
5050 == tcc_comparison)))
5051 cast_or_tcc_cmp_stmt = use_stmt;
5052 else if (gimple_assign_cast_p (use_stmt))
5053 cast_or_tcc_cmp_stmt = use_stmt;
5054 else
5055 gcc_unreachable ();
5057 if (cast_or_tcc_cmp_stmt)
5059 gcc_assert (bb == last_bb);
5060 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5061 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5062 enum tree_code rhs_code
5063 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5064 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5065 : CONVERT_EXPR;
5066 gassign *g;
5067 if (is_gimple_min_invariant (new_op))
5069 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5070 g = gimple_build_assign (new_lhs, new_op);
5072 else
5073 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5074 gimple_stmt_iterator gsi
5075 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5076 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5077 gimple_set_visited (g, true);
5078 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5079 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5080 if (is_gimple_debug (use_stmt))
5081 continue;
5082 else if (gimple_code (use_stmt) == GIMPLE_COND
5083 || gimple_code (use_stmt) == GIMPLE_PHI)
5084 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5085 SET_USE (use_p, new_lhs);
5086 else
5087 gcc_unreachable ();
5091 if (bb == first_bb)
5092 break;
5094 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5096 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5097 && bbinfo[idx].op == NULL_TREE
5098 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5100 gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5102 if (idx > max_idx)
5103 max_idx = idx;
5105 /* If we collapse the conditional to a true/false
5106 condition, then bubble that knowledge up to our caller. */
5107 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5109 gimple_cond_make_false (cond_stmt);
5110 cfg_cleanup_needed = true;
5112 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5114 gimple_cond_make_true (cond_stmt);
5115 cfg_cleanup_needed = true;
5117 else
5119 gimple_cond_set_code (cond_stmt, NE_EXPR);
5120 gimple_cond_set_lhs (cond_stmt,
5121 ops[bbinfo[idx].first_idx]->op);
5122 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5124 update_stmt (cond_stmt);
5126 if (bb == first_bb)
5127 break;
5130 /* The above changes could result in basic blocks after the first
5131 modified one, up to and including last_bb, being executed even if
5132 they would not be in the original program. If the value ranges of
5133 assignment lhs' in those bbs were dependent on the conditions
5134 guarding those basic blocks, which now can change, the VRs might
5135 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5136 are only used within the same bb, it should not be a big deal if
5137 we just reset all the VRs in those bbs. See PR68671. */
5138 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5139 reset_flow_sensitive_info_in_bb (bb);
5141 return cfg_cleanup_needed;
5144 /* Remove the def stmt of VAR if VAR has zero uses and recurse
5145 on its rhs1 operand if so. */
5147 static void
5148 remove_visited_stmt_chain (tree var)
5150 gimple *stmt;
5151 gimple_stmt_iterator gsi;
5153 while (1)
5155 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5156 return;
5157 stmt = SSA_NAME_DEF_STMT (var);
5158 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5160 var = gimple_assign_rhs1 (stmt);
5161 gsi = gsi_for_stmt (stmt);
5162 reassoc_remove_stmt (&gsi);
5163 release_defs (stmt);
5165 else
5166 return;
5170 /* This function checks three consecutive operands in
5171 the passed operands vector OPS starting from OPINDEX and
5172 swaps two operands if it is profitable for the binary operation
5173 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5175 We pair ops with the same rank if possible. */
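/* For instance (hypothetical ranks): with ranks {4, 4, 7} starting at
   OPINDEX, the first and third entries are swapped, giving {7, 4, 4},
   so the statement consuming the last two operands pairs the two
   rank-4 entries.  */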
5177 static void
5178 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5179 unsigned int opindex)
5181 operand_entry *oe1, *oe2, *oe3;
5183 oe1 = ops[opindex];
5184 oe2 = ops[opindex + 1];
5185 oe3 = ops[opindex + 2];
5187 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5188 std::swap (*oe1, *oe3);
5189 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5190 std::swap (*oe1, *oe2);
5193 /* If the definition of RHS1 or RHS2 dominates STMT, return the later of those
5194 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5195 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5196 after the returned stmt. */
5198 static inline gimple *
5199 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5201 insert_before = true;
5202 if (TREE_CODE (rhs1) == SSA_NAME
5203 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5205 stmt = SSA_NAME_DEF_STMT (rhs1);
5206 insert_before = false;
5208 if (TREE_CODE (rhs2) == SSA_NAME
5209 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5211 stmt = SSA_NAME_DEF_STMT (rhs2);
5212 insert_before = false;
5214 return stmt;
5217 /* If the stmt that defines an operand has to be inserted, insert it
5218 before the use. */
5219 static void
5220 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5222 gcc_assert (is_gimple_assign (stmt_to_insert));
5223 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5224 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5225 bool insert_before;
5226 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5227 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5228 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5230 /* If the insert point is not stmt, then insert_point would be
5231 the point where operand rhs1 or rhs2 is defined. In this case,
5232 stmt_to_insert has to be inserted afterwards. This would
5233 only happen when the stmt insertion point is flexible. */
5234 if (insert_before)
5235 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5236 else
5237 insert_stmt_after (stmt_to_insert, insert_point);
5241 /* Recursively rewrite our linearized statements so that the operators
5242 match those in OPS[OPINDEX], putting the computation in rank
5243 order. Return new lhs.
5244 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5245 the current stmt and during recursive invocations.
5246 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5247 recursive invocations. */
5249 static tree
5250 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5251 const vec<operand_entry *> &ops, bool changed,
5252 bool next_changed)
5254 tree rhs1 = gimple_assign_rhs1 (stmt);
5255 tree rhs2 = gimple_assign_rhs2 (stmt);
5256 tree lhs = gimple_assign_lhs (stmt);
5257 operand_entry *oe;
5259 /* The final recursion case for this function is that you have
5260 exactly two operations left.
5261 If we had exactly one op in the entire list to start with, we
5262 would have never called this function, and the tail recursion
5263 rewrites them one at a time. */
5264 if (opindex + 2 == ops.length ())
5266 operand_entry *oe1, *oe2;
5268 oe1 = ops[opindex];
5269 oe2 = ops[opindex + 1];
5271 if (rhs1 != oe1->op || rhs2 != oe2->op)
5273 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5274 unsigned int uid = gimple_uid (stmt);
5276 if (dump_file && (dump_flags & TDF_DETAILS))
5278 fprintf (dump_file, "Transforming ");
5279 print_gimple_stmt (dump_file, stmt, 0);
5282 /* If the stmt that defines operand has to be inserted, insert it
5283 before the use. */
5284 if (oe1->stmt_to_insert)
5285 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5286 if (oe2->stmt_to_insert)
5287 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5288 /* Even when changed is false, reassociation could have e.g. removed
5289 some redundant operations, so unless we are just swapping the
5290 arguments or unless there is no change at all (then we just
5291 return lhs), force creation of a new SSA_NAME. */
5292 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5294 bool insert_before;
5295 gimple *insert_point
5296 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5297 lhs = make_ssa_name (TREE_TYPE (lhs));
5298 stmt
5299 = gimple_build_assign (lhs, rhs_code,
5300 oe1->op, oe2->op);
5301 gimple_set_uid (stmt, uid);
5302 gimple_set_visited (stmt, true);
5303 if (insert_before)
5304 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5305 else
5306 insert_stmt_after (stmt, insert_point);
5308 else
5310 bool insert_before;
5311 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5312 insert_before)
5313 == stmt);
5314 gimple_assign_set_rhs1 (stmt, oe1->op);
5315 gimple_assign_set_rhs2 (stmt, oe2->op);
5316 update_stmt (stmt);
5319 if (rhs1 != oe1->op && rhs1 != oe2->op)
5320 remove_visited_stmt_chain (rhs1);
5322 if (dump_file && (dump_flags & TDF_DETAILS))
5324 fprintf (dump_file, " into ");
5325 print_gimple_stmt (dump_file, stmt, 0);
5328 return lhs;
5331 /* If we hit here, we should have 3 or more ops left. */
5332 gcc_assert (opindex + 2 < ops.length ());
5334 /* Rewrite the next operator. */
5335 oe = ops[opindex];
5337 /* If the stmt that defines operand has to be inserted, insert it
5338 before the use. */
5339 if (oe->stmt_to_insert)
5340 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5342 /* Recurse on the LHS of the binary operator, which is guaranteed to
5343 be the non-leaf side. */
5344 tree new_rhs1
5345 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5346 changed || oe->op != rhs2 || next_changed,
5347 false);
5349 if (oe->op != rhs2 || new_rhs1 != rhs1)
5351 if (dump_file && (dump_flags & TDF_DETAILS))
5353 fprintf (dump_file, "Transforming ");
5354 print_gimple_stmt (dump_file, stmt, 0);
5357 /* If changed is false, this is either opindex == 0
5358 or all outer rhs2's were equal to corresponding oe->op,
5359 and powi_result is NULL.
5360 That means lhs is equivalent before and after reassociation.
5361 Otherwise ensure the old lhs SSA_NAME is not reused and
5362 create a new stmt as well, so that any debug stmts will be
5363 properly adjusted. */
5364 if (changed)
5366 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5367 unsigned int uid = gimple_uid (stmt);
5368 bool insert_before;
5369 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5370 insert_before);
5372 lhs = make_ssa_name (TREE_TYPE (lhs));
5373 stmt = gimple_build_assign (lhs, rhs_code,
5374 new_rhs1, oe->op);
5375 gimple_set_uid (stmt, uid);
5376 gimple_set_visited (stmt, true);
5377 if (insert_before)
5378 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5379 else
5380 insert_stmt_after (stmt, insert_point);
5382 else
5384 bool insert_before;
5385 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5386 insert_before)
5387 == stmt);
5388 gimple_assign_set_rhs1 (stmt, new_rhs1);
5389 gimple_assign_set_rhs2 (stmt, oe->op);
5390 update_stmt (stmt);
5393 if (dump_file && (dump_flags & TDF_DETAILS))
5395 fprintf (dump_file, " into ");
5396 print_gimple_stmt (dump_file, stmt, 0);
5399 return lhs;
5402 /* Find out how many cycles we need to compute a statement chain.
5403 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5404 maximum number of independent statements we may execute per cycle. */
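/* A worked illustration (hypothetical numbers): for OPS_NUM = 16 and
   CPU_WIDTH = 4, two cycles retire 4 operands each (16 -> 8), and the
   remaining 8 operands are then halved per cycle (8 -> 4 -> 2 -> 1) in
   three more cycles, so get_required_cycles returns 5.  */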
5406 static int
5407 get_required_cycles (int ops_num, int cpu_width)
5409 int res;
5410 int elog;
5411 unsigned int rest;
5413 /* While we have more than 2 * cpu_width operands
5414 we may reduce the number of operands by cpu_width
5415 per cycle. */
5416 res = ops_num / (2 * cpu_width);
5418 /* The remaining operand count may be halved each cycle
5419 until we have only one operand. */
5420 rest = (unsigned)(ops_num - res * cpu_width);
5421 elog = exact_log2 (rest);
5422 if (elog >= 0)
5423 res += elog;
5424 else
5425 res += floor_log2 (rest) + 1;
5427 return res;
5430 /* Return the optimal number of registers to use for the computation of
5431 the given statements. */
5433 static int
5434 get_reassociation_width (int ops_num, enum tree_code opc,
5435 machine_mode mode)
5437 int param_width = param_tree_reassoc_width;
5438 int width;
5439 int width_min;
5440 int cycles_best;
5442 if (param_width > 0)
5443 width = param_width;
5444 else
5445 width = targetm.sched.reassociation_width (opc, mode);
5447 if (width == 1)
5448 return width;
5450 /* Get the minimal time required for sequence computation. */
5451 cycles_best = get_required_cycles (ops_num, width);
5453 /* Check if we may use less width and still compute the sequence in
5454 the same time. It will allow us to reduce register usage.
5455 get_required_cycles is monotonically increasing with lower width
5456 so we can perform a binary search for the minimal width that still
5457 results in the optimal cycle count. */
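/* A worked illustration (hypothetical numbers): with ops_num = 4 and a
   target width of 4, get_required_cycles (4, 4) and
   get_required_cycles (4, 2) are both 2 while get_required_cycles
   (4, 1) is 3, so the search settles on width 2: the narrower width
   costs no extra cycles but needs fewer registers.  */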
5458 width_min = 1;
5459 while (width > width_min)
5461 int width_mid = (width + width_min) / 2;
5463 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5464 width = width_mid;
5465 else if (width_min < width_mid)
5466 width_min = width_mid;
5467 else
5468 break;
5471 return width;
5474 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5475 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5476 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5478 /* Rewrite statements with a dependency chain with regard to the chance
5479 to generate FMA.
5480 For a chain with FMA: try to keep the FMA opportunities as much as possible.
5481 For a chain without FMA: put the computation in rank order and try
5482 to allow operations to be executed in parallel.
5483 E.g.
5484 e + f + a * b + c * d;
5486 ssa1 = e + a * b;
5487 ssa2 = f + c * d;
5488 ssa3 = ssa1 + ssa2;
5490 This reassociation approach preserves the chance of fma generation as much
5491 as possible.
5493 Another thing is to avoid adding loop-carried ops to long chains, otherwise
5494 the whole chain will have dependencies across the loop iteration. Just keep
5495 loop-carried ops in a separate chain.
5496 E.g.
5497 x_1 = phi (x_0, x_2)
5498 y_1 = phi (y_0, y_2)
5500 a + b + c + d + e + x1 + y1
5502 SSA1 = a + b;
5503 SSA2 = c + d;
5504 SSA3 = SSA1 + e;
5505 SSA4 = SSA3 + SSA2;
5506 SSA5 = x1 + y1;
5507 SSA6 = SSA4 + SSA5;
5509 static void
5510 rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5511 const vec<operand_entry *> &ops)
5513 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5514 int op_num = ops.length ();
5515 int op_normal_num = op_num;
5516 gcc_assert (op_num > 0);
5517 int stmt_num = op_num - 1;
5518 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5519 int i = 0, j = 0;
5520 tree tmp_op[2], op1;
5521 operand_entry *oe;
5522 gimple *stmt1 = NULL;
5523 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5524 int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5525 int width_active = 0, width_count = 0;
5526 bool has_biased = false, ops_changed = false;
5527 auto_vec<operand_entry *> ops_normal;
5528 auto_vec<operand_entry *> ops_biased;
5529 vec<operand_entry *> *ops1;
5531 /* We start expression rewriting from the top statements.
5532 So, in this loop we create a full list of statements
5533 we will work with. */
5534 stmts[stmt_num - 1] = stmt;
5535 for (i = stmt_num - 2; i >= 0; i--)
5536 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5538 /* Avoid adding loop-carried ops to long chains: first filter out the
5539 loop-carried ones. But we need to make sure that the length of the
5540 remainder is not less than 4, which is the smallest ops length for
5541 which we can break the dependency. */
5542 FOR_EACH_VEC_ELT (ops, i, oe)
5544 if (TREE_CODE (oe->op) == SSA_NAME
5545 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5546 && op_normal_num > 4)
5548 ops_biased.safe_push (oe);
5549 has_biased = true;
5550 op_normal_num --;
5552 else
5553 ops_normal.safe_push (oe);
5556 /* Width should not be larger than ops length / 2, since we cannot create
5557 more parallel dependency chains than that. */
5558 int width_normal = op_normal_num / 2;
5559 int width_biased = (op_num - op_normal_num) / 2;
5560 width_normal = width <= width_normal ? width : width_normal;
5561 width_biased = width <= width_biased ? width : width_biased;
5563 ops1 = &ops_normal;
5564 width_count = width_active = width_normal;
5566 /* Build parallel dependency chain according to width. */
5567 for (i = 0; i < stmt_num; i++)
5569 if (dump_file && (dump_flags & TDF_DETAILS))
5571 fprintf (dump_file, "Transforming ");
5572 print_gimple_stmt (dump_file, stmts[i], 0);
5575 /* When the normal ops are finished but the loop is not, continue
5576 with the biased ops. */
5577 if (width_count == 0 && ops1 == &ops_normal)
5579 ops1 = &ops_biased;
5580 width_count = width_active = width_biased;
5581 ops_changed = true;
5584 /* Swap the operands if no FMA in the chain. */
5585 if (ops1->length () > 2 && !has_fma)
5586 swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5588 if (i < width_active
5589 || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5591 for (j = 0; j < 2; j++)
5593 oe = ops1->pop ();
5594 tmp_op[j] = oe->op;
5595 /* If the stmt that defines operand has to be inserted, insert it
5596 before the use. */
5597 stmt1 = oe->stmt_to_insert;
5598 if (stmt1)
5599 insert_stmt_before_use (stmts[i], stmt1);
5600 stmt1 = NULL;
5602 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5603 tmp_op[1],
5604 tmp_op[0],
5605 opcode);
5606 gimple_set_visited (stmts[i], true);
5609 else
5611 /* We keep original statement only for the last one. All others are
5612 recreated. */
5613 if (!ops1->length ())
5615 /* For biased length equal to 2. */
5616 if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5617 last_rhs2_stmt_index = i - 1;
5619 /* When width_count == 2 and there is no biased, just finish. */
5620 if (width_count == NORMAL_END_STMT && !has_biased)
5622 last_rhs1_stmt_index = i - 1;
5623 last_rhs2_stmt_index = i - 2;
5625 if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5627 /* We keep original statement only for the last one. All
5628 others are recreated. */
5629 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5630 (stmts[last_rhs1_stmt_index]));
5631 gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5632 (stmts[last_rhs2_stmt_index]));
5633 update_stmt (stmts[i]);
5635 else
5637 stmts[i] =
5638 build_and_add_sum (TREE_TYPE (last_rhs1),
5639 gimple_assign_lhs (stmts[i-width_count]),
5640 gimple_assign_lhs
5641 (stmts[i-width_count+1]),
5642 opcode);
5643 gimple_set_visited (stmts[i], true);
5644 width_count--;
5646 /* It is the end of normal or biased ops.
5647 last_rhs1_stmt_index is used to record the last stmt index
5648 for normal ops. last_rhs2_stmt_index is used to record the
5649 last stmt index for biased ops. */
5650 if (width_count == BIASED_END_STMT)
5652 gcc_assert (has_biased);
5653 if (ops_biased.length ())
5654 last_rhs1_stmt_index = i;
5655 else
5656 last_rhs2_stmt_index = i;
5657 width_count--;
5661 else
5663 /* Attach the rest of the ops to the parallel dependency chain. */
5664 oe = ops1->pop ();
5665 op1 = oe->op;
5666 stmt1 = oe->stmt_to_insert;
5667 if (stmt1)
5668 insert_stmt_before_use (stmts[i], stmt1);
5669 stmt1 = NULL;
5671 /* For only one biased op. */
5672 if (width_count == SPECIAL_BIASED_END_STMT)
5674 /* We keep original statement only for the last one. All
5675 others are recreated. */
5676 gcc_assert (has_biased);
5677 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5678 (stmts[last_rhs1_stmt_index]));
5679 gimple_assign_set_rhs2 (stmts[i], op1);
5680 update_stmt (stmts[i]);
5682 else
5684 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5685 gimple_assign_lhs
5686 (stmts[i-width_active]),
5687 op1,
5688 opcode);
5689 gimple_set_visited (stmts[i], true);
5694 if (dump_file && (dump_flags & TDF_DETAILS))
5696 fprintf (dump_file, " into ");
5697 print_gimple_stmt (dump_file, stmts[i], 0);
5701 remove_visited_stmt_chain (last_rhs1);
5704 /* Transform STMT, which is really (A + B) + (C + D), into the left
5705 linear form, ((A+B)+C)+D.
5706 Recurse on D if necessary. */
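/* A sketch in GIMPLE terms (SSA names invented for illustration):
     _5 = a_1 + b_2;   _6 = c_3 + d_4;   _7 = _5 + _6;
   is rewritten into a left-linear chain along the lines of
     _5 = a_1 + b_2;   _8 = _5 + d_4;    _7 = _8 + c_3;
   with the old definition of _6 removed afterwards.  */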
5708 static void
5709 linearize_expr (gimple *stmt)
5711 gimple_stmt_iterator gsi;
5712 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5713 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5714 gimple *oldbinrhs = binrhs;
5715 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5716 gimple *newbinrhs = NULL;
5717 class loop *loop = loop_containing_stmt (stmt);
5718 tree lhs = gimple_assign_lhs (stmt);
5720 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5721 && is_reassociable_op (binrhs, rhscode, loop));
5723 gsi = gsi_for_stmt (stmt);
5725 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5726 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5727 gimple_assign_rhs_code (binrhs),
5728 gimple_assign_lhs (binlhs),
5729 gimple_assign_rhs2 (binrhs));
5730 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5731 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5732 gimple_set_uid (binrhs, gimple_uid (stmt));
5734 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5735 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5737 if (dump_file && (dump_flags & TDF_DETAILS))
5739 fprintf (dump_file, "Linearized: ");
5740 print_gimple_stmt (dump_file, stmt, 0);
5743 reassociate_stats.linearized++;
5744 update_stmt (stmt);
5746 gsi = gsi_for_stmt (oldbinrhs);
5747 reassoc_remove_stmt (&gsi);
5748 release_defs (oldbinrhs);
5750 gimple_set_visited (stmt, true);
5751 gimple_set_visited (binlhs, true);
5752 gimple_set_visited (binrhs, true);
5754 /* Tail recurse on the new rhs if it still needs reassociation. */
5755 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5756 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5757 want to change the algorithm while converting to tuples. */
5758 linearize_expr (stmt);
5761 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5762 it. Otherwise, return NULL. */
5764 static gimple *
5765 get_single_immediate_use (tree lhs)
5767 use_operand_p immuse;
5768 gimple *immusestmt;
5770 if (TREE_CODE (lhs) == SSA_NAME
5771 && single_imm_use (lhs, &immuse, &immusestmt)
5772 && is_gimple_assign (immusestmt))
5773 return immusestmt;
5775 return NULL;
5778 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5779 representing the negated value. Insertions of any necessary
5780 instructions go before GSI.
5781 This function is recursive in that, if you hand it "a_5" as the
5782 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5783 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5785 static tree
5786 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5788 gimple *negatedefstmt = NULL;
5789 tree resultofnegate;
5790 gimple_stmt_iterator gsi;
5791 unsigned int uid;
5793 /* If we are trying to negate a name, defined by an add, negate the
5794 add operands instead. */
5795 if (TREE_CODE (tonegate) == SSA_NAME)
5796 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5797 if (TREE_CODE (tonegate) == SSA_NAME
5798 && is_gimple_assign (negatedefstmt)
5799 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5800 && has_single_use (gimple_assign_lhs (negatedefstmt))
5801 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5803 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5804 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5805 tree lhs = gimple_assign_lhs (negatedefstmt);
5806 gimple *g;
5808 gsi = gsi_for_stmt (negatedefstmt);
5809 rhs1 = negate_value (rhs1, &gsi);
5811 gsi = gsi_for_stmt (negatedefstmt);
5812 rhs2 = negate_value (rhs2, &gsi);
5814 gsi = gsi_for_stmt (negatedefstmt);
5815 lhs = make_ssa_name (TREE_TYPE (lhs));
5816 gimple_set_visited (negatedefstmt, true);
5817 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5818 gimple_set_uid (g, gimple_uid (negatedefstmt));
5819 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5820 return lhs;
5823 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5824 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5825 NULL_TREE, true, GSI_SAME_STMT);
5826 gsi = *gsip;
5827 uid = gimple_uid (gsi_stmt (gsi));
5828 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5830 gimple *stmt = gsi_stmt (gsi);
5831 if (gimple_uid (stmt) != 0)
5832 break;
5833 gimple_set_uid (stmt, uid);
5835 return resultofnegate;
5838 /* Return true if we should break up the subtract in STMT into an add
5839 with negate. This is true when the subtract operands are really
5840 adds, or the subtract itself is used in an add expression. In
5841 either case, breaking up the subtract into an add with negate
5842 exposes the adds to reassociation. */
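/* A source-level sketch (hypothetical names): in
     c = a - b;   e = c + d;
   rewriting the subtract as c = a + -b lets the reassociation
   machinery see a, -b and d as one flattened operand list.  */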
5844 static bool
5845 should_break_up_subtract (gimple *stmt)
5847 tree lhs = gimple_assign_lhs (stmt);
5848 tree binlhs = gimple_assign_rhs1 (stmt);
5849 tree binrhs = gimple_assign_rhs2 (stmt);
5850 gimple *immusestmt;
5851 class loop *loop = loop_containing_stmt (stmt);
5853 if (TREE_CODE (binlhs) == SSA_NAME
5854 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5855 return true;
5857 if (TREE_CODE (binrhs) == SSA_NAME
5858 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5859 return true;
5861 if (TREE_CODE (lhs) == SSA_NAME
5862 && (immusestmt = get_single_immediate_use (lhs))
5863 && is_gimple_assign (immusestmt)
5864 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5865 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5866 && gimple_assign_rhs1 (immusestmt) == lhs)
5867 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5868 return true;
5869 return false;
5872 /* Transform STMT from A - B into A + -B. */
5874 static void
5875 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5877 tree rhs1 = gimple_assign_rhs1 (stmt);
5878 tree rhs2 = gimple_assign_rhs2 (stmt);
5880 if (dump_file && (dump_flags & TDF_DETAILS))
5882 fprintf (dump_file, "Breaking up subtract ");
5883 print_gimple_stmt (dump_file, stmt, 0);
5886 rhs2 = negate_value (rhs2, gsip);
5887 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5888 update_stmt (stmt);
5891 /* Determine whether STMT is a builtin call that raises an SSA name
5892 to an integer power and has only one use. If so, and this is early
5893 reassociation and unsafe math optimizations are permitted, place
5894 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5895 If any of these conditions does not hold, return FALSE. */
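/* For example (illustrative values): pow (x_1, 4.0) and
   __builtin_powi (x_1, 4) both yield *BASE = x_1 and *EXPONENT = 4,
   while pow (x_1, 2.5) is rejected because the exponent is not an
   exact integer, and exponents below 2 are rejected as well.  */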
5897 static bool
5898 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5900 tree arg1;
5901 REAL_VALUE_TYPE c, cint;
5903 switch (gimple_call_combined_fn (stmt))
5905 CASE_CFN_POW:
5906 if (flag_errno_math)
5907 return false;
5909 *base = gimple_call_arg (stmt, 0);
5910 arg1 = gimple_call_arg (stmt, 1);
5912 if (TREE_CODE (arg1) != REAL_CST)
5913 return false;
5915 c = TREE_REAL_CST (arg1);
5917 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5918 return false;
5920 *exponent = real_to_integer (&c);
5921 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5922 if (!real_identical (&c, &cint))
5923 return false;
5925 break;
5927 CASE_CFN_POWI:
5928 *base = gimple_call_arg (stmt, 0);
5929 arg1 = gimple_call_arg (stmt, 1);
5931 if (!tree_fits_shwi_p (arg1))
5932 return false;
5934 *exponent = tree_to_shwi (arg1);
5935 break;
5937 default:
5938 return false;
5941 /* Expanding negative exponents is generally unproductive, so we don't
5942 complicate matters with those. Exponents of zero and one should
5943 have been handled by expression folding. */
5944 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5945 return false;
5947 return true;
5950 /* Try to derive and add operand entry for OP to *OPS. Return false if
5951 unsuccessful. */
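/* Two illustrative cases (hypothetical SSA names): for a multiply
   chain containing t_1 = __builtin_powi (x_2, 3), x_2 is recorded as
   a factor occurring three times; for t_1 = -x_2, the entry is split
   into x_2 and -1 so the negation can be reassociated like any other
   factor.  */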
5953 static bool
5954 try_special_add_to_ops (vec<operand_entry *> *ops,
5955 enum tree_code code,
5956 tree op, gimple* def_stmt)
5958 tree base = NULL_TREE;
5959 HOST_WIDE_INT exponent = 0;
5961 if (TREE_CODE (op) != SSA_NAME
5962 || ! has_single_use (op))
5963 return false;
5965 if (code == MULT_EXPR
5966 && reassoc_insert_powi_p
5967 && flag_unsafe_math_optimizations
5968 && is_gimple_call (def_stmt)
5969 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5971 add_repeat_to_ops_vec (ops, base, exponent);
5972 gimple_set_visited (def_stmt, true);
5973 return true;
5975 else if (code == MULT_EXPR
5976 && is_gimple_assign (def_stmt)
5977 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5978 && !HONOR_SNANS (TREE_TYPE (op))
5979 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5980 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
5981 && (!FLOAT_TYPE_P (TREE_TYPE (op))
5982 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
5984 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5985 tree cst = build_minus_one_cst (TREE_TYPE (op));
5986 add_to_ops_vec (ops, rhs1);
5987 add_to_ops_vec (ops, cst);
5988 gimple_set_visited (def_stmt, true);
5989 return true;
5992 return false;
5995 /* Recursively linearize a binary expression that is the RHS of STMT.
5996 Place the operands of the expression tree in the vector named OPS. */
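/* E.g. (hypothetical SSA names, assuming every intermediate sum is
   single-use and reassociable): for w_1 = ((a_2 + b_3) + c_4) + d_5,
   OPS ends up holding the leaf operands a_2, b_3, c_4 and d_5.  */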
5998 static void
5999 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
6000 bool is_associative, bool set_visited)
6002 tree binlhs = gimple_assign_rhs1 (stmt);
6003 tree binrhs = gimple_assign_rhs2 (stmt);
6004 gimple *binlhsdef = NULL, *binrhsdef = NULL;
6005 bool binlhsisreassoc = false;
6006 bool binrhsisreassoc = false;
6007 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
6008 class loop *loop = loop_containing_stmt (stmt);
6010 if (set_visited)
6011 gimple_set_visited (stmt, true);
6013 if (TREE_CODE (binlhs) == SSA_NAME)
6015 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
6016 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
6017 && !stmt_could_throw_p (cfun, binlhsdef));
6020 if (TREE_CODE (binrhs) == SSA_NAME)
6022 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
6023 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
6024 && !stmt_could_throw_p (cfun, binrhsdef));
6027 /* If the LHS is not reassociable, but the RHS is, we need to swap
6028 them. If neither is reassociable, there is nothing we can do, so
6029 just put them in the ops vector. If the LHS is reassociable,
6030 linearize it. If both are reassociable, then linearize the RHS
6031 and the LHS. */
6033 if (!binlhsisreassoc)
6035 /* If this is not an associative operation like division, give up. */
6036 if (!is_associative)
6038 add_to_ops_vec (ops, binrhs);
6039 return;
6042 if (!binrhsisreassoc)
6044 bool swap = false;
6045 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6046 /* If we add ops for the rhs we expect to be able to recurse
6047 to it via the lhs during expression rewrite so swap
6048 operands. */
6049 swap = true;
6050 else
6051 add_to_ops_vec (ops, binrhs);
6053 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
6054 add_to_ops_vec (ops, binlhs);
6056 if (!swap)
6057 return;
6060 if (dump_file && (dump_flags & TDF_DETAILS))
6062 fprintf (dump_file, "swapping operands of ");
6063 print_gimple_stmt (dump_file, stmt, 0);
6066 swap_ssa_operands (stmt,
6067 gimple_assign_rhs1_ptr (stmt),
6068 gimple_assign_rhs2_ptr (stmt));
6069 update_stmt (stmt);
6071 if (dump_file && (dump_flags & TDF_DETAILS))
6073 fprintf (dump_file, " is now ");
6074 print_gimple_stmt (dump_file, stmt, 0);
6076 if (!binrhsisreassoc)
6077 return;
6079 /* We want to make it so the lhs is always the reassociative op,
6080 so swap. */
6081 std::swap (binlhs, binrhs);
6083 else if (binrhsisreassoc)
6085 linearize_expr (stmt);
6086 binlhs = gimple_assign_rhs1 (stmt);
6087 binrhs = gimple_assign_rhs2 (stmt);
6090 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6091 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6092 rhscode, loop));
6093 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6094 is_associative, set_visited);
6096 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6097 add_to_ops_vec (ops, binrhs);
6100 /* Repropagate the negates back into subtracts, since no other pass
6101 currently does it. */
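/* Illustrative effect (hypothetical names): given
     t_1 = -b_2;   x_3 = a_4 + t_1;
   the addition is rewritten as x_3 = a_4 - b_2, and a subtract of the
   negate such as y_5 = c_6 - t_1 becomes y_5 = c_6 + b_2.  */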
6103 static void
6104 repropagate_negates (void)
6106 unsigned int i = 0;
6107 tree negate;
6109 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6111 gimple *user = get_single_immediate_use (negate);
6112 if (!user || !is_gimple_assign (user))
6113 continue;
6115 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6116 if (TREE_CODE (negateop) == SSA_NAME
6117 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6118 continue;
6120 /* The negate operand can be either operand of a PLUS_EXPR
6121 (it can be the LHS if the RHS is a constant for example).
6123 Force the negate operand to the RHS of the PLUS_EXPR, then
6124 transform the PLUS_EXPR into a MINUS_EXPR. */
6125 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6127 /* If the negated operand appears on the LHS of the
6128 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6129 to force the negated operand to the RHS of the PLUS_EXPR. */
6130 if (gimple_assign_rhs1 (user) == negate)
6132 swap_ssa_operands (user,
6133 gimple_assign_rhs1_ptr (user),
6134 gimple_assign_rhs2_ptr (user));
6137 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6138 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6139 if (gimple_assign_rhs2 (user) == negate)
6141 tree rhs1 = gimple_assign_rhs1 (user);
6142 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6143 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6144 negateop);
6145 update_stmt (user);
6148 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6150 if (gimple_assign_rhs1 (user) == negate)
6152 /* We have
6153 x = -negateop
6154 y = x - b
6155 which we transform into
6156 x = negateop + b
6157 y = -x .
6158 This pushes down the negate which we possibly can merge
6159 into some other operation, hence insert it into the
6160 plus_negates vector. */
6161 gimple *feed = SSA_NAME_DEF_STMT (negate);
6162 tree b = gimple_assign_rhs2 (user);
6163 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6164 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6165 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6166 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6167 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6168 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6169 user = gsi_stmt (gsi2);
6170 update_stmt (user);
6171 reassoc_remove_stmt (&gsi);
6172 release_defs (feed);
6173 plus_negates.safe_push (gimple_assign_lhs (user));
6175 else
6177 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6178 getting rid of one operation. */
6179 tree rhs1 = gimple_assign_rhs1 (user);
6180 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6181 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6182 update_stmt (gsi_stmt (gsi));
6188 /* Break up subtract operations in block BB.
6190 We do this top down because we don't know whether the subtract is
6191 part of a possible chain of reassociation except at the top.
6193 IE given
6194 d = f + g
6195 c = a + e
6196 b = c - d
6197 q = b - r
6198 k = t - q
6200 we want to break up k = t - q, but we won't until we've transformed q
6201 = b - r, which won't be broken up until we transform b = c - d.
6203 En passant, clear the GIMPLE visited flag on every statement
6204 and set UIDs within each basic block. */
6206 static void
6207 break_up_subtract_bb (basic_block bb)
6209 gimple_stmt_iterator gsi;
6210 basic_block son;
6211 unsigned int uid = 1;
6213 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6215 gimple *stmt = gsi_stmt (gsi);
6216 gimple_set_visited (stmt, false);
6217 gimple_set_uid (stmt, uid++);
6219 if (!is_gimple_assign (stmt)
6220 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6221 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6222 continue;
6224 /* Look for simple gimple subtract operations. */
6225 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6227 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6228 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6229 continue;
6231 /* Check for a subtract used only in an addition. If this
6232 is the case, transform it into an add of a negate for better
6233 reassociation. IE transform C = A-B into C = A + -B if C
6234 is only used in an addition. */
6235 if (should_break_up_subtract (stmt))
6236 break_up_subtract (stmt, &gsi);
6238 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6239 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6240 plus_negates.safe_push (gimple_assign_lhs (stmt));
6242 for (son = first_dom_son (CDI_DOMINATORS, bb);
6243 son;
6244 son = next_dom_son (CDI_DOMINATORS, son))
6245 break_up_subtract_bb (son);
6248 /* Used for repeated factor analysis. */
6249 struct repeat_factor
6251 /* An SSA name that occurs in a multiply chain. */
6252 tree factor;
6254 /* Cached rank of the factor. */
6255 unsigned rank;
6257 /* Number of occurrences of the factor in the chain. */
6258 HOST_WIDE_INT count;
6260 /* An SSA name representing the product of this factor and
6261 all factors appearing later in the repeated factor vector. */
6262 tree repr;
6266 static vec<repeat_factor> repeat_factor_vec;
6268 /* Used for sorting the repeat factor vector. Sort primarily by
6269 ascending occurrence count, secondarily by descending rank. */
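/* E.g. (hypothetical entries): a factor with count 2 sorts before one
   with count 4, and among factors with equal counts the one with the
   higher rank comes first.  */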
6271 static int
6272 compare_repeat_factors (const void *x1, const void *x2)
6274 const repeat_factor *rf1 = (const repeat_factor *) x1;
6275 const repeat_factor *rf2 = (const repeat_factor *) x2;
6277 if (rf1->count != rf2->count)
6278 return rf1->count - rf2->count;
6280 return rf2->rank - rf1->rank;
6283 /* Look for repeated operands in OPS in the multiply tree rooted at
6284 STMT. Replace them with an optimal sequence of multiplies and powi
6285 builtin calls, and remove the used operands from OPS. Return an
6286 SSA name representing the value of the replacement sequence. */
6288 static tree
6289 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6291 unsigned i, j, vec_len;
6292 int ii;
6293 operand_entry *oe;
6294 repeat_factor *rf1, *rf2;
6295 repeat_factor rfnew;
6296 tree result = NULL_TREE;
6297 tree target_ssa, iter_result;
6298 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6299 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6300 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6301 gimple *mul_stmt, *pow_stmt;
6303 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6304 target, unless type is integral. */
6305 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6306 return NULL_TREE;
6308 /* Allocate the repeated factor vector. */
6309 repeat_factor_vec.create (10);
6311 /* Scan the OPS vector for all SSA names in the product and build
6312 up a vector of occurrence counts for each factor. */
6313 FOR_EACH_VEC_ELT (*ops, i, oe)
6315 if (TREE_CODE (oe->op) == SSA_NAME)
6317 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6319 if (rf1->factor == oe->op)
6321 rf1->count += oe->count;
6322 break;
6326 if (j >= repeat_factor_vec.length ())
6328 rfnew.factor = oe->op;
6329 rfnew.rank = oe->rank;
6330 rfnew.count = oe->count;
6331 rfnew.repr = NULL_TREE;
6332 repeat_factor_vec.safe_push (rfnew);
6337 /* Sort the repeated factor vector by (a) increasing occurrence count,
6338 and (b) decreasing rank. */
6339 repeat_factor_vec.qsort (compare_repeat_factors);
6341 /* It is generally best to combine as many base factors as possible
6342 into a product before applying __builtin_powi to the result.
6343 However, the sort order chosen for the repeated factor vector
6344 allows us to cache partial results for the product of the base
6345 factors for subsequent use. When we already have a cached partial
6346 result from a previous iteration, it is best to make use of it
6347 before looking for another __builtin_pow opportunity.
6349 As an example, consider x * x * y * y * y * z * z * z * z.
6350 We want to first compose the product x * y * z, raise it to the
6351 second power, then multiply this by y * z, and finally multiply
6352 by z. This can be done in 5 multiplies provided we cache y * z
6353 for use in both expressions:
6355 t1 = y * z
6356 t2 = t1 * x
6357 t3 = t2 * t2
6358 t4 = t1 * t3
6359 result = t4 * z
6361 If we instead ignored the cached y * z and first multiplied by
6362 the __builtin_pow opportunity z * z, we would get the inferior:
6364 t1 = y * z
6365 t2 = t1 * x
6366 t3 = t2 * t2
6367 t4 = z * z
6368 t5 = t3 * t4
6369 result = t5 * y */
6371 vec_len = repeat_factor_vec.length ();
6373 /* Repeatedly look for opportunities to create a builtin_powi call. */
6374 while (true)
6376 HOST_WIDE_INT power;
6378 /* First look for the largest cached product of factors from
6379 preceding iterations. If found, create a builtin_powi for
6380 it if the minimum occurrence count for its factors is at
6381 least 2, or just use this cached product as our next
6382 multiplicand if the minimum occurrence count is 1. */
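/* E.g., an illustrative trace of the x * x * y * y * y * z * z * z * z
   example above: the first iteration forms powi (x * y * z, 2), the
   second finds the cached product y * z with one occurrence left and
   simply multiplies it in, and the third multiplies in the remaining
   z.  */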
6383 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6385 if (rf1->repr && rf1->count > 0)
6386 break;
6389 if (j < vec_len)
6391 power = rf1->count;
6393 if (power == 1)
6395 iter_result = rf1->repr;
6397 if (dump_file && (dump_flags & TDF_DETAILS))
6399 unsigned elt;
6400 repeat_factor *rf;
6401 fputs ("Multiplying by cached product ", dump_file);
6402 for (elt = j; elt < vec_len; elt++)
6404 rf = &repeat_factor_vec[elt];
6405 print_generic_expr (dump_file, rf->factor);
6406 if (elt < vec_len - 1)
6407 fputs (" * ", dump_file);
6409 fputs ("\n", dump_file);
6412 else
6414 if (INTEGRAL_TYPE_P (type))
6416 gcc_assert (power > 1);
6417 gimple_stmt_iterator gsip = gsi;
6418 gsi_prev (&gsip);
6419 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6420 rf1->repr, power);
6421 gimple_stmt_iterator gsic = gsi;
6422 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6424 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6425 gimple_set_visited (gsi_stmt (gsic), true);
6426 gsi_prev (&gsic);
6429 else
6431 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6432 pow_stmt
6433 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6434 build_int_cst (integer_type_node,
6435 power));
6436 gimple_call_set_lhs (pow_stmt, iter_result);
6437 gimple_set_location (pow_stmt, gimple_location (stmt));
6438 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6439 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6442 if (dump_file && (dump_flags & TDF_DETAILS))
6444 unsigned elt;
6445 repeat_factor *rf;
6446 fputs ("Building __builtin_pow call for cached product (",
6447 dump_file);
6448 for (elt = j; elt < vec_len; elt++)
6450 rf = &repeat_factor_vec[elt];
6451 print_generic_expr (dump_file, rf->factor);
6452 if (elt < vec_len - 1)
6453 fputs (" * ", dump_file);
6455 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6456 power);
6460 else
6462 /* Otherwise, find the first factor in the repeated factor
6463 vector whose occurrence count is at least 2. If no such
6464 factor exists, there are no builtin_powi opportunities
6465 remaining. */
6466 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6468 if (rf1->count >= 2)
6469 break;
6472 if (j >= vec_len)
6473 break;
6475 power = rf1->count;
6477 if (dump_file && (dump_flags & TDF_DETAILS))
6479 unsigned elt;
6480 repeat_factor *rf;
6481 fputs ("Building __builtin_pow call for (", dump_file);
6482 for (elt = j; elt < vec_len; elt++)
6484 rf = &repeat_factor_vec[elt];
6485 print_generic_expr (dump_file, rf->factor);
6486 if (elt < vec_len - 1)
6487 fputs (" * ", dump_file);
6489 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6492 reassociate_stats.pows_created++;
6494 /* Visit each element of the vector in reverse order (so that
6495 high-occurrence elements are visited first, and within the
6496 same occurrence count, lower-ranked elements are visited
6497 first). Form a linear product of all elements in this order
6498 whose occurrence count is at least that of element J.
6499 Record the SSA name representing the product of each element
6500 with all subsequent elements in the vector. */
6501 if (j == vec_len - 1)
6502 rf1->repr = rf1->factor;
6503 else
6505 for (ii = vec_len - 2; ii >= (int)j; ii--)
6507 tree op1, op2;
6509 rf1 = &repeat_factor_vec[ii];
6510 rf2 = &repeat_factor_vec[ii + 1];
6512 /* Init the last factor's representative to be itself. */
6513 if (!rf2->repr)
6514 rf2->repr = rf2->factor;
6516 op1 = rf1->factor;
6517 op2 = rf2->repr;
6519 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6520 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6521 op1, op2);
6522 gimple_set_location (mul_stmt, gimple_location (stmt));
6523 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6524 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6525 rf1->repr = target_ssa;
6527 /* Don't reprocess the multiply we just introduced. */
6528 gimple_set_visited (mul_stmt, true);
6532 /* Form a call to __builtin_powi for the maximum product
6533 just formed, raised to the power obtained earlier. */
6534 rf1 = &repeat_factor_vec[j];
6535 if (INTEGRAL_TYPE_P (type))
6537 gcc_assert (power > 1);
6538 gimple_stmt_iterator gsip = gsi;
6539 gsi_prev (&gsip);
6540 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6541 rf1->repr, power);
6542 gimple_stmt_iterator gsic = gsi;
6543 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6545 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6546 gimple_set_visited (gsi_stmt (gsic), true);
6547 gsi_prev (&gsic);
6550 else
6552 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6553 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6554 build_int_cst (integer_type_node,
6555 power));
6556 gimple_call_set_lhs (pow_stmt, iter_result);
6557 gimple_set_location (pow_stmt, gimple_location (stmt));
6558 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6559 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6563 /* If we previously formed at least one other builtin_powi call,
6564 form the product of this one and those others. */
6565 if (result)
6567 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6568 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6569 result, iter_result);
6570 gimple_set_location (mul_stmt, gimple_location (stmt));
6571 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6572 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6573 gimple_set_visited (mul_stmt, true);
6574 result = new_result;
6576 else
6577 result = iter_result;
6579 /* Decrement the occurrence count of each element in the product
6580 by the count found above, and remove this many copies of each
6581 factor from OPS. */
6582 for (i = j; i < vec_len; i++)
6584 unsigned k = power;
6585 unsigned n;
6587 rf1 = &repeat_factor_vec[i];
6588 rf1->count -= power;
6590 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6592 if (oe->op == rf1->factor)
6594 if (oe->count <= k)
6596 ops->ordered_remove (n);
6597 k -= oe->count;
6599 if (k == 0)
6600 break;
6602 else
6604 oe->count -= k;
6605 break;
6612 /* At this point all elements in the repeated factor vector have a
6613 remaining occurrence count of 0 or 1, and those with a count of 1
6614 don't have cached representatives. Re-sort the ops vector and
6615 clean up. */
6616 ops->qsort (sort_by_operand_rank);
6617 repeat_factor_vec.release ();
6619 /* Return the final product computed herein. Note that there may
6620 still be some elements with single occurrence count left in OPS;
6621 those will be handled by the normal reassociation logic. */
6622 return result;
6625 /* Attempt to optimize
6626 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6627 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
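/* E.g. (illustrative constants): if the constant operand of the
   product is 2.5, an operand copysign (3.0, y_1) is replaced by a new
   copysign (7.5, y_1) call and the 2.5 is dropped from OPS; a negative
   constant additionally negates the new call's result.  */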
6629 static void
6630 attempt_builtin_copysign (vec<operand_entry *> *ops)
6632 operand_entry *oe;
6633 unsigned int i;
6634 unsigned int length = ops->length ();
6635 tree cst = ops->last ()->op;
6637 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6638 return;
6640 FOR_EACH_VEC_ELT (*ops, i, oe)
6642 if (TREE_CODE (oe->op) == SSA_NAME
6643 && has_single_use (oe->op))
6645 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6646 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6648 tree arg0, arg1;
6649 switch (gimple_call_combined_fn (old_call))
6651 CASE_CFN_COPYSIGN:
6652 CASE_CFN_COPYSIGN_FN:
6653 arg0 = gimple_call_arg (old_call, 0);
6654 arg1 = gimple_call_arg (old_call, 1);
6655 /* The first argument of copysign must be a constant,
6656 otherwise there's nothing to do. */
6657 if (TREE_CODE (arg0) == REAL_CST)
6659 tree type = TREE_TYPE (arg0);
6660 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6661 /* If we couldn't fold to a single constant, skip it.
6662 That happens e.g. for inexact multiplication when
6663 -frounding-math. */
6664 if (mul == NULL_TREE)
6665 break;
6666 /* Instead of adjusting OLD_CALL, let's build a new
6667 call to not leak the LHS and prevent keeping bogus
6668 debug statements. DCE will clean up the old call. */
6669 gcall *new_call;
6670 if (gimple_call_internal_p (old_call))
6671 new_call = gimple_build_call_internal
6672 (IFN_COPYSIGN, 2, mul, arg1);
6673 else
6674 new_call = gimple_build_call
6675 (gimple_call_fndecl (old_call), 2, mul, arg1);
6676 tree lhs = make_ssa_name (type);
6677 gimple_call_set_lhs (new_call, lhs);
6678 gimple_set_location (new_call,
6679 gimple_location (old_call));
6680 insert_stmt_after (new_call, old_call);
6681 /* We've used the constant, get rid of it. */
6682 ops->pop ();
6683 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6684 /* Handle the CST1 < 0 case by negating the result. */
6685 if (cst1_neg)
6687 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6688 gimple *negate_stmt
6689 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6690 insert_stmt_after (negate_stmt, new_call);
6691 oe->op = negrhs;
6693 else
6694 oe->op = lhs;
6695 if (dump_file && (dump_flags & TDF_DETAILS))
6697 fprintf (dump_file, "Optimizing copysign: ");
6698 print_generic_expr (dump_file, cst);
6699 fprintf (dump_file, " * COPYSIGN (");
6700 print_generic_expr (dump_file, arg0);
6701 fprintf (dump_file, ", ");
6702 print_generic_expr (dump_file, arg1);
6703 fprintf (dump_file, ") into %sCOPYSIGN (",
6704 cst1_neg ? "-" : "");
6705 print_generic_expr (dump_file, mul);
6706 fprintf (dump_file, ", ");
6707 print_generic_expr (dump_file, arg1);
6708 fprintf (dump_file, ")\n");
6710 return;
6712 break;
6713 default:
6714 break;
6721 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6723 static void
6724 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6726 tree rhs1;
6728 if (dump_file && (dump_flags & TDF_DETAILS))
6730 fprintf (dump_file, "Transforming ");
6731 print_gimple_stmt (dump_file, stmt, 0);
6734 rhs1 = gimple_assign_rhs1 (stmt);
6735 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6736 update_stmt (stmt);
6737 remove_visited_stmt_chain (rhs1);
6739 if (dump_file && (dump_flags & TDF_DETAILS))
6741 fprintf (dump_file, " into ");
6742 print_gimple_stmt (dump_file, stmt, 0);
6746 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6748 static void
6749 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6750 tree rhs1, tree rhs2)
6752 if (dump_file && (dump_flags & TDF_DETAILS))
6754 fprintf (dump_file, "Transforming ");
6755 print_gimple_stmt (dump_file, stmt, 0);
6758 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6759 update_stmt (gsi_stmt (*gsi));
6760 remove_visited_stmt_chain (rhs1);
6762 if (dump_file && (dump_flags & TDF_DETAILS))
6764 fprintf (dump_file, " into ");
6765 print_gimple_stmt (dump_file, stmt, 0);
6769 /* Rearrange ops so that more FMAs can be generated when the chain has
6770 more than 2 FMA candidates.  Put no-mult ops and mult ops alternately at
6771 the end of the queue, which is conducive to generating more FMAs and
6772 reducing the loss of FMAs when breaking the chain.
6773 E.g.
6774 a * b + c * d + e generates:
6776 _4 = c_9(D) * d_10(D);
6777 _12 = .FMA (a_7(D), b_8(D), _4);
6778 _11 = e_6(D) + _12;
6780 Rearrange ops to -> e + a * b + c * d generates:
6782 _4 = .FMA (c_7(D), d_8(D), _3);
6783 _11 = .FMA (a_5(D), b_6(D), _4); */
6784 static bool
6785 rank_ops_for_fma (vec<operand_entry *> *ops)
6787 operand_entry *oe;
6788 unsigned int i;
6789 unsigned int ops_length = ops->length ();
6790 auto_vec<operand_entry *> ops_mult;
6791 auto_vec<operand_entry *> ops_others;
6793 FOR_EACH_VEC_ELT (*ops, i, oe)
6795 if (TREE_CODE (oe->op) == SSA_NAME)
6797 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6798 if (is_gimple_assign (def_stmt)
6799 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6800 ops_mult.safe_push (oe);
6801 else
6802 ops_others.safe_push (oe);
6804 else
6805 ops_others.safe_push (oe);
6807 /* 1. When ops_mult.length == 2, as in the following case,
6809 a * b + c * d + e,
6811 we need to rearrange the ops:
6813 putting the ops that are not defined by a mult in front can generate more FMAs.
6815 2. If all ops are defined by a mult, we don't need to rearrange them. */
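/* Illustrative layout (hypothetical operands): with ops_mult holding
   {m1, m2} and ops_others holding {o1, o2}, the rebuilt vector below
   becomes m1, o1, m2, o2, i.e. the non-mult ops are interleaved with
   the mult ops starting from the back of the queue.  */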
6816 if (ops_mult.length () >= 2 && ops_mult.length () != ops_length)
6818 /* Put no-mult ops and mult ops alternately at the end of the
6819 queue, which is conducive to generating more FMA and reducing the
6820 loss of FMA when breaking the chain. */
6821 ops->truncate (0);
6822 ops->splice (ops_mult);
6823 int j, opindex = ops->length ();
6824 int others_length = ops_others.length ();
6825 for (j = 0; j < others_length; j++)
6827 oe = ops_others.pop ();
6828 ops->quick_insert (opindex, oe);
6829 if (opindex > 0)
6830 opindex--;
6832 return true;
6834 return false;
6836 /* Reassociate expressions in basic block BB and its children in the
6837 post-dominator tree.
6839 Bubble up return status from maybe_optimize_range_tests. */
6841 static bool
6842 reassociate_bb (basic_block bb)
6844 gimple_stmt_iterator gsi;
6845 basic_block son;
6846 gimple *stmt = last_nondebug_stmt (bb);
6847 bool cfg_cleanup_needed = false;
6849 if (stmt && !gimple_visited_p (stmt))
6850 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6852 bool do_prev = false;
6853 for (gsi = gsi_last_bb (bb);
6854 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6856 do_prev = true;
6857 stmt = gsi_stmt (gsi);
6859 if (is_gimple_assign (stmt)
6860 && !stmt_could_throw_p (cfun, stmt))
6862 tree lhs, rhs1, rhs2;
6863 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6865 /* If this was part of an already processed statement,
6866 we don't need to touch it again. */
6867 if (gimple_visited_p (stmt))
6869 /* This statement might have become dead because of previous
6870 reassociations. */
6871 if (has_zero_uses (gimple_get_lhs (stmt)))
6873 reassoc_remove_stmt (&gsi);
6874 release_defs (stmt);
6875 /* We might end up removing the last stmt above which
6876 places the iterator to the end of the sequence.
6877 Reset it to the last stmt in this case and make sure
6878 we don't do gsi_prev in that case. */
6879 if (gsi_end_p (gsi))
6881 gsi = gsi_last_bb (bb);
6882 do_prev = false;
6885 continue;
6888 /* If this is not a gimple binary expression, there is
6889 nothing for us to do with it. */
6890 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6891 continue;
6893 lhs = gimple_assign_lhs (stmt);
6894 rhs1 = gimple_assign_rhs1 (stmt);
6895 rhs2 = gimple_assign_rhs2 (stmt);
6897 /* For non-bit or min/max operations we can't associate
6898 all types. Verify that here. */
6899 if ((rhs_code != BIT_IOR_EXPR
6900 && rhs_code != BIT_AND_EXPR
6901 && rhs_code != BIT_XOR_EXPR
6902 && rhs_code != MIN_EXPR
6903 && rhs_code != MAX_EXPR
6904 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6905 || !can_reassociate_op_p (rhs1)
6906 || !can_reassociate_op_p (rhs2))
6907 continue;
6909 if (associative_tree_code (rhs_code))
6911 auto_vec<operand_entry *> ops;
6912 tree powi_result = NULL_TREE;
6913 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6915 /* There may be no immediate uses left by the time we
6916 get here because we may have eliminated them all. */
6917 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6918 continue;
6920 gimple_set_visited (stmt, true);
6921 linearize_expr_tree (&ops, stmt, true, true);
6922 ops.qsort (sort_by_operand_rank);
6923 int orig_len = ops.length ();
6924 optimize_ops_list (rhs_code, &ops);
6925 if (undistribute_ops_list (rhs_code, &ops,
6926 loop_containing_stmt (stmt)))
6928 ops.qsort (sort_by_operand_rank);
6929 optimize_ops_list (rhs_code, &ops);
6931 if (undistribute_bitref_for_vector (rhs_code, &ops,
6932 loop_containing_stmt (stmt)))
6934 ops.qsort (sort_by_operand_rank);
6935 optimize_ops_list (rhs_code, &ops);
6937 if (rhs_code == PLUS_EXPR
6938 && transform_add_to_multiply (&ops))
6939 ops.qsort (sort_by_operand_rank);
6941 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6943 if (is_vector)
6944 optimize_vec_cond_expr (rhs_code, &ops);
6945 else
6946 optimize_range_tests (rhs_code, &ops, NULL);
6949 if (rhs_code == MULT_EXPR && !is_vector)
6951 attempt_builtin_copysign (&ops);
6953 if (reassoc_insert_powi_p
6954 && (flag_unsafe_math_optimizations
6955 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6956 powi_result = attempt_builtin_powi (stmt, &ops);
6959 operand_entry *last;
6960 bool negate_result = false;
6961 if (ops.length () > 1
6962 && rhs_code == MULT_EXPR)
6964 last = ops.last ();
6965 if ((integer_minus_onep (last->op)
6966 || real_minus_onep (last->op))
6967 && !HONOR_SNANS (TREE_TYPE (lhs))
6968 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6969 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6971 ops.pop ();
6972 negate_result = true;
6976 tree new_lhs = lhs;
6977 /* If the operand vector is now empty, all operands were
6978 consumed by the __builtin_powi optimization. */
6979 if (ops.length () == 0)
6980 transform_stmt_to_copy (&gsi, stmt, powi_result);
6981 else if (ops.length () == 1)
6983 tree last_op = ops.last ()->op;
6985 /* If the stmt that defines the operand has to be inserted, insert it
6986 before the use. */
6987 if (ops.last ()->stmt_to_insert)
6988 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6989 if (powi_result)
6990 transform_stmt_to_multiply (&gsi, stmt, last_op,
6991 powi_result);
6992 else
6993 transform_stmt_to_copy (&gsi, stmt, last_op);
6995 else
6997 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6998 int ops_num = ops.length ();
6999 int width;
7000 bool has_fma = false;
7002 /* For binary bit operations, if there are at least 3
7003 operands and the last operand in OPS is a constant,
7004 move it to the front. This helps ensure that we generate
7005 (X & Y) & C rather than (X & C) & Y. The former will
7006 often match a canonical bit test when we get to RTL. */
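/* Concrete sketch (hypothetical operands): for OPS {x_1, y_2, 0x10}
   the constant 0x10 is swapped with the first entry, so the rewritten
   chain reads (x_1 & y_2) & 0x10 rather than (x_1 & 0x10) & y_2.  */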
7007 if (ops.length () > 2
7008 && (rhs_code == BIT_AND_EXPR
7009 || rhs_code == BIT_IOR_EXPR
7010 || rhs_code == BIT_XOR_EXPR)
7011 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7012 std::swap (*ops[0], *ops[ops_num - 1]);
7014 optimization_type opt_type = bb_optimization_type (bb);
7016 /* If the target supports FMA, rank_ops_for_fma will detect whether
7017 the chain has FMA candidates and rearrange the ops if so. */
7018 if (direct_internal_fn_supported_p (IFN_FMA,
7019 TREE_TYPE (lhs),
7020 opt_type)
7021 && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7023 has_fma = rank_ops_for_fma (&ops);
7026 /* Only rewrite the expression tree into parallel form in the
7027 last reassoc pass, to avoid useless back-and-forth work
7028 with the initial linearization. */
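/* Roughly speaking (illustrative): with width 2, an eight-term sum is
   rewritten as two independent partial sums that are combined at the
   end, shortening the critical dependency chain.  */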
7029 if (!reassoc_insert_powi_p
7030 && ops.length () > 3
7031 && (width = get_reassociation_width (ops_num, rhs_code,
7032 mode)) > 1)
7034 if (dump_file && (dump_flags & TDF_DETAILS))
7035 fprintf (dump_file,
7036 "Width = %d was chosen for reassociation\n",
7037 width);
7038 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7039 width,
7040 has_fma,
7041 ops);
7043 else
7045 /* When there are three operands left, we want
7046 to make sure the ones that get the double
7047 binary op are chosen wisely. */
7048 int len = ops.length ();
7049 if (len >= 3 && !has_fma)
7050 swap_ops_for_binary_stmt (ops, len - 3);
7052 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7053 powi_result != NULL
7054 || negate_result,
7055 len != orig_len);
7058 /* If we combined some repeated factors into a
7059 __builtin_powi call, multiply that result by the
7060 reassociated operands. */
7061 if (powi_result)
7063 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7064 tree type = TREE_TYPE (lhs);
7065 tree target_ssa = make_temp_ssa_name (type, NULL,
7066 "reassocpow");
7067 gimple_set_lhs (lhs_stmt, target_ssa);
7068 update_stmt (lhs_stmt);
7069 if (lhs != new_lhs)
7071 target_ssa = new_lhs;
7072 new_lhs = lhs;
7074 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7075 powi_result, target_ssa);
7076 gimple_set_location (mul_stmt, gimple_location (stmt));
7077 gimple_set_uid (mul_stmt, gimple_uid (stmt));
7078 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7082 if (negate_result)
7084 stmt = SSA_NAME_DEF_STMT (lhs);
7085 tree tmp = make_ssa_name (TREE_TYPE (lhs));
7086 gimple_set_lhs (stmt, tmp);
7087 if (lhs != new_lhs)
7088 tmp = new_lhs;
7089 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7090 tmp);
7091 gimple_set_uid (neg_stmt, gimple_uid (stmt));
7092 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7093 update_stmt (stmt);
7098 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
7099 son;
7100 son = next_dom_son (CDI_POST_DOMINATORS, son))
7101 cfg_cleanup_needed |= reassociate_bb (son);
7103 return cfg_cleanup_needed;
7106 /* Add jumps around shifts for range tests turned into bit tests.
7107 For each SSA_NAME VAR we have code like:
7108 VAR = ...; // final stmt of range comparison
7109 // bit test here...;
7110 OTHERVAR = ...; // final stmt of the bit test sequence
7111 RES = VAR | OTHERVAR;
7112 Turn the above into:
7113 VAR = ...;
7114 if (VAR != 0)
7115 goto <l3>;
7116 else
7117 goto <l2>;
7118 <l2>:
7119 // bit test here...;
7120 OTHERVAR = ...;
7121 <l3>:
7122 # RES = PHI<1(l1), OTHERVAR(l2)>; */
7124 static void
7125 branch_fixup (void)
7127 tree var;
7128 unsigned int i;
7130 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7132 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7133 gimple *use_stmt;
7134 use_operand_p use;
7135 bool ok = single_imm_use (var, &use, &use_stmt);
7136 gcc_assert (ok
7137 && is_gimple_assign (use_stmt)
7138 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7139 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7141 basic_block cond_bb = gimple_bb (def_stmt);
7142 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7143 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7145 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7146 gimple *g = gimple_build_cond (NE_EXPR, var,
7147 build_zero_cst (TREE_TYPE (var)),
7148 NULL_TREE, NULL_TREE);
7149 location_t loc = gimple_location (use_stmt);
7150 gimple_set_location (g, loc);
7151 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7153 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7154 etrue->probability = profile_probability::even ();
7155 edge efalse = find_edge (cond_bb, then_bb);
7156 efalse->flags = EDGE_FALSE_VALUE;
7157 efalse->probability -= etrue->probability;
7158 then_bb->count -= etrue->count ();
7160 tree othervar = NULL_TREE;
7161 if (gimple_assign_rhs1 (use_stmt) == var)
7162 othervar = gimple_assign_rhs2 (use_stmt);
7163 else if (gimple_assign_rhs2 (use_stmt) == var)
7164 othervar = gimple_assign_rhs1 (use_stmt);
7165 else
7166 gcc_unreachable ();
7167 tree lhs = gimple_assign_lhs (use_stmt);
7168 gphi *phi = create_phi_node (lhs, merge_bb);
7169 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7170 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7171 gsi = gsi_for_stmt (use_stmt);
7172 gsi_remove (&gsi, true);
7174 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7175 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7177 reassoc_branch_fixups.release ();
7180 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7181 void debug_ops_vector (vec<operand_entry *> ops);
7183 /* Dump the operand entry vector OPS to FILE. */
7185 void
7186 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7188 operand_entry *oe;
7189 unsigned int i;
7191 FOR_EACH_VEC_ELT (ops, i, oe)
7193 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7194 print_generic_expr (file, oe->op);
7195 fprintf (file, "\n");
7199 /* Dump the operand entry vector OPS to STDERR. */
7201 DEBUG_FUNCTION void
7202 debug_ops_vector (vec<operand_entry *> ops)
7204 dump_ops_vector (stderr, ops);
7207 /* Bubble up return status from reassociate_bb. */
7209 static bool
7210 do_reassoc (void)
7212 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7213 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7216 /* Initialize the reassociation pass. */
7218 static void
7219 init_reassoc (void)
7221 int i;
7222 int64_t rank = 2;
7223 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7225 /* Find the loops, so that we can prevent moving calculations in
7226 them. */
7227 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7229 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7231 next_operand_entry_id = 0;
7233 /* Reverse RPO (Reverse Post Order) will give us something where
7234 deeper loops come later. */
7235 pre_and_rev_post_order_compute (NULL, bbs, false);
7236 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7237 operand_rank = new hash_map<tree, int64_t>;
7239 /* Give each default definition a distinct rank. This includes
7240 parameters and the static chain. Walk backwards over all
7241 SSA names so that we get proper rank ordering according
7242 to tree_swap_operands_p. */
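/* E.g. (illustrative): starting from RANK == 2, the default
   definitions receive ranks 3, 4, ..., while each basic block below
   gets a base rank of (++rank) << 16, far larger than any
   default-definition rank.  */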
7243 for (i = num_ssa_names - 1; i > 0; --i)
7245 tree name = ssa_name (i);
7246 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7247 insert_operand_rank (name, ++rank);
7250 /* Set up rank for each BB */
7251 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7252 bb_rank[bbs[i]] = ++rank << 16;
7254 free (bbs);
7255 calculate_dominance_info (CDI_POST_DOMINATORS);
7256 plus_negates = vNULL;
7259 /* Cleanup after the reassociation pass, and print stats if
7260 requested. */
7262 static void
7263 fini_reassoc (void)
7265 statistics_counter_event (cfun, "Linearized",
7266 reassociate_stats.linearized);
7267 statistics_counter_event (cfun, "Constants eliminated",
7268 reassociate_stats.constants_eliminated);
7269 statistics_counter_event (cfun, "Ops eliminated",
7270 reassociate_stats.ops_eliminated);
7271 statistics_counter_event (cfun, "Statements rewritten",
7272 reassociate_stats.rewritten);
7273 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7274 reassociate_stats.pows_encountered);
7275 statistics_counter_event (cfun, "Built-in powi calls created",
7276 reassociate_stats.pows_created);
7278 delete operand_rank;
7279 bitmap_clear (biased_names);
7280 operand_entry_pool.release ();
7281 free (bb_rank);
7282 plus_negates.release ();
7283 free_dominance_info (CDI_POST_DOMINATORS);
7284 loop_optimizer_finalize ();
7287 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7288 insertion of __builtin_powi calls.
7290 Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
7291 optimization of a gimple conditional. Otherwise returns zero. */
7293 static unsigned int
7294 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7296 reassoc_insert_powi_p = insert_powi_p;
7297 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7299 init_reassoc ();
7301 bool cfg_cleanup_needed;
7302 cfg_cleanup_needed = do_reassoc ();
7303 repropagate_negates ();
7304 branch_fixup ();
7306 fini_reassoc ();
7307 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7310 namespace {
7312 const pass_data pass_data_reassoc =
7314 GIMPLE_PASS, /* type */
7315 "reassoc", /* name */
7316 OPTGROUP_NONE, /* optinfo_flags */
7317 TV_TREE_REASSOC, /* tv_id */
7318 ( PROP_cfg | PROP_ssa ), /* properties_required */
7319 0, /* properties_provided */
7320 0, /* properties_destroyed */
7321 0, /* todo_flags_start */
7322 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7325 class pass_reassoc : public gimple_opt_pass
7327 public:
7328 pass_reassoc (gcc::context *ctxt)
7329 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7332 /* opt_pass methods: */
7333 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7334 void set_pass_param (unsigned int n, bool param) final override
7336 gcc_assert (n == 0);
7337 insert_powi_p = param;
7338 bias_loop_carried_phi_ranks_p = !param;
7340 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7341 unsigned int execute (function *) final override
7343 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7346 private:
7347 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7348 point 3a in the pass header comment. */
7349 bool insert_powi_p;
7350 bool bias_loop_carried_phi_ranks_p;
7351 }; // class pass_reassoc
7353 } // anon namespace
7355 gimple_opt_pass *
7356 make_pass_reassoc (gcc::context *ctxt)
7358 return new pass_reassoc (ctxt);