gcc/tree-ssa-reassoc.cc
1 /* Reassociation for trees.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57 #include "internal-fn.h"
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
63 It consists of five steps:
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_*
73 3. Optimization of the operand lists, eliminating things like a +
74 -a, a & a, etc.
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
83 5. Repropagate negates, as nothing else will clean it up ATM.
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
104 a (1), b (1), c (1), d (2), e (2)
107 We start with our merge worklist empty, and the ops list with all of
108 those on it.
110 You want to first merge all leaves of the same rank, as much as
111 possible.
113 So first build a binary op of
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
125 Then build a binary op of d + e
126 mergetmp2 = d + e
128 and put mergetmp2 on the merge worklist.
130 so merge worklist = {mergetmp, c, mergetmp2}
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
135 So we have
137 build binary op
138 mergetmp3 = mergetmp + c
140 worklist = {mergetmp2, mergetmp3}
142 mergetmp4 = mergetmp2 + mergetmp3
144 worklist = {mergetmp4}
146 because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
155 So why don't we do this?
157 Because it's expensive, and rarely will help. Most trees we are
158 reassociating have 3 or fewer ops. If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
163 mergetmp = op1 + op2
164 newstmt = mergetmp + op3
166 instead of
167 mergetmp = op2 + op3
168 newstmt = mergetmp + op1
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
172 can do.
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
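/* Editorial note, not from the original sources: a small illustration of
   steps 1-3 above.  Given

     t = a - b
     r = t + b

   step 1 rewrites the subtract as an addition of a negate (t = a + (-b)),
   step 2 linearizes r into the operand list {a, -b, b}, and step 3
   (eliminate_plus_minus_pair below) cancels the (-b, b) pair, so the
   whole expression collapses to r = a before the tree is rewritten.  */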
180 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 point 3a in the pass header comment. */
182 static bool reassoc_insert_powi_p;
184 /* Enable biasing ranks of loop accumulators. We don't want this before
185 vectorization, since it interferes with reduction chains. */
186 static bool reassoc_bias_loop_carried_phi_ranks_p;
188 /* Statistics */
189 static struct
191 int linearized;
192 int constants_eliminated;
193 int ops_eliminated;
194 int rewritten;
195 int pows_encountered;
196 int pows_created;
197 } reassociate_stats;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static unsigned int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static int64_t *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, int64_t> *operand_rank;
215 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 biased. */
217 static auto_bitmap biased_names;
219 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
220 all basic blocks, the CFG should be adjusted - the basic block is
221 split right after that SSA_NAME's definition statement and before
222 the only use, which must be a bit ior. */
223 static vec<tree> reassoc_branch_fixups;
225 /* Forward decls. */
226 static int64_t get_rank (tree);
227 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
229 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 possibly added by gsi_remove. */
232 static bool
233 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
235 gimple *stmt = gsi_stmt (*gsi);
237 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 return gsi_remove (gsi, true);
240 gimple_stmt_iterator prev = *gsi;
241 gsi_prev (&prev);
242 unsigned uid = gimple_uid (stmt);
243 basic_block bb = gimple_bb (stmt);
244 bool ret = gsi_remove (gsi, true);
245 if (!gsi_end_p (prev))
246 gsi_next (&prev);
247 else
248 prev = gsi_start_bb (bb);
249 gimple *end_stmt = gsi_stmt (*gsi);
250 while ((stmt = gsi_stmt (prev)) != end_stmt)
252 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 gimple_set_uid (stmt, uid);
254 gsi_next (&prev);
256 return ret;
259 /* Bias amount for loop-carried phis. We want this to be larger than
260 the depth of any reassociation tree we can see, but not larger than
261 the rank difference between two blocks. */
262 #define PHI_LOOP_BIAS (1 << 15)
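/* Editorial note: basic block ranks are spaced 1 << 16 apart (note the
   bb_rank[...] >> 16 comparison in sort_by_operand_rank below), so a bias
   of 1 << 15 dwarfs any realistic expression-tree depth while still
   fitting inside the gap between two consecutive blocks' ranks.  */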
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
266 like this:
268 x_1 = phi(x_0, x_2)
269 a = x_1 | 1
270 b = a ^ 2
271 .MEM = b
272 c = b + d
273 x_2 = c + e
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
278 static bool
279 propagate_bias_p (gimple *stmt)
281 use_operand_p use;
282 imm_use_iterator use_iter;
283 gimple *single_use_stmt = NULL;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 return false;
288 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
290 gimple *current_use_stmt = USE_STMT (use);
292 if (is_gimple_assign (current_use_stmt)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
295 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 return false;
297 single_use_stmt = current_use_stmt;
301 if (single_use_stmt == NULL)
302 return false;
304 if (gimple_bb (stmt)->loop_father
305 != gimple_bb (single_use_stmt)->loop_father)
306 return false;
308 return true;
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
318 static int64_t
319 phi_rank (gimple *stmt)
321 basic_block bb = gimple_bb (stmt);
322 class loop *father = bb->loop_father;
323 tree res;
324 unsigned i;
325 use_operand_p use;
326 gimple *use_stmt;
328 if (!reassoc_bias_loop_carried_phi_ranks_p)
329 return bb_rank[bb->index];
331 /* We only care about real loops (those with a latch). */
332 if (!father->latch)
333 return bb_rank[bb->index];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb != father->header
337 || father->inner)
338 return bb_rank[bb->index];
340 /* Ignore virtual SSA_NAMEs. */
341 res = gimple_phi_result (stmt);
342 if (virtual_operand_p (res))
343 return bb_rank[bb->index];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res, &use, &use_stmt)
348 || gimple_bb (use_stmt)->loop_father != father)
349 return bb_rank[bb->index];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i = 0; i < gimple_phi_num_args (stmt); i++)
354 tree arg = gimple_phi_arg_def (stmt, i);
355 if (TREE_CODE (arg) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg))
358 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 if (gimple_bb (def_stmt)->loop_father == father)
360 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
364 /* Must be an uninteresting phi. */
365 return bb_rank[bb->index];
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
372 static int64_t
373 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
375 int64_t op_rank;
377 op_rank = get_rank (op);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op) == SSA_NAME
382 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
384 if (maybe_biased_p == NULL)
385 return rank;
386 *maybe_biased_p = true;
389 return MAX (rank, op_rank);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e)
397 int64_t *slot = operand_rank->get (e);
398 return slot ? *slot : -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
403 static inline void
404 insert_operand_rank (tree e, int64_t rank)
406 gcc_assert (rank > 0);
407 gcc_assert (!operand_rank->put (e, rank));
410 /* Given an expression E, return the rank of the expression. */
412 static int64_t
413 get_rank (tree e)
415 /* SSA_NAME's have the rank of the expression they are the result of.
417 For globals and uninitialized values, the rank is 0.
418 For function arguments, use the pre-setup rank.
419 For PHI nodes, stores, asm statements, etc, we use the rank of
420 the BB.
421 For simple operations, the rank is the maximum rank of any of
422 its operands, or the bb_rank, whichever is less.
423 I make no claims that this is optimal, however, it gives good
424 results. */
426 /* We make an exception to the normal ranking system to break
427 dependences of accumulator variables in loops. Suppose we
428 have a simple one-block loop containing:
430 x_1 = phi(x_0, x_2)
431 b = a + x_1
432 c = b + d
433 x_2 = c + e
435 As shown, each iteration of the calculation into x is fully
436 dependent upon the iteration before it. We would prefer to
437 see this in the form:
439 x_1 = phi(x_0, x_2)
440 b = a + d
441 c = b + e
442 x_2 = c + x_1
444 If the loop is unrolled, the calculations of b and c from
445 different iterations can be interleaved.
447 To obtain this result during reassociation, we bias the rank
448 of the phi definition x_1 upward, when it is recognized as an
449 accumulator pattern. The artificial rank causes it to be
450 added last, providing the desired independence. */
452 if (TREE_CODE (e) == SSA_NAME)
454 ssa_op_iter iter;
455 gimple *stmt;
456 int64_t rank;
457 tree op;
459 /* If we already have a rank for this expression, use that. */
460 rank = find_operand_rank (e);
461 if (rank != -1)
462 return rank;
464 stmt = SSA_NAME_DEF_STMT (e);
465 if (gimple_code (stmt) == GIMPLE_PHI)
467 rank = phi_rank (stmt);
468 if (rank != bb_rank[gimple_bb (stmt)->index])
469 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
472 else if (!is_gimple_assign (stmt))
473 rank = bb_rank[gimple_bb (stmt)->index];
475 else
477 bool biased_p = false;
478 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
480 /* Otherwise, find the maximum rank for the operands. As an
481 exception, remove the bias from loop-carried phis when propagating
482 the rank so that dependent operations are not also biased. */
483 /* Simply walk over all SSA uses - this takes advantage of the
484 fact that non-SSA operands satisfy is_gimple_min_invariant and
485 thus have rank 0. */
486 rank = 0;
487 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
488 rank = propagate_rank (rank, op, maybe_biased_p);
490 rank += 1;
491 if (biased_p)
492 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
495 if (dump_file && (dump_flags & TDF_DETAILS))
497 fprintf (dump_file, "Rank for ");
498 print_generic_expr (dump_file, e);
499 fprintf (dump_file, " is %" PRId64 "\n", rank);
502 /* Note the rank in the hashtable so we don't recompute it. */
503 insert_operand_rank (e, rank);
504 return rank;
507 /* Constants, globals, etc., are rank 0 */
508 return 0;
512 /* We want integer ones to end up last no matter what, since they are
513 the ones we can do the most with. */
514 #define INTEGER_CONST_TYPE 1 << 4
515 #define FLOAT_ONE_CONST_TYPE 1 << 3
516 #define FLOAT_CONST_TYPE 1 << 2
517 #define OTHER_CONST_TYPE 1 << 1
519 /* Classify an invariant tree into integer, float, or other, so that
520 we can sort them to be near other constants of the same type. */
521 static inline int
522 constant_type (tree t)
524 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
525 return INTEGER_CONST_TYPE;
526 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
528 /* Sort -1.0 and 1.0 constants last: while in some cases
529 const_binop can't optimize some inexact operations, multiplication
530 by -1.0 or 1.0 can always be merged with other operands. */
531 if (real_onep (t) || real_minus_onep (t))
532 return FLOAT_ONE_CONST_TYPE;
533 return FLOAT_CONST_TYPE;
535 else
536 return OTHER_CONST_TYPE;
539 /* qsort comparison function to sort operand entries PA and PB by rank
540 so that the sorted array is ordered by rank in decreasing order. */
541 static int
542 sort_by_operand_rank (const void *pa, const void *pb)
544 const operand_entry *oea = *(const operand_entry *const *)pa;
545 const operand_entry *oeb = *(const operand_entry *const *)pb;
547 if (oeb->rank != oea->rank)
548 return oeb->rank > oea->rank ? 1 : -1;
550 /* It's nicer for optimize_expression if constants that are likely
551 to fold when added/multiplied/whatever are put next to each
552 other. Since all constants have rank 0, order them by type. */
553 if (oea->rank == 0)
555 if (constant_type (oeb->op) != constant_type (oea->op))
556 return constant_type (oea->op) - constant_type (oeb->op);
557 else
558 /* To make sorting result stable, we use unique IDs to determine
559 order. */
560 return oeb->id > oea->id ? 1 : -1;
563 if (TREE_CODE (oea->op) != SSA_NAME)
565 if (TREE_CODE (oeb->op) != SSA_NAME)
566 return oeb->id > oea->id ? 1 : -1;
567 else
568 return 1;
570 else if (TREE_CODE (oeb->op) != SSA_NAME)
571 return -1;
573 /* Lastly, make sure the versions that are the same go next to each
574 other. */
575 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
578 versions of removed SSA_NAMEs, so if possible, prefer to sort
579 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
580 See PR60418. */
581 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
582 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
583 basic_block bba = gimple_bb (stmta);
584 basic_block bbb = gimple_bb (stmtb);
585 if (bbb != bba)
587 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
588 but the other might not be. */
589 if (!bba)
590 return 1;
591 if (!bbb)
592 return -1;
593 /* If neither is, compare bb_rank. */
594 if (bb_rank[bbb->index] != bb_rank[bba->index])
595 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
598 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600 if (da != db)
601 return da ? 1 : -1;
603 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
606 return oeb->id > oea->id ? 1 : -1;
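/* Editorial note, illustrative only: the net effect of the comparator above
   is that operands are sorted by decreasing rank, all rank-0 constants sink
   to the end, and among the constants those with a larger constant_type
   value come last, so integer constants end up at the very back, e.g.
   {x_7 (rank 3), y_2 (rank 1), 2.5, 7} for a PLUS_EXPR chain.  */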
609 /* Add an operand entry to *OPS for the tree operand OP. */
611 static void
612 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
614 operand_entry *oe = operand_entry_pool.allocate ();
616 oe->op = op;
617 oe->rank = get_rank (op);
618 oe->id = next_operand_entry_id++;
619 oe->count = 1;
620 oe->stmt_to_insert = stmt_to_insert;
621 ops->safe_push (oe);
624 /* Add an operand entry to *OPS for the tree operand OP with repeat
625 count REPEAT. */
627 static void
628 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
629 HOST_WIDE_INT repeat)
631 operand_entry *oe = operand_entry_pool.allocate ();
633 oe->op = op;
634 oe->rank = get_rank (op);
635 oe->id = next_operand_entry_id++;
636 oe->count = repeat;
637 oe->stmt_to_insert = NULL;
638 ops->safe_push (oe);
640 reassociate_stats.pows_encountered++;
643 /* Returns true if we can associate the SSA def OP. */
645 static bool
646 can_reassociate_op_p (tree op)
648 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
649 return false;
650 /* Uninitialized variables can't participate in reassociation. */
651 if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op))
652 return false;
653 /* Make sure asm goto outputs do not participate in reassociation since
654 we have no way to find an insertion place after asm goto. */
655 if (TREE_CODE (op) == SSA_NAME
656 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
657 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
658 return false;
659 return true;
662 /* Returns true if we can reassociate operations of TYPE.
663 That is for integral or non-saturating fixed-point types, and for
664 floating point type when associative-math is enabled. */
666 static bool
667 can_reassociate_type_p (tree type)
669 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
670 || NON_SAT_FIXED_POINT_TYPE_P (type)
671 || (flag_associative_math && FLOAT_TYPE_P (type)))
672 return true;
673 return false;
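/* Editorial note, illustrative only: with default flags this accepts
   'unsigned int' (wrapping overflow) but rejects plain 'int' unless -fwrapv
   is in effect (no TYPE_OVERFLOW_WRAPS), and it accepts 'double' only when
   -fassociative-math (e.g. via -ffast-math) is enabled.  */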
676 /* Return true if STMT is a reassociable operation containing a binary
677 operation with tree code CODE, and is inside LOOP. */
679 static bool
680 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
682 basic_block bb = gimple_bb (stmt);
684 if (gimple_bb (stmt) == NULL)
685 return false;
687 if (!flow_bb_inside_loop_p (loop, bb))
688 return false;
690 if (is_gimple_assign (stmt)
691 && gimple_assign_rhs_code (stmt) == code
692 && has_single_use (gimple_assign_lhs (stmt)))
694 tree rhs1 = gimple_assign_rhs1 (stmt);
695 tree rhs2 = gimple_assign_rhs2 (stmt);
696 if (!can_reassociate_op_p (rhs1)
697 || (rhs2 && !can_reassociate_op_p (rhs2)))
698 return false;
699 return true;
702 return false;
706 /* Return true if STMT is a nop-conversion. */
708 static bool
709 gimple_nop_conversion_p (gimple *stmt)
711 if (gassign *ass = dyn_cast <gassign *> (stmt))
713 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
714 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
715 TREE_TYPE (gimple_assign_rhs1 (ass))))
716 return true;
718 return false;
721 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
722 operand of that unary operation. Otherwise, return NULL. */
724 static tree
725 get_unary_op (tree name, enum tree_code opcode)
727 gimple *stmt = SSA_NAME_DEF_STMT (name);
729 /* Look through nop conversions (sign changes). */
730 if (gimple_nop_conversion_p (stmt)
731 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
732 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
734 if (!is_gimple_assign (stmt))
735 return NULL_TREE;
737 if (gimple_assign_rhs_code (stmt) == opcode)
738 return gimple_assign_rhs1 (stmt);
739 return NULL_TREE;
742 /* Return true if OP1 and OP2 have the same value when cast to either type. */
744 static bool
745 ops_equal_values_p (tree op1, tree op2)
747 if (op1 == op2)
748 return true;
750 tree orig_op1 = op1;
751 if (TREE_CODE (op1) == SSA_NAME)
753 gimple *stmt = SSA_NAME_DEF_STMT (op1);
754 if (gimple_nop_conversion_p (stmt))
756 op1 = gimple_assign_rhs1 (stmt);
757 if (op1 == op2)
758 return true;
762 if (TREE_CODE (op2) == SSA_NAME)
764 gimple *stmt = SSA_NAME_DEF_STMT (op2);
765 if (gimple_nop_conversion_p (stmt))
767 op2 = gimple_assign_rhs1 (stmt);
768 if (op1 == op2
769 || orig_op1 == op2)
770 return true;
774 return false;
778 /* If CURR and LAST are a pair of ops that OPCODE allows us to
779 eliminate through equivalences, do so, remove them from OPS, and
780 return true. Otherwise, return false. */
782 static bool
783 eliminate_duplicate_pair (enum tree_code opcode,
784 vec<operand_entry *> *ops,
785 bool *all_done,
786 unsigned int i,
787 operand_entry *curr,
788 operand_entry *last)
791 /* If we have two of the same op, and the opcode is & |, min, or max,
792 we can eliminate one of them.
793 If we have two of the same op, and the opcode is ^, we can
794 eliminate both of them. */
796 if (last && last->op == curr->op)
798 switch (opcode)
800 case MAX_EXPR:
801 case MIN_EXPR:
802 case BIT_IOR_EXPR:
803 case BIT_AND_EXPR:
804 if (dump_file && (dump_flags & TDF_DETAILS))
806 fprintf (dump_file, "Equivalence: ");
807 print_generic_expr (dump_file, curr->op);
808 fprintf (dump_file, " [&|minmax] ");
809 print_generic_expr (dump_file, last->op);
810 fprintf (dump_file, " -> ");
811 print_generic_stmt (dump_file, last->op);
814 ops->ordered_remove (i);
815 reassociate_stats.ops_eliminated ++;
817 return true;
819 case BIT_XOR_EXPR:
820 if (dump_file && (dump_flags & TDF_DETAILS))
822 fprintf (dump_file, "Equivalence: ");
823 print_generic_expr (dump_file, curr->op);
824 fprintf (dump_file, " ^ ");
825 print_generic_expr (dump_file, last->op);
826 fprintf (dump_file, " -> nothing\n");
829 reassociate_stats.ops_eliminated += 2;
831 if (ops->length () == 2)
833 ops->truncate (0);
834 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
835 *all_done = true;
837 else
839 ops->ordered_remove (i-1);
840 ops->ordered_remove (i-1);
843 return true;
845 default:
846 break;
849 return false;
852 static vec<tree> plus_negates;
854 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
855 expression, look in OPS for a corresponding positive operation to cancel
856 it out. If we find one, remove the other from OPS, replace
857 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
858 return false. */
860 static bool
861 eliminate_plus_minus_pair (enum tree_code opcode,
862 vec<operand_entry *> *ops,
863 unsigned int currindex,
864 operand_entry *curr)
866 tree negateop;
867 tree notop;
868 unsigned int i;
869 operand_entry *oe;
871 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
872 return false;
874 negateop = get_unary_op (curr->op, NEGATE_EXPR);
875 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
876 if (negateop == NULL_TREE && notop == NULL_TREE)
877 return false;
879 /* Any non-negated version will have a rank that is one less than
880 the current rank. So once we hit those ranks, if we don't find
881 one, we can stop. */
883 for (i = currindex + 1;
884 ops->iterate (i, &oe)
885 && oe->rank >= curr->rank - 1 ;
886 i++)
888 if (negateop
889 && ops_equal_values_p (oe->op, negateop))
891 if (dump_file && (dump_flags & TDF_DETAILS))
893 fprintf (dump_file, "Equivalence: ");
894 print_generic_expr (dump_file, negateop);
895 fprintf (dump_file, " + -");
896 print_generic_expr (dump_file, oe->op);
897 fprintf (dump_file, " -> 0\n");
900 ops->ordered_remove (i);
901 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
902 ops->ordered_remove (currindex);
903 reassociate_stats.ops_eliminated ++;
905 return true;
907 else if (notop
908 && ops_equal_values_p (oe->op, notop))
910 tree op_type = TREE_TYPE (oe->op);
912 if (dump_file && (dump_flags & TDF_DETAILS))
914 fprintf (dump_file, "Equivalence: ");
915 print_generic_expr (dump_file, notop);
916 fprintf (dump_file, " + ~");
917 print_generic_expr (dump_file, oe->op);
918 fprintf (dump_file, " -> -1\n");
921 ops->ordered_remove (i);
922 add_to_ops_vec (ops, build_all_ones_cst (op_type));
923 ops->ordered_remove (currindex);
924 reassociate_stats.ops_eliminated ++;
926 return true;
930 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
931 save it for later inspection in repropagate_negates(). */
932 if (negateop != NULL_TREE
933 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
934 plus_negates.safe_push (curr->op);
936 return false;
939 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
940 bitwise not expression, look in OPS for a corresponding operand to
941 cancel it out. If we find one, remove the other from OPS, replace
942 OPS[CURRINDEX] with -1 or 0, respectively, and return true. Otherwise,
943 return false. */
945 static bool
946 eliminate_not_pairs (enum tree_code opcode,
947 vec<operand_entry *> *ops,
948 unsigned int currindex,
949 operand_entry *curr)
951 tree notop;
952 unsigned int i;
953 operand_entry *oe;
955 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
956 || TREE_CODE (curr->op) != SSA_NAME)
957 return false;
959 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
960 if (notop == NULL_TREE)
961 return false;
963 /* Any non-not version will have a rank that is one less than
964 the current rank. So once we hit those ranks, if we don't find
965 one, we can stop. */
967 for (i = currindex + 1;
968 ops->iterate (i, &oe)
969 && oe->rank >= curr->rank - 1;
970 i++)
972 if (oe->op == notop)
974 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Equivalence: ");
977 print_generic_expr (dump_file, notop);
978 if (opcode == BIT_AND_EXPR)
979 fprintf (dump_file, " & ~");
980 else if (opcode == BIT_IOR_EXPR)
981 fprintf (dump_file, " | ~");
982 print_generic_expr (dump_file, oe->op);
983 if (opcode == BIT_AND_EXPR)
984 fprintf (dump_file, " -> 0\n");
985 else if (opcode == BIT_IOR_EXPR)
986 fprintf (dump_file, " -> -1\n");
989 if (opcode == BIT_AND_EXPR)
990 oe->op = build_zero_cst (TREE_TYPE (oe->op));
991 else if (opcode == BIT_IOR_EXPR)
992 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (0);
996 ops->quick_push (oe);
997 return true;
1001 return false;
1004 /* Use constant value that may be present in OPS to try to eliminate
1005 operands. Note that this function is only really used when we've
1006 eliminated ops for other reasons, or merged constants. Across
1007 single statements, fold already does all of this, plus more. There
1008 is little point in duplicating logic, so I've only included the
1009 identities that I could ever construct testcases to trigger. */
1011 static void
1012 eliminate_using_constants (enum tree_code opcode,
1013 vec<operand_entry *> *ops)
1015 operand_entry *oelast = ops->last ();
1016 tree type = TREE_TYPE (oelast->op);
1018 if (oelast->rank == 0
1019 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1021 switch (opcode)
1023 case BIT_AND_EXPR:
1024 if (integer_zerop (oelast->op))
1026 if (ops->length () != 1)
1028 if (dump_file && (dump_flags & TDF_DETAILS))
1029 fprintf (dump_file, "Found & 0, removing all other ops\n");
1031 reassociate_stats.ops_eliminated += ops->length () - 1;
1033 ops->truncate (0);
1034 ops->quick_push (oelast);
1035 return;
1038 else if (integer_all_onesp (oelast->op))
1040 if (ops->length () != 1)
1042 if (dump_file && (dump_flags & TDF_DETAILS))
1043 fprintf (dump_file, "Found & -1, removing\n");
1044 ops->pop ();
1045 reassociate_stats.ops_eliminated++;
1048 break;
1049 case BIT_IOR_EXPR:
1050 if (integer_all_onesp (oelast->op))
1052 if (ops->length () != 1)
1054 if (dump_file && (dump_flags & TDF_DETAILS))
1055 fprintf (dump_file, "Found | -1, removing all other ops\n");
1057 reassociate_stats.ops_eliminated += ops->length () - 1;
1059 ops->truncate (0);
1060 ops->quick_push (oelast);
1061 return;
1064 else if (integer_zerop (oelast->op))
1066 if (ops->length () != 1)
1068 if (dump_file && (dump_flags & TDF_DETAILS))
1069 fprintf (dump_file, "Found | 0, removing\n");
1070 ops->pop ();
1071 reassociate_stats.ops_eliminated++;
1074 break;
1075 case MULT_EXPR:
1076 if (integer_zerop (oelast->op)
1077 || (FLOAT_TYPE_P (type)
1078 && !HONOR_NANS (type)
1079 && !HONOR_SIGNED_ZEROS (type)
1080 && real_zerop (oelast->op)))
1082 if (ops->length () != 1)
1084 if (dump_file && (dump_flags & TDF_DETAILS))
1085 fprintf (dump_file, "Found * 0, removing all other ops\n");
1087 reassociate_stats.ops_eliminated += ops->length () - 1;
1088 ops->truncate (0);
1089 ops->quick_push (oelast);
1090 return;
1093 else if (integer_onep (oelast->op)
1094 || (FLOAT_TYPE_P (type)
1095 && !HONOR_SNANS (type)
1096 && real_onep (oelast->op)))
1098 if (ops->length () != 1)
1100 if (dump_file && (dump_flags & TDF_DETAILS))
1101 fprintf (dump_file, "Found * 1, removing\n");
1102 ops->pop ();
1103 reassociate_stats.ops_eliminated++;
1104 return;
1107 break;
1108 case BIT_XOR_EXPR:
1109 case PLUS_EXPR:
1110 case MINUS_EXPR:
1111 if (integer_zerop (oelast->op)
1112 || (FLOAT_TYPE_P (type)
1113 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1114 && fold_real_zero_addition_p (type, 0, oelast->op,
1115 opcode == MINUS_EXPR)))
1117 if (ops->length () != 1)
1119 if (dump_file && (dump_flags & TDF_DETAILS))
1120 fprintf (dump_file, "Found [|^+] 0, removing\n");
1121 ops->pop ();
1122 reassociate_stats.ops_eliminated++;
1123 return;
1126 break;
1127 default:
1128 break;
1134 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1135 bool, bool);
1137 /* Structure for tracking and counting operands. */
1138 struct oecount {
1139 unsigned int cnt;
1140 unsigned int id;
1141 enum tree_code oecode;
1142 tree op;
1146 /* The heap for the oecount hashtable and the sorted list of operands. */
1147 static vec<oecount> cvec;
1150 /* Oecount hashtable helpers. */
1152 struct oecount_hasher : int_hash <int, 0, 1>
1154 static inline hashval_t hash (int);
1155 static inline bool equal (int, int);
1158 /* Hash function for oecount. */
1160 inline hashval_t
1161 oecount_hasher::hash (int p)
1163 const oecount *c = &cvec[p - 42];
1164 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1167 /* Comparison function for oecount. */
1169 inline bool
1170 oecount_hasher::equal (int p1, int p2)
1172 const oecount *c1 = &cvec[p1 - 42];
1173 const oecount *c2 = &cvec[p2 - 42];
1174 return c1->oecode == c2->oecode && c1->op == c2->op;
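/* Editorial note: the integers stored in the oecount hash table are indices
   into cvec biased by 42 (undistribute_ops_list inserts cvec.length () + 41
   right after pushing an element), which is why hash and equal above subtract
   42; the bias also keeps stored values away from the 0 (empty) and 1
   (deleted) markers declared via int_hash <int, 0, 1>.  */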
1177 /* Comparison function for qsort sorting oecount elements by count. */
1179 static int
1180 oecount_cmp (const void *p1, const void *p2)
1182 const oecount *c1 = (const oecount *)p1;
1183 const oecount *c2 = (const oecount *)p2;
1184 if (c1->cnt != c2->cnt)
1185 return c1->cnt > c2->cnt ? 1 : -1;
1186 else
1187 /* If counts are identical, use unique IDs to stabilize qsort. */
1188 return c1->id > c2->id ? 1 : -1;
1191 /* Return TRUE iff STMT represents a builtin call that raises OP
1192 to some exponent. */
1194 static bool
1195 stmt_is_power_of_op (gimple *stmt, tree op)
1197 if (!is_gimple_call (stmt))
1198 return false;
1200 switch (gimple_call_combined_fn (stmt))
1202 CASE_CFN_POW:
1203 CASE_CFN_POWI:
1204 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1206 default:
1207 return false;
1211 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1212 in place and return the result. Assumes that stmt_is_power_of_op
1213 was previously called for STMT and returned TRUE. */
1215 static HOST_WIDE_INT
1216 decrement_power (gimple *stmt)
1218 REAL_VALUE_TYPE c, cint;
1219 HOST_WIDE_INT power;
1220 tree arg1;
1222 switch (gimple_call_combined_fn (stmt))
1224 CASE_CFN_POW:
1225 arg1 = gimple_call_arg (stmt, 1);
1226 c = TREE_REAL_CST (arg1);
1227 power = real_to_integer (&c) - 1;
1228 real_from_integer (&cint, VOIDmode, power, SIGNED);
1229 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1230 return power;
1232 CASE_CFN_POWI:
1233 arg1 = gimple_call_arg (stmt, 1);
1234 power = TREE_INT_CST_LOW (arg1) - 1;
1235 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1236 return power;
1238 default:
1239 gcc_unreachable ();
1243 /* Replace the SSA name defined by STMT with a new SSA name, and replace
1244 all uses of the old name with the new one. Also return the new SSA. */
1246 static tree
1247 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1249 gimple *use_stmt;
1250 use_operand_p use;
1251 imm_use_iterator iter;
1252 tree new_lhs, new_debug_lhs = NULL_TREE;
1253 tree lhs = gimple_get_lhs (stmt);
1255 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1256 gimple_set_lhs (stmt, new_lhs);
1258 /* Also need to update GIMPLE_DEBUGs. */
1259 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1261 tree repl = new_lhs;
1262 if (is_gimple_debug (use_stmt))
1264 if (new_debug_lhs == NULL_TREE)
1266 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1267 gdebug *def_temp
1268 = gimple_build_debug_bind (new_debug_lhs,
1269 build2 (opcode, TREE_TYPE (lhs),
1270 new_lhs, op),
1271 stmt);
1272 gimple_set_uid (def_temp, gimple_uid (stmt));
1273 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1274 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1276 repl = new_debug_lhs;
1278 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1279 SET_USE (use, repl);
1280 update_stmt (use_stmt);
1282 return new_lhs;
1285 /* Replace all SSA names defined by the stmts in STMTS_TO_FIX and replace
1286 their uses with new SSA names. Also do this for the stmt that defines DEF
1287 if *DEF is not OP. */
1289 static void
1290 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1291 vec<gimple *> &stmts_to_fix)
1293 unsigned i;
1294 gimple *stmt;
1296 if (*def != op
1297 && TREE_CODE (*def) == SSA_NAME
1298 && (stmt = SSA_NAME_DEF_STMT (*def))
1299 && gimple_code (stmt) != GIMPLE_NOP)
1300 *def = make_new_ssa_for_def (stmt, opcode, op);
1302 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1303 make_new_ssa_for_def (stmt, opcode, op);
1306 /* Find the single immediate use of STMT's LHS, and replace it
1307 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1308 replace *DEF with OP as well. */
1310 static void
1311 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1313 tree lhs;
1314 gimple *use_stmt;
1315 use_operand_p use;
1316 gimple_stmt_iterator gsi;
1318 if (is_gimple_call (stmt))
1319 lhs = gimple_call_lhs (stmt);
1320 else
1321 lhs = gimple_assign_lhs (stmt);
1323 gcc_assert (has_single_use (lhs));
1324 single_imm_use (lhs, &use, &use_stmt);
1325 if (lhs == *def)
1326 *def = op;
1327 SET_USE (use, op);
1328 if (TREE_CODE (op) != SSA_NAME)
1329 update_stmt (use_stmt);
1330 gsi = gsi_for_stmt (stmt);
1331 unlink_stmt_vdef (stmt);
1332 reassoc_remove_stmt (&gsi);
1333 release_defs (stmt);
1336 /* Walks the linear chain with result *DEF searching for an operation
1337 with operand OP and code OPCODE removing that from the chain. *DEF
1338 is updated if there is only one operand but no operation left. */
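/* Editorial illustration, values invented: with *DEF = t2_5 where
   t2_5 = t1_4 * x_1 and t1_4 = a_2 * b_3, calling
   zero_one_operation (&def, MULT_EXPR, x_1) finds the multiplication by x_1
   and forwards t1_4 (i.e. a_2 * b_3) into t2_5's single use, updating *DEF
   accordingly.  When the operand is found deeper in the chain, the statements
   walked on the way get fresh LHS names (see the PR72835 note below) so that
   the old values are not reused for different computations.  */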
1340 static void
1341 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1343 tree orig_def = *def;
1344 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1345 /* PR72835 - Record the stmt chain that has to be updated such that
1346 we don't use the same LHS when the values computed are different. */
1347 auto_vec<gimple *, 64> stmts_to_fix;
1351 tree name;
1353 if (opcode == MULT_EXPR)
1355 if (stmt_is_power_of_op (stmt, op))
1357 if (decrement_power (stmt) == 1)
1359 if (stmts_to_fix.length () > 0)
1360 stmts_to_fix.pop ();
1361 propagate_op_to_single_use (op, stmt, def);
1363 break;
1365 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1367 if (gimple_assign_rhs1 (stmt) == op)
1369 tree cst = build_minus_one_cst (TREE_TYPE (op));
1370 if (stmts_to_fix.length () > 0)
1371 stmts_to_fix.pop ();
1372 propagate_op_to_single_use (cst, stmt, def);
1373 break;
1375 else if (integer_minus_onep (op)
1376 || real_minus_onep (op))
1378 gimple_assign_set_rhs_code
1379 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1380 break;
1385 name = gimple_assign_rhs1 (stmt);
1387 /* If this is the operation we look for and one of the operands
1388 is ours, simply propagate the other operand into the stmt's
1389 single use. */
1390 if (gimple_assign_rhs_code (stmt) == opcode
1391 && (name == op
1392 || gimple_assign_rhs2 (stmt) == op))
1394 if (name == op)
1395 name = gimple_assign_rhs2 (stmt);
1396 if (stmts_to_fix.length () > 0)
1397 stmts_to_fix.pop ();
1398 propagate_op_to_single_use (name, stmt, def);
1399 break;
1402 /* We might have a multiply of two __builtin_pow* calls, and
1403 the operand might be hiding in the rightmost one. Likewise
1404 this can happen for a negate. */
1405 if (opcode == MULT_EXPR
1406 && gimple_assign_rhs_code (stmt) == opcode
1407 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1408 && has_single_use (gimple_assign_rhs2 (stmt)))
1410 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1411 if (stmt_is_power_of_op (stmt2, op))
1413 if (decrement_power (stmt2) == 1)
1414 propagate_op_to_single_use (op, stmt2, def);
1415 else
1416 stmts_to_fix.safe_push (stmt2);
1417 break;
1419 else if (is_gimple_assign (stmt2)
1420 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1422 if (gimple_assign_rhs1 (stmt2) == op)
1424 tree cst = build_minus_one_cst (TREE_TYPE (op));
1425 propagate_op_to_single_use (cst, stmt2, def);
1426 break;
1428 else if (integer_minus_onep (op)
1429 || real_minus_onep (op))
1431 stmts_to_fix.safe_push (stmt2);
1432 gimple_assign_set_rhs_code
1433 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1434 break;
1439 /* Continue walking the chain. */
1440 gcc_assert (name != op
1441 && TREE_CODE (name) == SSA_NAME);
1442 stmt = SSA_NAME_DEF_STMT (name);
1443 stmts_to_fix.safe_push (stmt);
1445 while (1);
1447 if (stmts_to_fix.length () > 0 || *def == orig_def)
1448 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1451 /* Returns true if statement S1 dominates statement S2. Like
1452 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1454 static bool
1455 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1457 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1459 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1460 SSA_NAME. Assume it lives at the beginning of function and
1461 thus dominates everything. */
1462 if (!bb1 || s1 == s2)
1463 return true;
1465 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1466 if (!bb2)
1467 return false;
1469 if (bb1 == bb2)
1471 /* PHIs in the same basic block are assumed to be
1472 executed all in parallel, if only one stmt is a PHI,
1473 it dominates the other stmt in the same basic block. */
1474 if (gimple_code (s1) == GIMPLE_PHI)
1475 return true;
1477 if (gimple_code (s2) == GIMPLE_PHI)
1478 return false;
1480 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1482 if (gimple_uid (s1) < gimple_uid (s2))
1483 return true;
1485 if (gimple_uid (s1) > gimple_uid (s2))
1486 return false;
1488 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1489 unsigned int uid = gimple_uid (s1);
1490 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1492 gimple *s = gsi_stmt (gsi);
1493 if (gimple_uid (s) != uid)
1494 break;
1495 if (s == s2)
1496 return true;
1499 return false;
1502 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1505 /* Insert STMT after INSERT_POINT. */
1507 static void
1508 insert_stmt_after (gimple *stmt, gimple *insert_point)
1510 gimple_stmt_iterator gsi;
1511 basic_block bb;
1513 if (gimple_code (insert_point) == GIMPLE_PHI)
1514 bb = gimple_bb (insert_point);
1515 else if (!stmt_ends_bb_p (insert_point))
1517 gsi = gsi_for_stmt (insert_point);
1518 gimple_set_uid (stmt, gimple_uid (insert_point));
1519 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1520 return;
1522 else if (gimple_code (insert_point) == GIMPLE_ASM
1523 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1524 /* We have no idea where to insert - it depends on where the
1525 uses will be placed. */
1526 gcc_unreachable ();
1527 else
1528 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1529 thus if it must end a basic block, it should be a call that can
1530 throw, or some assignment that can throw. If it throws, the LHS
1531 of it will not be initialized though, so only valid places using
1532 the SSA_NAME should be dominated by the fallthru edge. */
1533 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1534 gsi = gsi_after_labels (bb);
1535 if (gsi_end_p (gsi))
1537 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1538 gimple_set_uid (stmt,
1539 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1541 else
1542 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1543 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1546 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1547 the result. Places the statement after the definition of either
1548 OP1 or OP2. Returns the new statement. */
1550 static gimple *
1551 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1553 gimple *op1def = NULL, *op2def = NULL;
1554 gimple_stmt_iterator gsi;
1555 tree op;
1556 gassign *sum;
1558 /* Create the addition statement. */
1559 op = make_ssa_name (type);
1560 sum = gimple_build_assign (op, opcode, op1, op2);
1562 /* Find an insertion place and insert. */
1563 if (TREE_CODE (op1) == SSA_NAME)
1564 op1def = SSA_NAME_DEF_STMT (op1);
1565 if (TREE_CODE (op2) == SSA_NAME)
1566 op2def = SSA_NAME_DEF_STMT (op2);
1567 if ((!op1def || gimple_nop_p (op1def))
1568 && (!op2def || gimple_nop_p (op2def)))
1570 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1571 if (!gsi_end_p (gsi)
1572 && is_gimple_call (gsi_stmt (gsi))
1573 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1575 /* Don't add statements before a returns_twice call at the start
1576 of a function. */
1577 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1578 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1580 if (gsi_end_p (gsi))
1582 gimple_stmt_iterator gsi2
1583 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1584 gimple_set_uid (sum,
1585 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1587 else
1588 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1589 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1591 else
1593 gimple *insert_point;
1594 if ((!op1def || gimple_nop_p (op1def))
1595 || (op2def && !gimple_nop_p (op2def)
1596 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1597 insert_point = op2def;
1598 else
1599 insert_point = op1def;
1600 insert_stmt_after (sum, insert_point);
1602 update_stmt (sum);
1604 return sum;
1607 /* Perform un-distribution of divisions and multiplications.
1608 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1609 to (A + B) / X for real X.
1611 The algorithm is organized as follows.
1613 - First we walk the addition chain *OPS looking for summands that
1614 are defined by a multiplication or a real division. This results
1615 in the candidates bitmap with relevant indices into *OPS.
1617 - Second we build the chains of multiplications or divisions for
1618 these candidates, counting the number of occurrences of (operand, code)
1619 pairs in all of the candidates chains.
1621 - Third we sort the (operand, code) pairs by number of occurrence and
1622 process them starting with the pair with the most uses.
1624 * For each such pair we walk the candidates again to build a
1625 second candidate bitmap noting all multiplication/division chains
1626 that have at least one occurrence of (operand, code).
1628 * We build an alternate addition chain only covering these
1629 candidates with one (operand, code) operation removed from their
1630 multiplication/division chain.
1632 * The first candidate gets replaced by the alternate addition chain
1633 multiplied/divided by the operand.
1635 * All candidate chains get disabled for further processing and
1636 processing of (operand, code) pairs continues.
1638 The alternate addition chains built are re-processed by the main
1639 reassociation algorithm which allows optimizing a * x * y + b * y * x
1640 to (a + b ) * x * y in one invocation of the reassociation pass. */
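/* Editorial illustration, values invented: for the chain
   a_1 * x_3 + b_2 * x_3 + c_4 the two multiplications become candidates and
   the counting table holds (x_3, *) with count 2 plus (a_1, *) and (b_2, *)
   with count 1 each.  Processing (x_3, *) strips x_3 from both chains via
   zero_one_operation, builds s_5 = a_1 + b_2 and p_6 = s_5 * x_3, replaces
   the first candidate with p_6 and zeroes out the second, leaving
   p_6 + 0 + c_4 for the normal reassociation machinery to finish.  */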
1642 static bool
1643 undistribute_ops_list (enum tree_code opcode,
1644 vec<operand_entry *> *ops, class loop *loop)
1646 unsigned int length = ops->length ();
1647 operand_entry *oe1;
1648 unsigned i, j;
1649 unsigned nr_candidates, nr_candidates2;
1650 sbitmap_iterator sbi0;
1651 vec<operand_entry *> *subops;
1652 bool changed = false;
1653 unsigned int next_oecount_id = 0;
1655 if (length <= 1
1656 || opcode != PLUS_EXPR)
1657 return false;
1659 /* Build a list of candidates to process. */
1660 auto_sbitmap candidates (length);
1661 bitmap_clear (candidates);
1662 nr_candidates = 0;
1663 FOR_EACH_VEC_ELT (*ops, i, oe1)
1665 enum tree_code dcode;
1666 gimple *oe1def;
1668 if (TREE_CODE (oe1->op) != SSA_NAME)
1669 continue;
1670 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1671 if (!is_gimple_assign (oe1def))
1672 continue;
1673 dcode = gimple_assign_rhs_code (oe1def);
1674 if ((dcode != MULT_EXPR
1675 && dcode != RDIV_EXPR)
1676 || !is_reassociable_op (oe1def, dcode, loop))
1677 continue;
1679 bitmap_set_bit (candidates, i);
1680 nr_candidates++;
1683 if (nr_candidates < 2)
1684 return false;
1686 if (dump_file && (dump_flags & TDF_DETAILS))
1688 fprintf (dump_file, "searching for un-distribute opportunities ");
1689 print_generic_expr (dump_file,
1690 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1691 fprintf (dump_file, " %d\n", nr_candidates);
1694 /* Build linearized sub-operand lists and the counting table. */
1695 cvec.create (0);
1697 hash_table<oecount_hasher> ctable (15);
1699 /* ??? Macro arguments cannot have multi-argument template types in
1700 them. This typedef is needed to workaround that limitation. */
1701 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1702 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1703 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1705 gimple *oedef;
1706 enum tree_code oecode;
1707 unsigned j;
1709 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1710 oecode = gimple_assign_rhs_code (oedef);
1711 linearize_expr_tree (&subops[i], oedef,
1712 associative_tree_code (oecode), false);
1714 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1716 oecount c;
1717 int *slot;
1718 int idx;
1719 c.oecode = oecode;
1720 c.cnt = 1;
1721 c.id = next_oecount_id++;
1722 c.op = oe1->op;
1723 cvec.safe_push (c);
1724 idx = cvec.length () + 41;
1725 slot = ctable.find_slot (idx, INSERT);
1726 if (!*slot)
1728 *slot = idx;
1730 else
1732 cvec.pop ();
1733 cvec[*slot - 42].cnt++;
1738 /* Sort the counting table. */
1739 cvec.qsort (oecount_cmp);
1741 if (dump_file && (dump_flags & TDF_DETAILS))
1743 oecount *c;
1744 fprintf (dump_file, "Candidates:\n");
1745 FOR_EACH_VEC_ELT (cvec, j, c)
1747 fprintf (dump_file, " %u %s: ", c->cnt,
1748 c->oecode == MULT_EXPR
1749 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1750 print_generic_expr (dump_file, c->op);
1751 fprintf (dump_file, "\n");
1755 /* Process the (operand, code) pairs in order of most occurrence. */
1756 auto_sbitmap candidates2 (length);
1757 while (!cvec.is_empty ())
1759 oecount *c = &cvec.last ();
1760 if (c->cnt < 2)
1761 break;
1763 /* Now collect the operands in the outer chain that contain
1764 the common operand in their inner chain. */
1765 bitmap_clear (candidates2);
1766 nr_candidates2 = 0;
1767 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1769 gimple *oedef;
1770 enum tree_code oecode;
1771 unsigned j;
1772 tree op = (*ops)[i]->op;
1774 /* If we undistributed in this chain already this may be
1775 a constant. */
1776 if (TREE_CODE (op) != SSA_NAME)
1777 continue;
1779 oedef = SSA_NAME_DEF_STMT (op);
1780 oecode = gimple_assign_rhs_code (oedef);
1781 if (oecode != c->oecode)
1782 continue;
1784 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1786 if (oe1->op == c->op)
1788 bitmap_set_bit (candidates2, i);
1789 ++nr_candidates2;
1790 break;
1795 if (nr_candidates2 >= 2)
1797 operand_entry *oe1, *oe2;
1798 gimple *prod;
1799 int first = bitmap_first_set_bit (candidates2);
1801 /* Build the new addition chain. */
1802 oe1 = (*ops)[first];
1803 if (dump_file && (dump_flags & TDF_DETAILS))
1805 fprintf (dump_file, "Building (");
1806 print_generic_expr (dump_file, oe1->op);
1808 zero_one_operation (&oe1->op, c->oecode, c->op);
1809 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1811 gimple *sum;
1812 oe2 = (*ops)[i];
1813 if (dump_file && (dump_flags & TDF_DETAILS))
1815 fprintf (dump_file, " + ");
1816 print_generic_expr (dump_file, oe2->op);
1818 zero_one_operation (&oe2->op, c->oecode, c->op);
1819 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1820 oe1->op, oe2->op, opcode);
1821 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1822 oe2->rank = 0;
1823 oe1->op = gimple_get_lhs (sum);
1826 /* Apply the multiplication/division. */
1827 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1828 oe1->op, c->op, c->oecode);
1829 if (dump_file && (dump_flags & TDF_DETAILS))
1831 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1832 print_generic_expr (dump_file, c->op);
1833 fprintf (dump_file, "\n");
1836 /* Record it in the addition chain and disable further
1837 undistribution with this op. */
1838 oe1->op = gimple_assign_lhs (prod);
1839 oe1->rank = get_rank (oe1->op);
1840 subops[first].release ();
1842 changed = true;
1845 cvec.pop ();
1848 for (i = 0; i < ops->length (); ++i)
1849 subops[i].release ();
1850 free (subops);
1851 cvec.release ();
1853 return changed;
1856 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1857 first: element index for each relevant BIT_FIELD_REF.
1858 second: the index into the operand vec *OPS for each relevant BIT_FIELD_REF. */
1859 typedef std::pair<unsigned, unsigned> v_info_elem;
1860 struct v_info {
1861 tree vec_type;
1862 auto_vec<v_info_elem, 32> vec;
1864 typedef v_info *v_info_ptr;
1866 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1867 static int
1868 sort_by_mach_mode (const void *p_i, const void *p_j)
1870 const tree tr1 = *((const tree *) p_i);
1871 const tree tr2 = *((const tree *) p_j);
1872 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1873 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1874 if (mode1 > mode2)
1875 return 1;
1876 else if (mode1 < mode2)
1877 return -1;
1878 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1879 return -1;
1880 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1881 return 1;
1882 return 0;
1885 /* Cleanup hash map for VECTOR information. */
1886 static void
1887 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1889 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1890 it != info_map.end (); ++it)
1892 v_info_ptr info = (*it).second;
1893 delete info;
1894 (*it).second = NULL;
1898 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1899 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1900 is transformed to
1901 Vs = (V1 + V2 + ... + Vn)
1902 Vs[0] + Vs[1] + ... + Vs[k]
1904 The basic steps are listed below:
1906 1) Check the addition chain *OPS, looking for summands that come from a
1907 BIT_FIELD_REF on a VECTOR type. Put the information into
1908 v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1910 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1911 contiguous and cover the whole VECTOR perfectly, without any holes.
1912 Obtain one VECTOR list which contains the candidates to be transformed.
1914 3) Sort the VECTOR list by machine mode of the VECTOR type; for each group of
1915 candidates with the same mode, build the addition statements for them and
1916 generate BIT_FIELD_REFs accordingly.
1918 TODO:
1919 The current implementation requires that each whole VECTOR be fully
1920 covered, but it could be extended to support partial coverage (adjacent
1921 elements that do not fill the whole vector); that would likely need some
1922 cost model to decide whether the transformation pays off. */
1924 static bool
1925 undistribute_bitref_for_vector (enum tree_code opcode,
1926 vec<operand_entry *> *ops, struct loop *loop)
1928 if (ops->length () <= 1)
1929 return false;
1931 if (opcode != PLUS_EXPR
1932 && opcode != MULT_EXPR
1933 && opcode != BIT_XOR_EXPR
1934 && opcode != BIT_IOR_EXPR
1935 && opcode != BIT_AND_EXPR)
1936 return false;
1938 hash_map<tree, v_info_ptr> v_info_map;
1939 operand_entry *oe1;
1940 unsigned i;
1942 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1943 information into map. */
1944 FOR_EACH_VEC_ELT (*ops, i, oe1)
1946 enum tree_code dcode;
1947 gimple *oe1def;
1949 if (TREE_CODE (oe1->op) != SSA_NAME)
1950 continue;
1951 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1952 if (!is_gimple_assign (oe1def))
1953 continue;
1954 dcode = gimple_assign_rhs_code (oe1def);
1955 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1956 continue;
1958 tree rhs = gimple_assign_rhs1 (oe1def);
1959 tree vec = TREE_OPERAND (rhs, 0);
1960 tree vec_type = TREE_TYPE (vec);
1962 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1963 continue;
1965 /* Ignore it if target machine can't support this VECTOR type. */
1966 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1967 continue;
1969 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1970 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1971 continue;
1973 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1974 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1975 continue;
1977 /* The type of BIT_FIELD_REF might not be equal to the element type of
1978 the vector. We want to use a vector type with element type the
1979 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1980 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1982 machine_mode simd_mode;
1983 unsigned HOST_WIDE_INT size, nunits;
1984 unsigned HOST_WIDE_INT elem_size
1985 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1986 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1987 continue;
1988 if (size <= elem_size || (size % elem_size) != 0)
1989 continue;
1990 nunits = size / elem_size;
1991 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1992 nunits).exists (&simd_mode))
1993 continue;
1994 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1996 /* Ignore it if target machine can't support this VECTOR type. */
1997 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1998 continue;
2000 /* Check const vector type, constrain BIT_FIELD_REF offset and
2001 size. */
2002 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2003 continue;
2005 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2006 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2007 continue;
2010 tree elem_type = TREE_TYPE (vec_type);
2011 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2012 if (maybe_ne (bit_field_size (rhs), elem_size))
2013 continue;
2015 unsigned idx;
2016 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2017 continue;
2019 /* Ignore it if target machine can't support this type of VECTOR
2020 operation. */
2021 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2022 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2023 continue;
2025 bool existed;
2026 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2027 if (!existed)
2029 info = new v_info;
2030 info->vec_type = vec_type;
2032 else if (!types_compatible_p (vec_type, info->vec_type))
2033 continue;
2034 info->vec.safe_push (std::make_pair (idx, i));
2037 /* We need at least two VECTORs to combine. */
2038 if (v_info_map.elements () <= 1)
2040 cleanup_vinfo_map (v_info_map);
2041 return false;
2044 /* Verify all VECTOR candidates by checking two conditions:
2045 1) sorted offsets are adjacent, no holes.
2046 2) can fill the whole VECTOR perfectly.
2047 And add the valid candidates to a vector for further handling. */
2048 auto_vec<tree> valid_vecs (v_info_map.elements ());
2049 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2050 it != v_info_map.end (); ++it)
2052 tree cand_vec = (*it).first;
2053 v_info_ptr cand_info = (*it).second;
2054 unsigned int num_elems
2055 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2056 if (cand_info->vec.length () != num_elems)
2057 continue;
2058 sbitmap holes = sbitmap_alloc (num_elems);
2059 bitmap_ones (holes);
2060 bool valid = true;
2061 v_info_elem *curr;
2062 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2064 if (!bitmap_bit_p (holes, curr->first))
2066 valid = false;
2067 break;
2069 else
2070 bitmap_clear_bit (holes, curr->first);
2072 if (valid && bitmap_empty_p (holes))
2073 valid_vecs.quick_push (cand_vec);
2074 sbitmap_free (holes);
2077 /* We need at least two VECTORs to combine. */
2078 if (valid_vecs.length () <= 1)
2080 cleanup_vinfo_map (v_info_map);
2081 return false;
2084 valid_vecs.qsort (sort_by_mach_mode);
2085 /* Go through all candidates in machine mode order; a mode that has only
2086 a single candidate is skipped. */
2087 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2089 tree tvec = valid_vecs[i];
2090 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2092 /* Skip modes with only a single candidate. */
2093 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2094 continue;
2096 unsigned int idx, j;
2097 gimple *sum = NULL;
2098 tree sum_vec = tvec;
2099 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2100 v_info_elem *elem;
2101 tree vec_type = info_ptr->vec_type;
2103 /* Build the sum for all candidates with same mode. */
2106 sum = build_and_add_sum (vec_type, sum_vec,
2107 valid_vecs[i + 1], opcode);
2108 /* Update the operands only after build_and_add_sum,
2109 so that we don't have to repeat the placement algorithm
2110 of build_and_add_sum. */
2111 if (sum_vec == tvec
2112 && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2114 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2115 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2116 tree lhs = make_ssa_name (vec_type);
2117 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2118 gimple_set_uid (g, gimple_uid (sum));
2119 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2120 gimple_assign_set_rhs1 (sum, lhs);
2121 update_stmt (sum);
2123 if (!useless_type_conversion_p (vec_type,
2124 TREE_TYPE (valid_vecs[i + 1])))
2126 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2127 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2128 valid_vecs[i + 1]);
2129 tree lhs = make_ssa_name (vec_type);
2130 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2131 gimple_set_uid (g, gimple_uid (sum));
2132 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2133 gimple_assign_set_rhs2 (sum, lhs);
2134 update_stmt (sum);
2136 sum_vec = gimple_get_lhs (sum);
2137 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2138 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2139 /* Update those related ops of current candidate VECTOR. */
2140 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2142 idx = elem->second;
2143 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2144 /* Set this so that the op definition will get DCEd later. */
2145 gimple_set_visited (def, true);
2146 if (opcode == PLUS_EXPR
2147 || opcode == BIT_XOR_EXPR
2148 || opcode == BIT_IOR_EXPR)
2149 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2150 else if (opcode == MULT_EXPR)
2151 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2152 else
2154 gcc_assert (opcode == BIT_AND_EXPR);
2155 (*ops)[idx]->op
2156 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2158 (*ops)[idx]->rank = 0;
2160 if (dump_file && (dump_flags & TDF_DETAILS))
2162 fprintf (dump_file, "Generating addition -> ");
2163 print_gimple_stmt (dump_file, sum, 0);
2165 i++;
2167 while ((i < valid_vecs.length () - 1)
2168 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2170 /* Referring to first valid VECTOR with this mode, generate the
2171 BIT_FIELD_REF statements accordingly. */
2172 info_ptr = *(v_info_map.get (tvec));
2173 gcc_assert (sum);
2174 tree elem_type = TREE_TYPE (vec_type);
2175 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2177 idx = elem->second;
2178 tree dst = make_ssa_name (elem_type);
2179 tree pos = bitsize_int (elem->first
2180 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2181 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2182 TYPE_SIZE (elem_type), pos);
2183 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2184 insert_stmt_after (gs, sum);
2185 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2186 /* Set this so that the op definition will get DCEd later. */
2187 gimple_set_visited (def, true);
2188 (*ops)[idx]->op = gimple_assign_lhs (gs);
2189 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2190 if (dump_file && (dump_flags & TDF_DETAILS))
2192 fprintf (dump_file, "Generating bit_field_ref -> ");
2193 print_gimple_stmt (dump_file, gs, 0);
2198 if (dump_file && (dump_flags & TDF_DETAILS))
2199 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2201 cleanup_vinfo_map (v_info_map);
2203 return true;
2206 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2207 expression, examine the other OPS to see if any of them are comparisons
2208 of the same values, which we may be able to combine or eliminate.
2209 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2211 static bool
2212 eliminate_redundant_comparison (enum tree_code opcode,
2213 vec<operand_entry *> *ops,
2214 unsigned int currindex,
2215 operand_entry *curr)
2217 tree op1, op2;
2218 enum tree_code lcode, rcode;
2219 gimple *def1, *def2;
2220 int i;
2221 operand_entry *oe;
2223 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2224 return false;
2226 /* Check that CURR is a comparison. */
2227 if (TREE_CODE (curr->op) != SSA_NAME)
2228 return false;
2229 def1 = SSA_NAME_DEF_STMT (curr->op);
2230 if (!is_gimple_assign (def1))
2231 return false;
2232 lcode = gimple_assign_rhs_code (def1);
2233 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2234 return false;
2235 op1 = gimple_assign_rhs1 (def1);
2236 op2 = gimple_assign_rhs2 (def1);
2238 /* Now look for a similar comparison in the remaining OPS. */
2239 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2241 tree t;
2243 if (TREE_CODE (oe->op) != SSA_NAME)
2244 continue;
2245 def2 = SSA_NAME_DEF_STMT (oe->op);
2246 if (!is_gimple_assign (def2))
2247 continue;
2248 rcode = gimple_assign_rhs_code (def2);
2249 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2250 continue;
2252 /* If we got here, we have a match. See if we can combine the
2253 two comparisons. */
2254 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2255 if (opcode == BIT_IOR_EXPR)
2256 t = maybe_fold_or_comparisons (type,
2257 lcode, op1, op2,
2258 rcode, gimple_assign_rhs1 (def2),
2259 gimple_assign_rhs2 (def2));
2260 else
2261 t = maybe_fold_and_comparisons (type,
2262 lcode, op1, op2,
2263 rcode, gimple_assign_rhs1 (def2),
2264 gimple_assign_rhs2 (def2));
2265 if (!t)
2266 continue;
2268 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2269 always give us a boolean_type_node value back. If the original
2270 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2271 we need to convert. */
2272 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2274 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2275 continue;
2276 t = fold_convert (TREE_TYPE (curr->op), t);
2279 if (TREE_CODE (t) != INTEGER_CST
2280 && !operand_equal_p (t, curr->op, 0))
2282 enum tree_code subcode;
2283 tree newop1, newop2;
2284 if (!COMPARISON_CLASS_P (t))
2285 continue;
2286 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2287 STRIP_USELESS_TYPE_CONVERSION (newop1);
2288 STRIP_USELESS_TYPE_CONVERSION (newop2);
2289 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2290 continue;
2291 if (lcode == TREE_CODE (t)
2292 && operand_equal_p (op1, newop1, 0)
2293 && operand_equal_p (op2, newop2, 0))
2294 t = curr->op;
2295 else if ((TREE_CODE (newop1) == SSA_NAME
2296 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2297 || (TREE_CODE (newop2) == SSA_NAME
2298 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2299 continue;
2302 if (dump_file && (dump_flags & TDF_DETAILS))
2304 fprintf (dump_file, "Equivalence: ");
2305 print_generic_expr (dump_file, curr->op);
2306 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2307 print_generic_expr (dump_file, oe->op);
2308 fprintf (dump_file, " -> ");
2309 print_generic_expr (dump_file, t);
2310 fprintf (dump_file, "\n");
2313 /* Now we can delete oe, as it has been subsumed by the new combined
2314 expression t. */
2315 ops->ordered_remove (i);
2316 reassociate_stats.ops_eliminated ++;
2318 /* If t is the same as curr->op, we're done. Otherwise we must
2319 replace curr->op with t. Special case is if we got a constant
2320 back, in which case we add it to the end instead of in place of
2321 the current entry. */
2322 if (TREE_CODE (t) == INTEGER_CST)
2324 ops->ordered_remove (currindex);
2325 add_to_ops_vec (ops, t);
2327 else if (!operand_equal_p (t, curr->op, 0))
2329 gimple *sum;
2330 enum tree_code subcode;
2331 tree newop1;
2332 tree newop2;
2333 gcc_assert (COMPARISON_CLASS_P (t));
2334 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2335 STRIP_USELESS_TYPE_CONVERSION (newop1);
2336 STRIP_USELESS_TYPE_CONVERSION (newop2);
2337 gcc_checking_assert (is_gimple_val (newop1)
2338 && is_gimple_val (newop2));
2339 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2340 curr->op = gimple_get_lhs (sum);
2342 return true;
2345 return false;
2349 /* Transform repeated addition of same values into multiply with
2350 constant. */
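/* As a minimal illustration: given the operand list for x + x + x + y, the
   three repeated entries are removed and replaced by one new operand that
   holds x * 3, so the chain becomes x * 3 + y.  For floating-point operands
   this is only done under -funsafe-math-optimizations, as checked below.  */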
2351 static bool
2352 transform_add_to_multiply (vec<operand_entry *> *ops)
2354 operand_entry *oe;
2355 tree op = NULL_TREE;
2356 int j;
2357 int i, start = -1, end = 0, count = 0;
2358 auto_vec<std::pair <int, int> > indxs;
2359 bool changed = false;
2361 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2362 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2363 || !flag_unsafe_math_optimizations))
2364 return false;
2366 /* Look for repeated operands. */
2367 FOR_EACH_VEC_ELT (*ops, i, oe)
2369 if (start == -1)
2371 count = 1;
2372 op = oe->op;
2373 start = i;
2375 else if (operand_equal_p (oe->op, op, 0))
2377 count++;
2378 end = i;
2380 else
2382 if (count > 1)
2383 indxs.safe_push (std::make_pair (start, end));
2384 count = 1;
2385 op = oe->op;
2386 start = i;
2390 if (count > 1)
2391 indxs.safe_push (std::make_pair (start, end));
2393 for (j = indxs.length () - 1; j >= 0; --j)
2395 /* Convert repeated operand addition to multiplication. */
2396 start = indxs[j].first;
2397 end = indxs[j].second;
2398 op = (*ops)[start]->op;
2399 count = end - start + 1;
2400 for (i = end; i >= start; --i)
2401 ops->unordered_remove (i);
2402 tree tmp = make_ssa_name (TREE_TYPE (op));
2403 tree cst = build_int_cst (integer_type_node, count);
2404 gassign *mul_stmt
2405 = gimple_build_assign (tmp, MULT_EXPR,
2406 op, fold_convert (TREE_TYPE (op), cst));
2407 gimple_set_visited (mul_stmt, true);
2408 add_to_ops_vec (ops, tmp, mul_stmt);
2409 changed = true;
2412 return changed;
2416 /* Perform various identities and other optimizations on the list of
2417 operand entries, stored in OPS. The tree code for the binary
2418 operation between all the operands is OPCODE. */
2420 static void
2421 optimize_ops_list (enum tree_code opcode,
2422 vec<operand_entry *> *ops)
2424 unsigned int length = ops->length ();
2425 unsigned int i;
2426 operand_entry *oe;
2427 operand_entry *oelast = NULL;
2428 bool iterate = false;
2430 if (length == 1)
2431 return;
2433 oelast = ops->last ();
2435 /* If the last two are constants, pop the constants off, merge them
2436 and try the next two. */
2437 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2439 operand_entry *oelm1 = (*ops)[length - 2];
2441 if (oelm1->rank == 0
2442 && is_gimple_min_invariant (oelm1->op)
2443 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2444 TREE_TYPE (oelast->op)))
2446 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2447 oelm1->op, oelast->op);
2449 if (folded && is_gimple_min_invariant (folded))
2451 if (dump_file && (dump_flags & TDF_DETAILS))
2452 fprintf (dump_file, "Merging constants\n");
2454 ops->pop ();
2455 ops->pop ();
2457 add_to_ops_vec (ops, folded);
2458 reassociate_stats.constants_eliminated++;
2460 optimize_ops_list (opcode, ops);
2461 return;
2466 eliminate_using_constants (opcode, ops);
2467 oelast = NULL;
2469 for (i = 0; ops->iterate (i, &oe);)
2471 bool done = false;
2473 if (eliminate_not_pairs (opcode, ops, i, oe))
2474 return;
2475 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2476 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2477 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2479 if (done)
2480 return;
2481 iterate = true;
2482 oelast = NULL;
2483 continue;
2485 oelast = oe;
2486 i++;
2489 if (iterate)
2490 optimize_ops_list (opcode, ops);
2493 /* The following functions are subroutines to optimize_range_tests and allow
2494 it to try to change a logical combination of comparisons into a range
2495 test.
2497 For example, both
2498 X == 2 || X == 5 || X == 3 || X == 4
2499 and
2500 X >= 2 && X <= 5
2501 are converted to
2502 (unsigned) (X - 2) <= 3
2504 For more information see comments above fold_test_range in fold-const.cc,
2505 this implementation is for GIMPLE. */
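/* A brute-force sketch of the equivalence above, assuming X is a 32-bit
   int: X is in [2, 5] exactly when the unsigned difference X - 2 is at
   most 3, because for X < 2 the subtraction wraps around to a large
   unsigned value.  As a standalone check (not using any GCC internals):

     #include <assert.h>

     int
     main (void)
     {
       for (int x = -10; x <= 10; x++)
         {
           int slow = (x == 2 || x == 5 || x == 3 || x == 4);
           int fast = (unsigned) (x - 2) <= 3;
           assert (slow == fast);
         }
       return 0;
     }
   */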
2509 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2511 void
2512 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2514 if (!skip_exp)
2515 print_generic_expr (file, r->exp);
2516 fprintf (file, " %c[", r->in_p ? '+' : '-');
2517 print_generic_expr (file, r->low);
2518 fputs (", ", file);
2519 print_generic_expr (file, r->high);
2520 fputc (']', file);
2523 /* Dump the range entry R to STDERR. */
2525 DEBUG_FUNCTION void
2526 debug_range_entry (struct range_entry *r)
2528 dump_range_entry (stderr, r, false);
2529 fputc ('\n', stderr);
2532 /* This is similar to make_range in fold-const.cc, but on top of
2533 GIMPLE instead of trees. If EXP is non-NULL, it should be
2534 an SSA_NAME and STMT argument is ignored, otherwise STMT
2535 argument should be a GIMPLE_COND. */
2537 void
2538 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2540 int in_p;
2541 tree low, high;
2542 bool is_bool, strict_overflow_p;
2544 r->exp = NULL_TREE;
2545 r->in_p = false;
2546 r->strict_overflow_p = false;
2547 r->low = NULL_TREE;
2548 r->high = NULL_TREE;
2549 if (exp != NULL_TREE
2550 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2551 return;
2553 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2554 and see if we can refine the range. Some of the cases below may not
2555 happen, but it doesn't seem worth worrying about this. We "continue"
2556 the outer loop when we've changed something; otherwise we "break"
2557 the switch, which will "break" the while. */
2558 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2559 high = low;
2560 in_p = 0;
2561 strict_overflow_p = false;
2562 is_bool = false;
2563 if (exp == NULL_TREE)
2564 is_bool = true;
2565 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2567 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2568 is_bool = true;
2569 else
2570 return;
2572 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2573 is_bool = true;
2575 while (1)
2577 enum tree_code code;
2578 tree arg0, arg1, exp_type;
2579 tree nexp;
2580 location_t loc;
2582 if (exp != NULL_TREE)
2584 if (TREE_CODE (exp) != SSA_NAME
2585 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2586 break;
2588 stmt = SSA_NAME_DEF_STMT (exp);
2589 if (!is_gimple_assign (stmt))
2590 break;
2592 code = gimple_assign_rhs_code (stmt);
2593 arg0 = gimple_assign_rhs1 (stmt);
2594 arg1 = gimple_assign_rhs2 (stmt);
2595 exp_type = TREE_TYPE (exp);
2597 else
2599 code = gimple_cond_code (stmt);
2600 arg0 = gimple_cond_lhs (stmt);
2601 arg1 = gimple_cond_rhs (stmt);
2602 exp_type = boolean_type_node;
2605 if (TREE_CODE (arg0) != SSA_NAME
2606 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)
2607 || ssa_name_maybe_undef_p (arg0))
2608 break;
2609 loc = gimple_location (stmt);
2610 switch (code)
2612 case BIT_NOT_EXPR:
2613 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2614 /* Ensure the range is either +[-,0], +[0,0],
2615 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2616 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2617 or similar expression of unconditional true or
2618 false, it should not be negated. */
2619 && ((high && integer_zerop (high))
2620 || (low && integer_onep (low))))
2622 in_p = !in_p;
2623 exp = arg0;
2624 continue;
2626 break;
2627 case SSA_NAME:
2628 exp = arg0;
2629 continue;
2630 CASE_CONVERT:
2631 if (is_bool)
2633 if ((TYPE_PRECISION (exp_type) == 1
2634 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2635 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2636 return;
2638 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2640 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2641 is_bool = true;
2642 else
2643 return;
2645 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2646 is_bool = true;
2647 goto do_default;
2648 case EQ_EXPR:
2649 case NE_EXPR:
2650 case LT_EXPR:
2651 case LE_EXPR:
2652 case GE_EXPR:
2653 case GT_EXPR:
2654 is_bool = true;
2655 /* FALLTHRU */
2656 default:
2657 if (!is_bool)
2658 return;
2659 do_default:
2660 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2661 &low, &high, &in_p,
2662 &strict_overflow_p);
2663 if (nexp != NULL_TREE)
2665 exp = nexp;
2666 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2667 continue;
2669 break;
2671 break;
2673 if (is_bool)
2675 r->exp = exp;
2676 r->in_p = in_p;
2677 r->low = low;
2678 r->high = high;
2679 r->strict_overflow_p = strict_overflow_p;
2683 /* Comparison function for qsort. Sort entries
2684 without SSA_NAME exp first, then with SSA_NAMEs sorted
2685 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2686 by increasing ->low and if ->low is the same, by increasing
2687 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2688 maximum. */
2690 static int
2691 range_entry_cmp (const void *a, const void *b)
2693 const struct range_entry *p = (const struct range_entry *) a;
2694 const struct range_entry *q = (const struct range_entry *) b;
2696 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2698 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2700 /* Group range_entries for the same SSA_NAME together. */
2701 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2702 return -1;
2703 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2704 return 1;
2705 /* If ->low is different, NULL low goes first, then by
2706 ascending low. */
2707 if (p->low != NULL_TREE)
2709 if (q->low != NULL_TREE)
2711 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2712 p->low, q->low);
2713 if (tem && integer_onep (tem))
2714 return -1;
2715 tem = fold_binary (GT_EXPR, boolean_type_node,
2716 p->low, q->low);
2717 if (tem && integer_onep (tem))
2718 return 1;
2720 else
2721 return 1;
2723 else if (q->low != NULL_TREE)
2724 return -1;
2725 /* If ->high is different, NULL high goes last, before that by
2726 ascending high. */
2727 if (p->high != NULL_TREE)
2729 if (q->high != NULL_TREE)
2731 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2732 p->high, q->high);
2733 if (tem && integer_onep (tem))
2734 return -1;
2735 tem = fold_binary (GT_EXPR, boolean_type_node,
2736 p->high, q->high);
2737 if (tem && integer_onep (tem))
2738 return 1;
2740 else
2741 return -1;
2743 else if (q->high != NULL_TREE)
2744 return 1;
2745 /* If both ranges are the same, sort below by ascending idx. */
2747 else
2748 return 1;
2750 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2751 return -1;
2753 if (p->idx < q->idx)
2754 return -1;
2755 else
2757 gcc_checking_assert (p->idx > q->idx);
2758 return 1;
2762 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2763 insert needed statements BEFORE or after GSI. */
2765 static tree
2766 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2768 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2769 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2770 if (TREE_CODE (ret) != SSA_NAME)
2772 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2773 if (before)
2774 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2775 else
2776 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2777 ret = gimple_assign_lhs (g);
2779 return ret;
2782 /* Helper routine of optimize_range_test.
2783 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2784 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2785 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2786 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2787 an array of COUNT pointers to other ranges. Return
2788 true if the range merge has been successful.
2789 If OPCODE is ERROR_MARK, this is called from within
2790 maybe_optimize_range_tests and is performing inter-bb range optimization.
2791 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2792 oe->rank. */
2794 static bool
2795 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2796 struct range_entry **otherrangep,
2797 unsigned int count, enum tree_code opcode,
2798 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2799 bool in_p, tree low, tree high, bool strict_overflow_p)
2801 unsigned int idx = range->idx;
2802 struct range_entry *swap_with = NULL;
2803 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2804 if (opcode == ERROR_MARK)
2806 /* For inter-bb range test optimization, pick from the range tests
2807 the one which is tested in the earliest condition (one dominating
2808 the others), because otherwise there could be some UB (e.g. signed
2809 overflow) in following bbs that we'd expose which wasn't there in
2810 the original program. See PR104196. */
2811 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2812 basic_block range_bb = orig_range_bb;
2813 for (unsigned int i = 0; i < count; i++)
2815 struct range_entry *this_range;
2816 if (otherrange)
2817 this_range = otherrange + i;
2818 else
2819 this_range = otherrangep[i];
2820 operand_entry *oe = (*ops)[this_range->idx];
2821 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2822 if (range_bb != this_bb
2823 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2825 swap_with = this_range;
2826 range_bb = this_bb;
2827 idx = this_range->idx;
2830 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2831 only defined in later blocks. In this case we can't move the
2832 merged comparison earlier, so instead check if there are any stmts
2833 that might trigger signed integer overflow in between and rewrite
2834 them. But only after we check if the optimization is possible. */
2835 if (seq && swap_with)
2837 rewrite_bb_first = range_bb;
2838 rewrite_bb_last = orig_range_bb;
2839 idx = range->idx;
2840 swap_with = NULL;
2843 operand_entry *oe = (*ops)[idx];
2844 tree op = oe->op;
2845 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2846 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2847 location_t loc = gimple_location (stmt);
2848 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2849 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2850 in_p, low, high);
2851 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2852 gimple_stmt_iterator gsi;
2853 unsigned int i, uid;
2855 if (tem == NULL_TREE)
2856 return false;
2858 /* If op is default def SSA_NAME, there is no place to insert the
2859 new comparison. Give up, unless we can use OP itself as the
2860 range test. */
2861 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2863 if (op == range->exp
2864 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2865 || TREE_CODE (optype) == BOOLEAN_TYPE)
2866 && (op == tem
2867 || (TREE_CODE (tem) == EQ_EXPR
2868 && TREE_OPERAND (tem, 0) == op
2869 && integer_onep (TREE_OPERAND (tem, 1))))
2870 && opcode != BIT_IOR_EXPR
2871 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2873 stmt = NULL;
2874 tem = op;
2876 else
2877 return false;
2880 if (swap_with)
2881 std::swap (range->idx, swap_with->idx);
2883 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2884 warning_at (loc, OPT_Wstrict_overflow,
2885 "assuming signed overflow does not occur "
2886 "when simplifying range test");
2888 if (dump_file && (dump_flags & TDF_DETAILS))
2890 struct range_entry *r;
2891 fprintf (dump_file, "Optimizing range tests ");
2892 dump_range_entry (dump_file, range, false);
2893 for (i = 0; i < count; i++)
2895 if (otherrange)
2896 r = otherrange + i;
2897 else
2898 r = otherrangep[i];
2899 if (r->exp
2900 && r->exp != range->exp
2901 && TREE_CODE (r->exp) == SSA_NAME)
2903 fprintf (dump_file, " and ");
2904 dump_range_entry (dump_file, r, false);
2906 else
2908 fprintf (dump_file, " and");
2909 dump_range_entry (dump_file, r, true);
2912 fprintf (dump_file, "\n into ");
2913 print_generic_expr (dump_file, tem);
2914 fprintf (dump_file, "\n");
2917 /* In inter-bb range optimization mode, if we have a seq, we can't
2918 move the merged comparison to the earliest bb from the comparisons
2919 being replaced, so instead rewrite stmts that could trigger signed
2920 integer overflow. */
2921 for (basic_block bb = rewrite_bb_last;
2922 bb != rewrite_bb_first; bb = single_pred (bb))
2923 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2924 !gsi_end_p (gsi); gsi_next (&gsi))
2926 gimple *stmt = gsi_stmt (gsi);
2927 if (is_gimple_assign (stmt))
2928 if (tree lhs = gimple_assign_lhs (stmt))
2929 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2930 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2931 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2933 enum tree_code code = gimple_assign_rhs_code (stmt);
2934 if (arith_code_with_undefined_signed_overflow (code))
2936 gimple_stmt_iterator gsip = gsi;
2937 gimple_stmt_iterator gsin = gsi;
2938 gsi_prev (&gsip);
2939 gsi_next (&gsin);
2940 rewrite_to_defined_overflow (&gsi);
2941 unsigned uid = gimple_uid (stmt);
2942 if (gsi_end_p (gsip))
2943 gsip = gsi_after_labels (bb);
2944 else
2945 gsi_next (&gsip);
2946 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2947 gsi_next (&gsip))
2948 gimple_set_uid (gsi_stmt (gsip), uid);
2953 if (opcode == BIT_IOR_EXPR
2954 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2955 tem = invert_truthvalue_loc (loc, tem);
2957 tem = fold_convert_loc (loc, optype, tem);
2958 if (stmt)
2960 gsi = gsi_for_stmt (stmt);
2961 uid = gimple_uid (stmt);
2963 else
2965 gsi = gsi_none ();
2966 uid = 0;
2968 if (stmt == NULL)
2969 gcc_checking_assert (tem == op);
2970 /* In rare cases range->exp can be equal to the lhs of stmt.
2971 In that case we have to insert after the stmt rather than before
2972 it. If stmt is a PHI, insert it at the start of the basic block. */
2973 else if (op != range->exp)
2975 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2976 tem = force_into_ssa_name (&gsi, tem, true);
2977 gsi_prev (&gsi);
2979 else if (gimple_code (stmt) != GIMPLE_PHI)
2981 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2982 tem = force_into_ssa_name (&gsi, tem, false);
2984 else
2986 gsi = gsi_after_labels (gimple_bb (stmt));
2987 if (!gsi_end_p (gsi))
2988 uid = gimple_uid (gsi_stmt (gsi));
2989 else
2991 gsi = gsi_start_bb (gimple_bb (stmt));
2992 uid = 1;
2993 while (!gsi_end_p (gsi))
2995 uid = gimple_uid (gsi_stmt (gsi));
2996 gsi_next (&gsi);
2999 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3000 tem = force_into_ssa_name (&gsi, tem, true);
3001 if (gsi_end_p (gsi))
3002 gsi = gsi_last_bb (gimple_bb (stmt));
3003 else
3004 gsi_prev (&gsi);
3006 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3007 if (gimple_uid (gsi_stmt (gsi)))
3008 break;
3009 else
3010 gimple_set_uid (gsi_stmt (gsi), uid);
3012 oe->op = tem;
3013 range->exp = exp;
3014 range->low = low;
3015 range->high = high;
3016 range->in_p = in_p;
3017 range->strict_overflow_p = false;
3019 for (i = 0; i < count; i++)
3021 if (otherrange)
3022 range = otherrange + i;
3023 else
3024 range = otherrangep[i];
3025 oe = (*ops)[range->idx];
3026 /* Now change all the other range test immediate uses, so that
3027 those tests will be optimized away. */
3028 if (opcode == ERROR_MARK)
3030 if (oe->op)
3031 oe->op = build_int_cst (TREE_TYPE (oe->op),
3032 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3033 else
3034 oe->op = (oe->rank == BIT_IOR_EXPR
3035 ? boolean_false_node : boolean_true_node);
3037 else
3038 oe->op = error_mark_node;
3039 range->exp = NULL_TREE;
3040 range->low = NULL_TREE;
3041 range->high = NULL_TREE;
3043 return true;
3046 /* Optimize X == CST1 || X == CST2
3047 if popcount (CST1 ^ CST2) == 1 into
3048 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3049 Similarly for ranges. E.g.
3050 X != 2 && X != 3 && X != 10 && X != 11
3051 will be transformed by the previous optimization into
3052 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3053 and this loop can transform that into
3054 !(((X & ~8) - 2U) <= 1U). */
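/* A worked instance of the above: for X == 4 || X == 6 we have
   CST1 ^ CST2 == 4 ^ 6 == 2, a single bit, and the low and high bounds
   xor to the same value, so the test becomes (X & ~2) == (4 & ~2),
   i.e. (X & ~2) == 4, which accepts exactly the values 4 and 6.  */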
3056 static bool
3057 optimize_range_tests_xor (enum tree_code opcode, tree type,
3058 tree lowi, tree lowj, tree highi, tree highj,
3059 vec<operand_entry *> *ops,
3060 struct range_entry *rangei,
3061 struct range_entry *rangej)
3063 tree lowxor, highxor, tem, exp;
3064 /* Check lowi ^ lowj == highi ^ highj and
3065 popcount (lowi ^ lowj) == 1. */
3066 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3067 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3068 return false;
3069 if (!integer_pow2p (lowxor))
3070 return false;
3071 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3072 if (!tree_int_cst_equal (lowxor, highxor))
3073 return false;
3075 exp = rangei->exp;
3076 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3077 int prec = GET_MODE_PRECISION (mode);
3078 if (TYPE_PRECISION (type) < prec
3079 || (wi::to_wide (TYPE_MIN_VALUE (type))
3080 != wi::min_value (prec, TYPE_SIGN (type)))
3081 || (wi::to_wide (TYPE_MAX_VALUE (type))
3082 != wi::max_value (prec, TYPE_SIGN (type))))
3084 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3085 exp = fold_convert (type, exp);
3086 lowxor = fold_convert (type, lowxor);
3087 lowi = fold_convert (type, lowi);
3088 highi = fold_convert (type, highi);
3090 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3091 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3092 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3093 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3094 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3095 NULL, rangei->in_p, lowj, highj,
3096 rangei->strict_overflow_p
3097 || rangej->strict_overflow_p))
3098 return true;
3099 return false;
3102 /* Optimize X == CST1 || X == CST2
3103 if popcount (CST2 - CST1) == 1 into
3104 ((X - CST1) & ~(CST2 - CST1)) == 0.
3105 Similarly for ranges. E.g.
3106 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3107 || X == 75 || X == 45
3108 will be transformed by the previous optimization into
3109 (X - 43U) <= 3U || (X - 75U) <= 3U
3110 and this loop can transform that into
3111 ((X - 43U) & ~(75U - 43U)) <= 3U. */
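/* A worked instance of the above: CST2 - CST1 is 75U - 43U == 32U, a
   single bit, so the two subranges collapse into
   ((X - 43U) & ~32U) <= 3U; the values 43..46 map to 0..3 directly,
   while 75..78 map to 32..35, whose extra bit 5 is masked away by ~32U.  */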
3112 static bool
3113 optimize_range_tests_diff (enum tree_code opcode, tree type,
3114 tree lowi, tree lowj, tree highi, tree highj,
3115 vec<operand_entry *> *ops,
3116 struct range_entry *rangei,
3117 struct range_entry *rangej)
3119 tree tem1, tem2, mask;
3120 /* Check highi - lowi == highj - lowj. */
3121 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3122 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3123 return false;
3124 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3125 if (!tree_int_cst_equal (tem1, tem2))
3126 return false;
3127 /* Check popcount (lowj - lowi) == 1. */
3128 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3129 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3130 return false;
3131 if (!integer_pow2p (tem1))
3132 return false;
3134 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3135 int prec = GET_MODE_PRECISION (mode);
3136 if (TYPE_PRECISION (type) < prec
3137 || (wi::to_wide (TYPE_MIN_VALUE (type))
3138 != wi::min_value (prec, TYPE_SIGN (type)))
3139 || (wi::to_wide (TYPE_MAX_VALUE (type))
3140 != wi::max_value (prec, TYPE_SIGN (type))))
3141 type = build_nonstandard_integer_type (prec, 1);
3142 else
3143 type = unsigned_type_for (type);
3144 tem1 = fold_convert (type, tem1);
3145 tem2 = fold_convert (type, tem2);
3146 lowi = fold_convert (type, lowi);
3147 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3148 tem1 = fold_build2 (MINUS_EXPR, type,
3149 fold_convert (type, rangei->exp), lowi);
3150 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3151 lowj = build_int_cst (type, 0);
3152 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3153 NULL, rangei->in_p, lowj, tem2,
3154 rangei->strict_overflow_p
3155 || rangej->strict_overflow_p))
3156 return true;
3157 return false;
3160 /* Perform the checks common to optimize_range_tests_xor and
3161 optimize_range_tests_diff.
3162 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
3163 Else call optimize_range_tests_diff. */
3165 static bool
3166 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3167 bool optimize_xor, vec<operand_entry *> *ops,
3168 struct range_entry *ranges)
3170 int i, j;
3171 bool any_changes = false;
3172 for (i = first; i < length; i++)
3174 tree lowi, highi, lowj, highj, type, tem;
3176 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3177 continue;
3178 type = TREE_TYPE (ranges[i].exp);
3179 if (!INTEGRAL_TYPE_P (type))
3180 continue;
3181 lowi = ranges[i].low;
3182 if (lowi == NULL_TREE)
3183 lowi = TYPE_MIN_VALUE (type);
3184 highi = ranges[i].high;
3185 if (highi == NULL_TREE)
3186 continue;
3187 for (j = i + 1; j < length && j < i + 64; j++)
3189 bool changes;
3190 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3191 continue;
3192 lowj = ranges[j].low;
3193 if (lowj == NULL_TREE)
3194 continue;
3195 highj = ranges[j].high;
3196 if (highj == NULL_TREE)
3197 highj = TYPE_MAX_VALUE (type);
3198 /* Check lowj > highi. */
3199 tem = fold_binary (GT_EXPR, boolean_type_node,
3200 lowj, highi);
3201 if (tem == NULL_TREE || !integer_onep (tem))
3202 continue;
3203 if (optimize_xor)
3204 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3205 highi, highj, ops,
3206 ranges + i, ranges + j);
3207 else
3208 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3209 highi, highj, ops,
3210 ranges + i, ranges + j);
3211 if (changes)
3213 any_changes = true;
3214 break;
3218 return any_changes;
3221 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3222 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3223 EXP on success, NULL otherwise. */
3225 static tree
3226 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3227 wide_int *mask, tree *totallowp)
3229 tree tem = int_const_binop (MINUS_EXPR, high, low);
3230 if (tem == NULL_TREE
3231 || TREE_CODE (tem) != INTEGER_CST
3232 || TREE_OVERFLOW (tem)
3233 || tree_int_cst_sgn (tem) == -1
3234 || compare_tree_int (tem, prec) != -1)
3235 return NULL_TREE;
3237 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3238 *mask = wi::shifted_mask (0, max, false, prec);
3239 if (TREE_CODE (exp) == BIT_AND_EXPR
3240 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3242 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3243 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3244 if (wi::popcount (msk) == 1
3245 && wi::ltu_p (msk, prec - max))
3247 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3248 max += msk.to_uhwi ();
3249 exp = TREE_OPERAND (exp, 0);
3250 if (integer_zerop (low)
3251 && TREE_CODE (exp) == PLUS_EXPR
3252 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3254 tree ret = TREE_OPERAND (exp, 0);
3255 STRIP_NOPS (ret);
3256 widest_int bias
3257 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3258 TYPE_PRECISION (TREE_TYPE (low))));
3259 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3260 if (totallowp)
3262 *totallowp = tbias;
3263 return ret;
3265 else if (!tree_int_cst_lt (totallow, tbias))
3266 return NULL_TREE;
3267 bias = wi::to_widest (tbias);
3268 bias -= wi::to_widest (totallow);
3269 if (bias >= 0 && bias < prec - max)
3271 *mask = wi::lshift (*mask, bias);
3272 return ret;
3277 if (totallowp)
3278 return exp;
3279 if (!tree_int_cst_lt (totallow, low))
3280 return exp;
3281 tem = int_const_binop (MINUS_EXPR, low, totallow);
3282 if (tem == NULL_TREE
3283 || TREE_CODE (tem) != INTEGER_CST
3284 || TREE_OVERFLOW (tem)
3285 || compare_tree_int (tem, prec - max) == 1)
3286 return NULL_TREE;
3288 *mask = wi::lshift (*mask, wi::to_widest (tem));
3289 return exp;
3292 /* Attempt to optimize small range tests using bit test.
3293 E.g.
3294 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3295 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3296 has already been optimized by earlier passes into:
3297 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3298 As all the 43 through 82 range is less than 64 numbers,
3299 for 64-bit word targets optimize that into:
3300 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
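/* A note on the constant in the example above: 0x8F0000004FULL has one bit
   set for each tested value, at position VALUE - 43; offsets 0-3 and 6 give
   the low 0x4F, offsets 32-35 and 39 give the 0x8F in the upper word.  The
   bit test itself then costs one subtraction, one shift, one AND and one
   compare against zero on a 64-bit word target, plus the entry range
   check.  */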
3302 static bool
3303 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3304 vec<operand_entry *> *ops,
3305 struct range_entry *ranges)
3307 int i, j;
3308 bool any_changes = false;
3309 int prec = GET_MODE_BITSIZE (word_mode);
3310 auto_vec<struct range_entry *, 64> candidates;
3312 for (i = first; i < length - 1; i++)
3314 tree lowi, highi, lowj, highj, type;
3316 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3317 continue;
3318 type = TREE_TYPE (ranges[i].exp);
3319 if (!INTEGRAL_TYPE_P (type))
3320 continue;
3321 lowi = ranges[i].low;
3322 if (lowi == NULL_TREE)
3323 lowi = TYPE_MIN_VALUE (type);
3324 highi = ranges[i].high;
3325 if (highi == NULL_TREE)
3326 continue;
3327 wide_int mask;
3328 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3329 highi, &mask, &lowi);
3330 if (exp == NULL_TREE)
3331 continue;
3332 bool strict_overflow_p = ranges[i].strict_overflow_p;
3333 candidates.truncate (0);
3334 int end = MIN (i + 64, length);
3335 for (j = i + 1; j < end; j++)
3337 tree exp2;
3338 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3339 continue;
3340 if (ranges[j].exp == exp)
3342 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3344 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3345 if (exp2 == exp)
3347 else if (TREE_CODE (exp2) == PLUS_EXPR)
3349 exp2 = TREE_OPERAND (exp2, 0);
3350 STRIP_NOPS (exp2);
3351 if (exp2 != exp)
3352 continue;
3354 else
3355 continue;
3357 else
3358 continue;
3359 lowj = ranges[j].low;
3360 if (lowj == NULL_TREE)
3361 continue;
3362 highj = ranges[j].high;
3363 if (highj == NULL_TREE)
3364 highj = TYPE_MAX_VALUE (type);
3365 wide_int mask2;
3366 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3367 highj, &mask2, NULL);
3368 if (exp2 != exp)
3369 continue;
3370 mask |= mask2;
3371 strict_overflow_p |= ranges[j].strict_overflow_p;
3372 candidates.safe_push (&ranges[j]);
3375 /* If every possible relative value of the expression is a valid shift
3376 amount, then we can merge the entry test into the bit test. In this
3377 case, if we would otherwise need 2 or more comparisons, then use
3378 the bit test; in the other cases, the threshold is 3 comparisons. */
3379 bool entry_test_needed;
3380 value_range r;
3381 if (TREE_CODE (exp) == SSA_NAME
3382 && get_range_query (cfun)->range_of_expr (r, exp)
3383 && !r.undefined_p ()
3384 && !r.varying_p ()
3385 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3387 wide_int min = r.lower_bound ();
3388 wide_int ilowi = wi::to_wide (lowi);
3389 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3391 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3392 mask = wi::lshift (mask, ilowi - min);
3394 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3396 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3397 mask = wi::lrshift (mask, min - ilowi);
3399 entry_test_needed = false;
3401 else
3402 entry_test_needed = true;
3403 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3405 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3406 wi::to_widest (lowi)
3407 + prec - 1 - wi::clz (mask));
3408 operand_entry *oe = (*ops)[ranges[i].idx];
3409 tree op = oe->op;
3410 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3411 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3412 (cfun, oe->id));
3413 location_t loc = gimple_location (stmt);
3414 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3416 /* See if it isn't cheaper to pretend the minimum value of the
3417 range is 0, if the maximum value is small enough.
3418 We can then avoid the subtraction of the minimum value, but the
3419 mask constant could perhaps be more expensive.
3420 if (compare_tree_int (lowi, 0) > 0
3421 && compare_tree_int (high, prec) < 0)
3423 int cost_diff;
3424 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3425 rtx reg = gen_raw_REG (word_mode, 10000);
3426 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3427 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3428 GEN_INT (-m)),
3429 word_mode, speed_p);
3430 rtx r = immed_wide_int_const (mask, word_mode);
3431 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3432 word_mode, speed_p);
3433 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3434 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3435 word_mode, speed_p);
3436 if (cost_diff > 0)
3438 mask = wi::lshift (mask, m);
3439 lowi = build_zero_cst (TREE_TYPE (lowi));
3443 tree tem;
3444 if (entry_test_needed)
3446 tem = build_range_check (loc, optype, unshare_expr (exp),
3447 false, lowi, high);
3448 if (tem == NULL_TREE || is_gimple_val (tem))
3449 continue;
3451 else
3452 tem = NULL_TREE;
3453 tree etype = unsigned_type_for (TREE_TYPE (exp));
3454 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3455 fold_convert_loc (loc, etype, exp),
3456 fold_convert_loc (loc, etype, lowi));
3457 exp = fold_convert_loc (loc, integer_type_node, exp);
3458 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3459 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3460 build_int_cst (word_type, 1), exp);
3461 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3462 wide_int_to_tree (word_type, mask));
3463 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3464 build_zero_cst (word_type));
3465 if (is_gimple_val (exp))
3466 continue;
3468 /* The shift might have undefined behavior if TEM is true,
3469 but reassociate_bb isn't prepared to have basic blocks
3470 split when it is running. So, temporarily emit a code
3471 with BIT_IOR_EXPR instead of &&, and fix it up in
3472 branch_fixup. */
3473 gimple_seq seq = NULL;
3474 if (tem)
3476 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3477 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3478 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3480 gimple_seq seq2;
3481 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3482 gimple_seq_add_seq_without_update (&seq, seq2);
3483 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3484 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3485 if (tem)
3487 gimple *g = gimple_build_assign (make_ssa_name (optype),
3488 BIT_IOR_EXPR, tem, exp);
3489 gimple_set_location (g, loc);
3490 gimple_seq_add_stmt_without_update (&seq, g);
3491 exp = gimple_assign_lhs (g);
3493 tree val = build_zero_cst (optype);
3494 if (update_range_test (&ranges[i], NULL, candidates.address (),
3495 candidates.length (), opcode, ops, exp,
3496 seq, false, val, val, strict_overflow_p))
3498 any_changes = true;
3499 if (tem)
3500 reassoc_branch_fixups.safe_push (tem);
3502 else
3503 gimple_seq_discard (seq);
3506 return any_changes;
3509 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3510 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3511 Also, handle x < C && y < C && z < C where C is power of two as
3512 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3513 as (x | y | z) < 0. */
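/* As an illustration of the power-of-two case above, assuming unsigned
   x, y and z and C == 16: each comparison x < 16 merely states that all
   bits above bit 3 are clear, so the three tests hold simultaneously
   exactly when (x | y | z) < 16, replacing three compare-and-branches by
   two ORs and a single comparison.  */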
3515 static bool
3516 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3517 vec<operand_entry *> *ops,
3518 struct range_entry *ranges)
3520 int i;
3521 unsigned int b;
3522 bool any_changes = false;
3523 auto_vec<int, 128> buckets;
3524 auto_vec<int, 32> chains;
3525 auto_vec<struct range_entry *, 32> candidates;
3527 for (i = first; i < length; i++)
3529 int idx;
3531 if (ranges[i].exp == NULL_TREE
3532 || TREE_CODE (ranges[i].exp) != SSA_NAME
3533 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3534 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3535 continue;
3537 if (ranges[i].low != NULL_TREE
3538 && ranges[i].high != NULL_TREE
3539 && ranges[i].in_p
3540 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3542 idx = !integer_zerop (ranges[i].low);
3543 if (idx && !integer_all_onesp (ranges[i].low))
3544 continue;
3546 else if (ranges[i].high != NULL_TREE
3547 && TREE_CODE (ranges[i].high) == INTEGER_CST
3548 && ranges[i].in_p)
3550 wide_int w = wi::to_wide (ranges[i].high);
3551 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3552 int l = wi::clz (w);
3553 idx = 2;
3554 if (l <= 0
3555 || l >= prec
3556 || w != wi::mask (prec - l, false, prec))
3557 continue;
3558 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3559 && ranges[i].low == NULL_TREE)
3560 || (ranges[i].low
3561 && integer_zerop (ranges[i].low))))
3562 continue;
3564 else if (ranges[i].high == NULL_TREE
3565 && ranges[i].low != NULL_TREE
3566 /* Perform this optimization only in the last
3567 reassoc pass, as it interferes with the reassociation
3568 itself and could also interfere with VRP etc., which might not
3569 be able to virtually undo the optimization. */
3570 && !reassoc_insert_powi_p
3571 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3572 && integer_zerop (ranges[i].low))
3573 idx = 3;
3574 else
3575 continue;
3577 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3578 if (buckets.length () <= b)
3579 buckets.safe_grow_cleared (b + 1, true);
3580 if (chains.length () <= (unsigned) i)
3581 chains.safe_grow (i + 1, true);
3582 chains[i] = buckets[b];
3583 buckets[b] = i + 1;
3586 FOR_EACH_VEC_ELT (buckets, b, i)
3587 if (i && chains[i - 1])
3589 int j, k = i;
3590 if ((b % 4) == 2)
3592 /* When ranges[X - 1].high + 1 is a power of two,
3593 we need to process the same bucket up to
3594 precision - 1 times, each time split the entries
3595 with the same high bound into one chain and the
3596 rest into another one to be processed later. */
3597 int this_prev = i;
3598 int other_prev = 0;
3599 for (j = chains[i - 1]; j; j = chains[j - 1])
3601 if (tree_int_cst_equal (ranges[i - 1].high,
3602 ranges[j - 1].high))
3604 chains[this_prev - 1] = j;
3605 this_prev = j;
3607 else if (other_prev == 0)
3609 buckets[b] = j;
3610 other_prev = j;
3612 else
3614 chains[other_prev - 1] = j;
3615 other_prev = j;
3618 chains[this_prev - 1] = 0;
3619 if (other_prev)
3620 chains[other_prev - 1] = 0;
3621 if (chains[i - 1] == 0)
3623 if (other_prev)
3624 b--;
3625 continue;
3628 for (j = chains[i - 1]; j; j = chains[j - 1])
3630 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3631 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3632 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3633 k = j;
3635 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3636 tree type2 = NULL_TREE;
3637 bool strict_overflow_p = false;
3638 candidates.truncate (0);
3639 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3640 type1 = pointer_sized_int_node;
3641 for (j = i; j; j = chains[j - 1])
3643 tree type = TREE_TYPE (ranges[j - 1].exp);
3644 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3645 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3646 type = pointer_sized_int_node;
3647 if ((b % 4) == 3)
3649 /* For the signed < 0 cases, the types should be
3650 really compatible (all signed with the same precision);
3651 instead put ranges that have different in_p from
3652 k first. */
3653 if (!useless_type_conversion_p (type1, type))
3654 continue;
3655 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3656 candidates.safe_push (&ranges[j - 1]);
3657 type2 = type1;
3658 continue;
3660 if (j == k
3661 || useless_type_conversion_p (type1, type))
3663 else if (type2 == NULL_TREE
3664 || useless_type_conversion_p (type2, type))
3666 if (type2 == NULL_TREE)
3667 type2 = type;
3668 candidates.safe_push (&ranges[j - 1]);
3671 unsigned l = candidates.length ();
3672 for (j = i; j; j = chains[j - 1])
3674 tree type = TREE_TYPE (ranges[j - 1].exp);
3675 if (j == k)
3676 continue;
3677 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3678 type = pointer_sized_int_node;
3679 if ((b % 4) == 3)
3681 if (!useless_type_conversion_p (type1, type))
3682 continue;
3683 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3684 candidates.safe_push (&ranges[j - 1]);
3685 continue;
3687 if (useless_type_conversion_p (type1, type))
3689 else if (type2 == NULL_TREE
3690 || useless_type_conversion_p (type2, type))
3691 continue;
3692 candidates.safe_push (&ranges[j - 1]);
3694 gimple_seq seq = NULL;
3695 tree op = NULL_TREE;
3696 unsigned int id;
3697 struct range_entry *r;
3698 candidates.safe_push (&ranges[k - 1]);
3699 FOR_EACH_VEC_ELT (candidates, id, r)
3701 gimple *g;
3702 enum tree_code code;
3703 if (id == 0)
3705 op = r->exp;
3706 continue;
3708 if (id == l
3709 || POINTER_TYPE_P (TREE_TYPE (op))
3710 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3712 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3713 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3714 if (code == BIT_NOT_EXPR
3715 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3717 g = gimple_build_assign (make_ssa_name (type3),
3718 NOP_EXPR, op);
3719 gimple_seq_add_stmt_without_update (&seq, g);
3720 op = gimple_assign_lhs (g);
3722 g = gimple_build_assign (make_ssa_name (type3), code, op);
3723 gimple_seq_add_stmt_without_update (&seq, g);
3724 op = gimple_assign_lhs (g);
3726 tree type = TREE_TYPE (r->exp);
3727 tree exp = r->exp;
3728 if (POINTER_TYPE_P (type)
3729 || TREE_CODE (type) == OFFSET_TYPE
3730 || (id >= l && !useless_type_conversion_p (type1, type)))
3732 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3733 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3734 gimple_seq_add_stmt_without_update (&seq, g);
3735 exp = gimple_assign_lhs (g);
3737 if ((b % 4) == 3)
3738 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3739 else
3740 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3741 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3742 code, op, exp);
3743 gimple_seq_add_stmt_without_update (&seq, g);
3744 op = gimple_assign_lhs (g);
3746 type1 = TREE_TYPE (ranges[k - 1].exp);
3747 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3749 gimple *g
3750 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3751 gimple_seq_add_stmt_without_update (&seq, g);
3752 op = gimple_assign_lhs (g);
3754 candidates.pop ();
3755 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3756 candidates.length (), opcode, ops, op,
3757 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3758 ranges[k - 1].high, strict_overflow_p))
3759 any_changes = true;
3760 else
3761 gimple_seq_discard (seq);
3762 if ((b % 4) == 2 && buckets[b] != i)
3763 /* There is more work to do for this bucket. */
3764 b--;
3767 return any_changes;
3770 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3771 a >= 0 && a < b into (unsigned) a < (unsigned) b
3772 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
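/* A sketch of why the rewrite above is safe, assuming 32-bit int: if
   a >= 0, both operands reinterpret to the same numeric values, so the
   unsigned comparison agrees with the signed one; if a < 0, (unsigned) a
   is at least 0x80000000 while (unsigned) b is at most INT_MAX because
   b >= 0, so the unsigned comparison is false, matching the original
   conjunction a >= 0 && a < b.  */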
3774 static bool
3775 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3776 vec<operand_entry *> *ops,
3777 struct range_entry *ranges,
3778 basic_block first_bb)
3780 int i;
3781 bool any_changes = false;
3782 hash_map<tree, int> *map = NULL;
3784 for (i = first; i < length; i++)
3786 if (ranges[i].exp == NULL_TREE
3787 || TREE_CODE (ranges[i].exp) != SSA_NAME
3788 || !ranges[i].in_p)
3789 continue;
3791 tree type = TREE_TYPE (ranges[i].exp);
3792 if (!INTEGRAL_TYPE_P (type)
3793 || TYPE_UNSIGNED (type)
3794 || ranges[i].low == NULL_TREE
3795 || !integer_zerop (ranges[i].low)
3796 || ranges[i].high != NULL_TREE)
3797 continue;
3798 /* EXP >= 0 here. */
3799 if (map == NULL)
3800 map = new hash_map <tree, int>;
3801 map->put (ranges[i].exp, i);
3804 if (map == NULL)
3805 return false;
3807 for (i = 0; i < length; i++)
3809 bool in_p = ranges[i].in_p;
3810 if (ranges[i].low == NULL_TREE
3811 || ranges[i].high == NULL_TREE)
3812 continue;
3813 if (!integer_zerop (ranges[i].low)
3814 || !integer_zerop (ranges[i].high))
3816 if (ranges[i].exp
3817 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3818 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3819 && integer_onep (ranges[i].low)
3820 && integer_onep (ranges[i].high))
3821 in_p = !in_p;
3822 else
3823 continue;
3826 gimple *stmt;
3827 tree_code ccode;
3828 tree rhs1, rhs2;
3829 if (ranges[i].exp)
3831 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3832 continue;
3833 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3834 if (!is_gimple_assign (stmt))
3835 continue;
3836 ccode = gimple_assign_rhs_code (stmt);
3837 rhs1 = gimple_assign_rhs1 (stmt);
3838 rhs2 = gimple_assign_rhs2 (stmt);
3840 else
3842 operand_entry *oe = (*ops)[ranges[i].idx];
3843 stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3844 if (gimple_code (stmt) != GIMPLE_COND)
3845 continue;
3846 ccode = gimple_cond_code (stmt);
3847 rhs1 = gimple_cond_lhs (stmt);
3848 rhs2 = gimple_cond_rhs (stmt);
3851 if (TREE_CODE (rhs1) != SSA_NAME
3852 || rhs2 == NULL_TREE
3853 || TREE_CODE (rhs2) != SSA_NAME)
3854 continue;
3856 switch (ccode)
3858 case GT_EXPR:
3859 case GE_EXPR:
3860 case LT_EXPR:
3861 case LE_EXPR:
3862 break;
3863 default:
3864 continue;
3866 if (in_p)
3867 ccode = invert_tree_comparison (ccode, false);
3868 switch (ccode)
3870 case GT_EXPR:
3871 case GE_EXPR:
3872 std::swap (rhs1, rhs2);
3873 ccode = swap_tree_comparison (ccode);
3874 break;
3875 case LT_EXPR:
3876 case LE_EXPR:
3877 break;
3878 default:
3879 gcc_unreachable ();
3882 int *idx = map->get (rhs1);
3883 if (idx == NULL)
3884 continue;
3886 /* maybe_optimize_range_tests allows statements without side-effects
3887 in the basic blocks as long as they are consumed in the same bb.
3888 Make sure rhs2's def stmt is not among them, otherwise we can't
3889 safely use get_nonzero_bits on it. E.g. in:
3890 # RANGE [-83, 1] NONZERO 173
3891 # k_32 = PHI <k_47(13), k_12(9)>
3893 if (k_32 >= 0)
3894 goto <bb 5>; [26.46%]
3895 else
3896 goto <bb 9>; [73.54%]
3898 <bb 5> [local count: 140323371]:
3899 # RANGE [0, 1] NONZERO 1
3900 _5 = (int) k_32;
3901 # RANGE [0, 4] NONZERO 4
3902 _21 = _5 << 2;
3903 # RANGE [0, 4] NONZERO 4
3904 iftmp.0_44 = (char) _21;
3905 if (k_32 < iftmp.0_44)
3906 goto <bb 6>; [84.48%]
3907 else
3908 goto <bb 9>; [15.52%]
3909 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3910 k_32 >= 0. If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3911 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3912 those stmts even for negative k_32, the value ranges would no longer
3913 be guaranteed, and so the optimization would be invalid. */
3914 while (opcode == ERROR_MARK)
3916 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3917 basic_block bb2 = gimple_bb (g);
3918 if (bb2
3919 && bb2 != first_bb
3920 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3922 /* As an exception, handle a few common cases. */
3923 if (gimple_assign_cast_p (g)
3924 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3926 tree op0 = gimple_assign_rhs1 (g);
3927 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3928 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3929 > TYPE_PRECISION (TREE_TYPE (op0))))
3930 /* Zero-extension is always ok. */
3931 break;
3932 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3933 == TYPE_PRECISION (TREE_TYPE (op0))
3934 && TREE_CODE (op0) == SSA_NAME)
3936 /* Cast from signed to unsigned or vice versa. Retry
3937 with the op0 as new rhs2. */
3938 rhs2 = op0;
3939 continue;
3942 else if (is_gimple_assign (g)
3943 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3944 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3945 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3946 /* Masking with INTEGER_CST with MSB clear is always ok
3947 too. */
3948 break;
3949 rhs2 = NULL_TREE;
3951 break;
3953 if (rhs2 == NULL_TREE)
3954 continue;
3956 wide_int nz = get_nonzero_bits (rhs2);
3957 if (wi::neg_p (nz))
3958 continue;
3960 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3961 and RHS2 is known to be RHS2 >= 0. */
3962 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3964 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3965 if ((ranges[*idx].strict_overflow_p
3966 || ranges[i].strict_overflow_p)
3967 && issue_strict_overflow_warning (wc))
3968 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3969 "assuming signed overflow does not occur "
3970 "when simplifying range test");
3972 if (dump_file && (dump_flags & TDF_DETAILS))
3974 struct range_entry *r = &ranges[*idx];
3975 fprintf (dump_file, "Optimizing range test ");
3976 print_generic_expr (dump_file, r->exp);
3977 fprintf (dump_file, " +[");
3978 print_generic_expr (dump_file, r->low);
3979 fprintf (dump_file, ", ");
3980 print_generic_expr (dump_file, r->high);
3981 fprintf (dump_file, "] and comparison ");
3982 print_generic_expr (dump_file, rhs1);
3983 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3984 print_generic_expr (dump_file, rhs2);
3985 fprintf (dump_file, "\n into (");
3986 print_generic_expr (dump_file, utype);
3987 fprintf (dump_file, ") ");
3988 print_generic_expr (dump_file, rhs1);
3989 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3990 print_generic_expr (dump_file, utype);
3991 fprintf (dump_file, ") ");
3992 print_generic_expr (dump_file, rhs2);
3993 fprintf (dump_file, "\n");
3996 operand_entry *oe = (*ops)[ranges[i].idx];
3997 ranges[i].in_p = 0;
3998 if (opcode == BIT_IOR_EXPR
3999 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4001 ranges[i].in_p = 1;
4002 ccode = invert_tree_comparison (ccode, false);
4005 unsigned int uid = gimple_uid (stmt);
4006 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4007 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4008 gimple_set_uid (g, uid);
4009 rhs1 = gimple_assign_lhs (g);
4010 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4011 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4013 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4014 gimple_set_uid (g, uid);
4015 rhs2 = gimple_assign_lhs (g);
4016 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4018 if (tree_swap_operands_p (rhs1, rhs2))
4020 std::swap (rhs1, rhs2);
4021 ccode = swap_tree_comparison (ccode);
4023 if (gimple_code (stmt) == GIMPLE_COND)
4025 gcond *c = as_a <gcond *> (stmt);
4026 gimple_cond_set_code (c, ccode);
4027 gimple_cond_set_lhs (c, rhs1);
4028 gimple_cond_set_rhs (c, rhs2);
4029 update_stmt (stmt);
4031 else
4033 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4034 if (!INTEGRAL_TYPE_P (ctype)
4035 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4036 && TYPE_PRECISION (ctype) != 1))
4037 ctype = boolean_type_node;
4038 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4039 gimple_set_uid (g, uid);
4040 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4041 if (oe->op && ctype != TREE_TYPE (oe->op))
4043 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4044 NOP_EXPR, gimple_assign_lhs (g));
4045 gimple_set_uid (g, uid);
4046 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4048 ranges[i].exp = gimple_assign_lhs (g);
4049 oe->op = ranges[i].exp;
4050 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4051 ranges[i].high = ranges[i].low;
4053 ranges[i].strict_overflow_p = false;
4054 oe = (*ops)[ranges[*idx].idx];
4055 /* Now change all the other range test immediate uses, so that
4056 those tests will be optimized away. */
4057 if (opcode == ERROR_MARK)
4059 if (oe->op)
4060 oe->op = build_int_cst (TREE_TYPE (oe->op),
4061 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4062 else
4063 oe->op = (oe->rank == BIT_IOR_EXPR
4064 ? boolean_false_node : boolean_true_node);
4066 else
4067 oe->op = error_mark_node;
4068 ranges[*idx].exp = NULL_TREE;
4069 ranges[*idx].low = NULL_TREE;
4070 ranges[*idx].high = NULL_TREE;
4071 any_changes = true;
4074 delete map;
4075 return any_changes;
4078 /* Optimize range tests, similarly how fold_range_test optimizes
4079 it on trees. The tree code for the binary
4080 operation between all the operands is OPCODE.
4081 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4082 maybe_optimize_range_tests for inter-bb range optimization.
4083 In that case, if oe->op is NULL, oe->id is the index of the basic
4084 block whose GIMPLE_COND is &&ed or ||ed into the test, and oe->rank
4085 says the actual opcode.
4086 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
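/* As an illustrative example (not taken from any testcase), the
   operand list for
     a == 2 || a == 3 || a == 5
   becomes the range entries [2, 2], [3, 3] and [5, 5] on A;
   merge_ranges combines the first two into [2, 3], which
   update_range_test then emits as roughly (unsigned) (a - 2) <= 1,
   leaving that test plus the [5, 5] test in the operand vector.  */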
4088 static bool
4089 optimize_range_tests (enum tree_code opcode,
4090 vec<operand_entry *> *ops, basic_block first_bb)
4092 unsigned int length = ops->length (), i, j, first;
4093 operand_entry *oe;
4094 struct range_entry *ranges;
4095 bool any_changes = false;
4097 if (length == 1)
4098 return false;
4100 ranges = XNEWVEC (struct range_entry, length);
4101 for (i = 0; i < length; i++)
4103 oe = (*ops)[i];
4104 ranges[i].idx = i;
4105 init_range_entry (ranges + i, oe->op,
4106 oe->op
4107 ? NULL
4108 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4109 /* For |, invert it now; we will invert it again before emitting
4110 the optimized expression. */
4111 if (opcode == BIT_IOR_EXPR
4112 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4113 ranges[i].in_p = !ranges[i].in_p;
4116 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4117 for (i = 0; i < length; i++)
4118 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4119 break;
4121 /* Try to merge ranges. */
4122 for (first = i; i < length; i++)
4124 tree low = ranges[i].low;
4125 tree high = ranges[i].high;
4126 int in_p = ranges[i].in_p;
4127 bool strict_overflow_p = ranges[i].strict_overflow_p;
4128 int update_fail_count = 0;
4130 for (j = i + 1; j < length; j++)
4132 if (ranges[i].exp != ranges[j].exp)
4133 break;
4134 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4135 ranges[j].in_p, ranges[j].low, ranges[j].high))
4136 break;
4137 strict_overflow_p |= ranges[j].strict_overflow_p;
4140 if (j == i + 1)
4141 continue;
4143 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4144 opcode, ops, ranges[i].exp, NULL, in_p,
4145 low, high, strict_overflow_p))
4147 i = j - 1;
4148 any_changes = true;
4150 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4151 while update_range_test would fail. */
4152 else if (update_fail_count == 64)
4153 i = j - 1;
4154 else
4155 ++update_fail_count;
4158 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4159 ops, ranges);
4161 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4162 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4163 ops, ranges);
4164 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4165 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4166 ops, ranges);
4167 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4168 ranges, first_bb);
4169 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4170 ops, ranges);
4172 if (any_changes && opcode != ERROR_MARK)
4174 j = 0;
4175 FOR_EACH_VEC_ELT (*ops, i, oe)
4177 if (oe->op == error_mark_node)
4178 continue;
4179 else if (i != j)
4180 (*ops)[j] = oe;
4181 j++;
4183 ops->truncate (j);
4186 XDELETEVEC (ranges);
4187 return any_changes;
4190 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4191 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4192 otherwise the comparison code. TYPE is a return value that is set
4193 to type of comparison. */
4195 static tree_code
4196 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4197 tree *lhs, tree *rhs, gassign **vcond)
4199 if (TREE_CODE (var) != SSA_NAME)
4200 return ERROR_MARK;
4202 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4203 if (stmt == NULL)
4204 return ERROR_MARK;
4205 if (vcond)
4206 *vcond = stmt;
4208 /* ??? If we start creating more COND_EXPR, we could perform
4209 this same optimization with them. For now, simplify. */
4210 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4211 return ERROR_MARK;
4213 tree cond = gimple_assign_rhs1 (stmt);
4214 tree_code cmp = TREE_CODE (cond);
4215 if (cmp != SSA_NAME)
4216 return ERROR_MARK;
4218 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4219 if (assign == NULL
4220 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4221 return ERROR_MARK;
4223 cmp = gimple_assign_rhs_code (assign);
4224 if (lhs)
4225 *lhs = gimple_assign_rhs1 (assign);
4226 if (rhs)
4227 *rhs = gimple_assign_rhs2 (assign);
4229 /* ??? For now, allow only canonical true and false result vectors.
4230 We could expand this to other constants should the need arise,
4231 but at the moment we don't create them. */
4232 tree t = gimple_assign_rhs2 (stmt);
4233 tree f = gimple_assign_rhs3 (stmt);
4234 bool inv;
4235 if (integer_all_onesp (t))
4236 inv = false;
4237 else if (integer_all_onesp (f))
4239 cmp = invert_tree_comparison (cmp, false);
4240 inv = true;
4242 else
4243 return ERROR_MARK;
4244 if (!integer_zerop (f))
4245 return ERROR_MARK;
4247 /* Success! */
4248 if (rets)
4249 *rets = assign;
4250 if (reti)
4251 *reti = inv;
4252 if (type)
4253 *type = TREE_TYPE (cond);
4254 return cmp;
4257 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4258 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
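/* Illustrative sketch with hypothetical SSA names: for

     c1_1 = a_2 < b_3;
     v1_4 = VEC_COND_EXPR <c1_1, { -1, ... }, { 0, ... }>;
     c2_5 = a_2 <= b_3;
     v2_6 = VEC_COND_EXPR <c2_5, { -1, ... }, { 0, ... }>;
     x_7 = v1_4 & v2_6;

   maybe_fold_and_comparisons can fold a_2 < b_3 with a_2 <= b_3 into
   the single comparison a_2 < b_3, so the first VEC_COND_EXPR is
   reused with the combined condition, and v2_6's entry in OPS is
   marked with error_mark_node and later truncated away.  */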
4260 static bool
4261 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4263 unsigned int length = ops->length (), i, j;
4264 bool any_changes = false;
4266 if (length == 1)
4267 return false;
4269 for (i = 0; i < length; ++i)
4271 tree elt0 = (*ops)[i]->op;
4273 gassign *stmt0, *vcond0;
4274 bool invert;
4275 tree type, lhs0, rhs0;
4276 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4277 &rhs0, &vcond0);
4278 if (cmp0 == ERROR_MARK)
4279 continue;
4281 for (j = i + 1; j < length; ++j)
4283 tree &elt1 = (*ops)[j]->op;
4285 gassign *stmt1, *vcond1;
4286 tree lhs1, rhs1;
4287 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4288 &rhs1, &vcond1);
4289 if (cmp1 == ERROR_MARK)
4290 continue;
4292 tree comb;
4293 if (opcode == BIT_AND_EXPR)
4294 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4295 cmp1, lhs1, rhs1);
4296 else if (opcode == BIT_IOR_EXPR)
4297 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4298 cmp1, lhs1, rhs1);
4299 else
4300 gcc_unreachable ();
4301 if (comb == NULL)
4302 continue;
4304 /* Success! */
4305 if (dump_file && (dump_flags & TDF_DETAILS))
4307 fprintf (dump_file, "Transforming ");
4308 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4309 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4310 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4311 fprintf (dump_file, " into ");
4312 print_generic_expr (dump_file, comb);
4313 fputc ('\n', dump_file);
4316 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4317 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4318 true, GSI_SAME_STMT);
4319 if (invert)
4320 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4321 gimple_assign_rhs3_ptr (vcond0));
4322 gimple_assign_set_rhs1 (vcond0, exp);
4323 update_stmt (vcond0);
4325 elt1 = error_mark_node;
4326 any_changes = true;
4330 if (any_changes)
4332 operand_entry *oe;
4333 j = 0;
4334 FOR_EACH_VEC_ELT (*ops, i, oe)
4336 if (oe->op == error_mark_node)
4337 continue;
4338 else if (i != j)
4339 (*ops)[j] = oe;
4340 j++;
4342 ops->truncate (j);
4345 return any_changes;
4348 /* Return true if STMT is a cast like:
4349 <bb N>:
4351 _123 = (int) _234;
4353 <bb M>:
4354 # _345 = PHI <_123(N), 1(...), 1(...)>
4355 where _234 has bool type, _123 has a single use, and
4356 bb N has a single successor M. This is commonly used in
4357 the last block of a range test.
4359 Also return true if STMT is a tcc_comparison like:
4360 <bb N>:
4362 _234 = a_2(D) == 2;
4364 <bb M>:
4365 # _345 = PHI <_234(N), 1(...), 1(...)>
4366 _346 = (int) _345;
4367 where _234 has bool type and a single use, and
4368 bb N has a single successor M. This is commonly used in
4369 the last block of a range test. */
4371 static bool
4372 final_range_test_p (gimple *stmt)
4374 basic_block bb, rhs_bb, lhs_bb;
4375 edge e;
4376 tree lhs, rhs;
4377 use_operand_p use_p;
4378 gimple *use_stmt;
4380 if (!gimple_assign_cast_p (stmt)
4381 && (!is_gimple_assign (stmt)
4382 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4383 != tcc_comparison)))
4384 return false;
4385 bb = gimple_bb (stmt);
4386 if (!single_succ_p (bb))
4387 return false;
4388 e = single_succ_edge (bb);
4389 if (e->flags & EDGE_COMPLEX)
4390 return false;
4392 lhs = gimple_assign_lhs (stmt);
4393 rhs = gimple_assign_rhs1 (stmt);
4394 if (gimple_assign_cast_p (stmt)
4395 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4396 || TREE_CODE (rhs) != SSA_NAME
4397 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4398 return false;
4400 if (!gimple_assign_cast_p (stmt)
4401 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4402 return false;
4404 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4405 if (!single_imm_use (lhs, &use_p, &use_stmt))
4406 return false;
4408 if (gimple_code (use_stmt) != GIMPLE_PHI
4409 || gimple_bb (use_stmt) != e->dest)
4410 return false;
4412 /* And that the rhs is defined in the same loop. */
4413 if (gimple_assign_cast_p (stmt))
4415 if (TREE_CODE (rhs) != SSA_NAME
4416 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4417 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4418 return false;
4420 else
4422 if (TREE_CODE (lhs) != SSA_NAME
4423 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4424 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4425 return false;
4428 return true;
4431 /* Return true if BB is a suitable basic block for inter-bb range test
4432 optimization. If BACKWARD is true, BB should be the only predecessor
4433 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4434 or compared against to find a common basic block to which all conditions
4435 branch if true resp. false. If BACKWARD is false, TEST_BB should
4436 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4437 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4438 block points to an empty block that falls through into *OTHER_BB and
4439 the phi args match that path. */
4441 static bool
4442 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4443 bool *test_swapped_p, bool backward)
4445 edge_iterator ei, ei2;
4446 edge e, e2;
4447 gimple *stmt;
4448 gphi_iterator gsi;
4449 bool other_edge_seen = false;
4450 bool is_cond;
4452 if (test_bb == bb)
4453 return false;
4454 /* Check last stmt first. */
4455 stmt = last_nondebug_stmt (bb);
4456 if (stmt == NULL
4457 || (gimple_code (stmt) != GIMPLE_COND
4458 && (backward || !final_range_test_p (stmt)))
4459 || gimple_visited_p (stmt)
4460 || stmt_could_throw_p (cfun, stmt)
4461 || *other_bb == bb)
4462 return false;
4463 is_cond = gimple_code (stmt) == GIMPLE_COND;
4464 if (is_cond)
4466 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4467 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4468 to *OTHER_BB (if not set yet, try to find it out). */
4469 if (EDGE_COUNT (bb->succs) != 2)
4470 return false;
4471 FOR_EACH_EDGE (e, ei, bb->succs)
4473 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4474 return false;
4475 if (e->dest == test_bb)
4477 if (backward)
4478 continue;
4479 else
4480 return false;
4482 if (e->dest == bb)
4483 return false;
4484 if (*other_bb == NULL)
4486 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4487 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4488 return false;
4489 else if (e->dest == e2->dest)
4490 *other_bb = e->dest;
4491 if (*other_bb == NULL)
4492 return false;
4494 if (e->dest == *other_bb)
4495 other_edge_seen = true;
4496 else if (backward)
4497 return false;
4499 if (*other_bb == NULL || !other_edge_seen)
4500 return false;
4502 else if (single_succ (bb) != *other_bb)
4503 return false;
4505 /* Now check all PHIs of *OTHER_BB. */
4506 e = find_edge (bb, *other_bb);
4507 e2 = find_edge (test_bb, *other_bb);
4508 retry:;
4509 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4511 gphi *phi = gsi.phi ();
4512 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4513 corresponding to BB and TEST_BB predecessor must be the same. */
4514 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4515 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4517 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4518 one of the PHIs should have the lhs of the last stmt in
4519 that block as PHI arg and that PHI should have 0 or 1
4520 corresponding to it in all other range test basic blocks
4521 considered. */
4522 if (!is_cond)
4524 if (gimple_phi_arg_def (phi, e->dest_idx)
4525 == gimple_assign_lhs (stmt)
4526 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4527 || integer_onep (gimple_phi_arg_def (phi,
4528 e2->dest_idx))))
4529 continue;
4531 else
4533 gimple *test_last = last_nondebug_stmt (test_bb);
4534 if (gimple_code (test_last) == GIMPLE_COND)
4536 if (backward ? e2->src != test_bb : e->src != bb)
4537 return false;
4539 /* For last_bb, handle also:
4540 if (x_3(D) == 3)
4541 goto <bb 6>; [34.00%]
4542 else
4543 goto <bb 7>; [66.00%]
4545 <bb 6> [local count: 79512730]:
4547 <bb 7> [local count: 1073741824]:
4548 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4549 where bb 7 is *OTHER_BB, but the PHI values from the
4550 earlier bbs match the path through the empty bb
4551 in between. */
4552 edge e3;
4553 if (backward)
4554 e3 = EDGE_SUCC (test_bb,
4555 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4556 else
4557 e3 = EDGE_SUCC (bb,
4558 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4559 if (empty_block_p (e3->dest)
4560 && single_succ_p (e3->dest)
4561 && single_succ (e3->dest) == *other_bb
4562 && single_pred_p (e3->dest)
4563 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4565 if (backward)
4566 e2 = single_succ_edge (e3->dest);
4567 else
4568 e = single_succ_edge (e3->dest);
4569 if (test_swapped_p)
4570 *test_swapped_p = true;
4571 goto retry;
4574 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4575 == gimple_assign_lhs (test_last)
4576 && (integer_zerop (gimple_phi_arg_def (phi,
4577 e->dest_idx))
4578 || integer_onep (gimple_phi_arg_def (phi,
4579 e->dest_idx))))
4580 continue;
4583 return false;
4586 return true;
4589 /* Return true if BB doesn't have side-effects that would disallow
4590 range test optimization, all SSA_NAMEs set in the bb are consumed
4591 in the bb and there are no PHIs. */
4593 bool
4594 no_side_effect_bb (basic_block bb)
4596 gimple_stmt_iterator gsi;
4597 gimple *last;
4599 if (!gimple_seq_empty_p (phi_nodes (bb)))
4600 return false;
4601 last = last_nondebug_stmt (bb);
4602 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4604 gimple *stmt = gsi_stmt (gsi);
4605 tree lhs;
4606 imm_use_iterator imm_iter;
4607 use_operand_p use_p;
4609 if (is_gimple_debug (stmt))
4610 continue;
4611 if (gimple_has_side_effects (stmt))
4612 return false;
4613 if (stmt == last)
4614 return true;
4615 if (!is_gimple_assign (stmt))
4616 return false;
4617 lhs = gimple_assign_lhs (stmt);
4618 if (TREE_CODE (lhs) != SSA_NAME)
4619 return false;
4620 if (gimple_assign_rhs_could_trap_p (stmt))
4621 return false;
4622 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4624 gimple *use_stmt = USE_STMT (use_p);
4625 if (is_gimple_debug (use_stmt))
4626 continue;
4627 if (gimple_bb (use_stmt) != bb)
4628 return false;
4631 return false;
4634 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4635 return true and fill in *OPS recursively. */
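/* A small sketch of what get_ops collects (hypothetical SSA names),
   assuming _1, _2 and _3 each have a single use and are not themselves
   defined by a reassociable |:

     _5 = _1 | _2;
     _7 = _5 | _3;

   get_ops (_7, BIT_IOR_EXPR, ...) recurses through the chain, pushes
   _1, _2 and _3 as separate operand entries and marks the defining
   statements of _5 and _7 as visited.  */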
4637 static bool
4638 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4639 class loop *loop)
4641 gimple *stmt = SSA_NAME_DEF_STMT (var);
4642 tree rhs[2];
4643 int i;
4645 if (!is_reassociable_op (stmt, code, loop))
4646 return false;
4648 rhs[0] = gimple_assign_rhs1 (stmt);
4649 rhs[1] = gimple_assign_rhs2 (stmt);
4650 gimple_set_visited (stmt, true);
4651 for (i = 0; i < 2; i++)
4652 if (TREE_CODE (rhs[i]) == SSA_NAME
4653 && !get_ops (rhs[i], code, ops, loop)
4654 && has_single_use (rhs[i]))
4656 operand_entry *oe = operand_entry_pool.allocate ();
4658 oe->op = rhs[i];
4659 oe->rank = code;
4660 oe->id = 0;
4661 oe->count = 1;
4662 oe->stmt_to_insert = NULL;
4663 ops->safe_push (oe);
4665 return true;
4668 /* Find the ops that were added by get_ops starting from VAR, see if
4669 they were changed during update_range_test and if yes, create new
4670 stmts. */
4672 static tree
4673 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4674 unsigned int *pidx, class loop *loop)
4676 gimple *stmt = SSA_NAME_DEF_STMT (var);
4677 tree rhs[4];
4678 int i;
4680 if (!is_reassociable_op (stmt, code, loop))
4681 return NULL;
4683 rhs[0] = gimple_assign_rhs1 (stmt);
4684 rhs[1] = gimple_assign_rhs2 (stmt);
4685 rhs[2] = rhs[0];
4686 rhs[3] = rhs[1];
4687 for (i = 0; i < 2; i++)
4688 if (TREE_CODE (rhs[i]) == SSA_NAME)
4690 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4691 if (rhs[2 + i] == NULL_TREE)
4693 if (has_single_use (rhs[i]))
4694 rhs[2 + i] = ops[(*pidx)++]->op;
4695 else
4696 rhs[2 + i] = rhs[i];
4699 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4700 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4702 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4703 var = make_ssa_name (TREE_TYPE (var));
4704 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4705 rhs[2], rhs[3]);
4706 gimple_set_uid (g, gimple_uid (stmt));
4707 gimple_set_visited (g, true);
4708 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4709 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4710 if (fold_stmt_inplace (&gsi2))
4711 update_stmt (g);
4713 return var;
4716 /* Structure to track the initial value passed to get_ops and
4717 the range in the ops vector for each basic block. */
4719 struct inter_bb_range_test_entry
4721 tree op;
4722 unsigned int first_idx, last_idx;
4725 /* Inter-bb range test optimization.
4727 Returns TRUE if a gimple conditional is optimized to true or false,
4728 otherwise returns FALSE.
4730 This indicates to the caller that it should run a CFG cleanup pass
4731 once reassociation is completed. */
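/* Illustrative sketch of the inter-bb case this handles
   (hypothetical source):

     if (a < 0)      // bb 2, ends in GIMPLE_COND
       goto other;
     if (a > 9)      // bb 3, ends in GIMPLE_COND
       goto other;

   Both conditions branch to the same other_bb and bb 3 has no side
   effects, so the two tests are collected as ranges on A and can be
   rewritten into a single (unsigned) a > 9 test, with the now
   redundant GIMPLE_COND folded to a constant condition; that folding
   is why the caller may need a CFG cleanup afterwards.  */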
4733 static bool
4734 maybe_optimize_range_tests (gimple *stmt)
4736 basic_block first_bb = gimple_bb (stmt);
4737 basic_block last_bb = first_bb;
4738 basic_block other_bb = NULL;
4739 basic_block bb;
4740 edge_iterator ei;
4741 edge e;
4742 auto_vec<operand_entry *> ops;
4743 auto_vec<inter_bb_range_test_entry> bbinfo;
4744 bool any_changes = false;
4745 bool cfg_cleanup_needed = false;
4747 /* Consider only basic blocks that end with GIMPLE_COND or
4748 a cast statement satisfying final_range_test_p. All
4749 but the last bb in the first_bb .. last_bb range
4750 should end with GIMPLE_COND. */
4751 if (gimple_code (stmt) == GIMPLE_COND)
4753 if (EDGE_COUNT (first_bb->succs) != 2)
4754 return cfg_cleanup_needed;
4756 else if (final_range_test_p (stmt))
4757 other_bb = single_succ (first_bb);
4758 else
4759 return cfg_cleanup_needed;
4761 if (stmt_could_throw_p (cfun, stmt))
4762 return cfg_cleanup_needed;
4764 /* As relative ordering of post-dominator sons isn't fixed,
4765 maybe_optimize_range_tests can be called first on any
4766 bb in the range we want to optimize. So, start searching
4767 backwards, if first_bb can be set to a predecessor. */
4768 while (single_pred_p (first_bb))
4770 basic_block pred_bb = single_pred (first_bb);
4771 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4772 break;
4773 if (!no_side_effect_bb (first_bb))
4774 break;
4775 first_bb = pred_bb;
4777 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4778 Before starting forward search in last_bb successors, find
4779 out the other_bb. */
4780 if (first_bb == last_bb)
4782 other_bb = NULL;
4783 /* As non-GIMPLE_COND last stmt always terminates the range,
4784 if forward search didn't discover anything, just give up. */
4785 if (gimple_code (stmt) != GIMPLE_COND)
4786 return cfg_cleanup_needed;
4787 /* Look at both successors. Either it ends with a GIMPLE_COND
4788 and satisfies suitable_cond_bb, or ends with a cast and
4789 other_bb is that cast's successor. */
4790 FOR_EACH_EDGE (e, ei, first_bb->succs)
4791 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4792 || e->dest == first_bb)
4793 return cfg_cleanup_needed;
4794 else if (single_pred_p (e->dest))
4796 stmt = last_nondebug_stmt (e->dest);
4797 if (stmt
4798 && gimple_code (stmt) == GIMPLE_COND
4799 && EDGE_COUNT (e->dest->succs) == 2)
4801 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4802 NULL, true))
4803 break;
4804 else
4805 other_bb = NULL;
4807 else if (stmt
4808 && final_range_test_p (stmt)
4809 && find_edge (first_bb, single_succ (e->dest)))
4811 other_bb = single_succ (e->dest);
4812 if (other_bb == first_bb)
4813 other_bb = NULL;
4816 if (other_bb == NULL)
4817 return cfg_cleanup_needed;
4819 /* Now do the forward search, moving last_bb to successor bbs
4820 that aren't other_bb. */
4821 while (EDGE_COUNT (last_bb->succs) == 2)
4823 FOR_EACH_EDGE (e, ei, last_bb->succs)
4824 if (e->dest != other_bb)
4825 break;
4826 if (e == NULL)
4827 break;
4828 if (!single_pred_p (e->dest))
4829 break;
4830 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4831 break;
4832 if (!no_side_effect_bb (e->dest))
4833 break;
4834 last_bb = e->dest;
4836 if (first_bb == last_bb)
4837 return cfg_cleanup_needed;
4838 /* Here basic blocks first_bb through last_bb's predecessor
4839 end with GIMPLE_COND, all of them have one of the edges to
4840 other_bb and another to another block in the range,
4841 all blocks except first_bb have no side effects, and
4842 last_bb ends with either a GIMPLE_COND or a cast satisfying
4843 final_range_test_p. */
4844 for (bb = last_bb; ; bb = single_pred (bb))
4846 enum tree_code code;
4847 tree lhs, rhs;
4848 inter_bb_range_test_entry bb_ent;
4850 bb_ent.op = NULL_TREE;
4851 bb_ent.first_idx = ops.length ();
4852 bb_ent.last_idx = bb_ent.first_idx;
4853 e = find_edge (bb, other_bb);
4854 stmt = last_nondebug_stmt (bb);
4855 gimple_set_visited (stmt, true);
4856 if (gimple_code (stmt) != GIMPLE_COND)
4858 use_operand_p use_p;
4859 gimple *phi;
4860 edge e2;
4861 unsigned int d;
4863 lhs = gimple_assign_lhs (stmt);
4864 rhs = gimple_assign_rhs1 (stmt);
4865 gcc_assert (bb == last_bb);
4867 /* stmt is
4868 _123 = (int) _234;
4870 _234 = a_2(D) == 2;
4872 followed by:
4873 <bb M>:
4874 # _345 = PHI <_123(N), 1(...), 1(...)>
4876 or 0 instead of 1. If it is 0, the _234
4877 range test is anded together with all the
4878 other range tests, if it is 1, it is ored with
4879 them. */
4880 single_imm_use (lhs, &use_p, &phi);
4881 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4882 e2 = find_edge (first_bb, other_bb);
4883 d = e2->dest_idx;
4884 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4885 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4886 code = BIT_AND_EXPR;
4887 else
4889 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4890 code = BIT_IOR_EXPR;
4893 /* If _234 SSA_NAME_DEF_STMT is
4894 _234 = _567 | _789;
4895 (or &, corresponding to 1/0 in the phi arguments),
4896 push into ops the individual range test arguments
4897 of the bitwise or resp. and, recursively. */
4898 if (TREE_CODE (rhs) == SSA_NAME
4899 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4900 != tcc_comparison)
4901 && !get_ops (rhs, code, &ops,
4902 loop_containing_stmt (stmt))
4903 && has_single_use (rhs))
4905 /* Otherwise, push the _234 range test itself. */
4906 operand_entry *oe = operand_entry_pool.allocate ();
4908 oe->op = rhs;
4909 oe->rank = code;
4910 oe->id = 0;
4911 oe->count = 1;
4912 oe->stmt_to_insert = NULL;
4913 ops.safe_push (oe);
4914 bb_ent.last_idx++;
4915 bb_ent.op = rhs;
4917 else if (is_gimple_assign (stmt)
4918 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4919 == tcc_comparison)
4920 && !get_ops (lhs, code, &ops,
4921 loop_containing_stmt (stmt))
4922 && has_single_use (lhs))
4924 operand_entry *oe = operand_entry_pool.allocate ();
4925 oe->op = lhs;
4926 oe->rank = code;
4927 oe->id = 0;
4928 oe->count = 1;
4929 ops.safe_push (oe);
4930 bb_ent.last_idx++;
4931 bb_ent.op = lhs;
4933 else
4935 bb_ent.last_idx = ops.length ();
4936 bb_ent.op = rhs;
4938 bbinfo.safe_push (bb_ent);
4939 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4940 ops[i]->id = bb->index;
4941 continue;
4943 else if (bb == last_bb)
4945 /* For last_bb, handle also:
4946 if (x_3(D) == 3)
4947 goto <bb 6>; [34.00%]
4948 else
4949 goto <bb 7>; [66.00%]
4951 <bb 6> [local count: 79512730]:
4953 <bb 7> [local count: 1073741824]:
4954 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4955 where bb 7 is OTHER_BB, but the PHI values from the
4956 earlier bbs match the path through the empty bb
4957 in between. */
4958 bool test_swapped_p = false;
4959 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4960 &other_bb, &test_swapped_p, true);
4961 gcc_assert (ok);
4962 if (test_swapped_p)
4963 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4965 /* Otherwise stmt is GIMPLE_COND. */
4966 code = gimple_cond_code (stmt);
4967 lhs = gimple_cond_lhs (stmt);
4968 rhs = gimple_cond_rhs (stmt);
4969 if (TREE_CODE (lhs) == SSA_NAME
4970 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4971 && ((code != EQ_EXPR && code != NE_EXPR)
4972 || rhs != boolean_false_node
4973 /* Either push into ops the individual bitwise
4974 or resp. and operands, depending on which
4975 edge is other_bb. */
4976 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4977 ^ (code == EQ_EXPR))
4978 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4979 loop_containing_stmt (stmt))))
4981 /* Or push the GIMPLE_COND stmt itself. */
4982 operand_entry *oe = operand_entry_pool.allocate ();
4984 oe->op = NULL;
4985 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4986 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4987 /* oe->op = NULL signals that there is no SSA_NAME
4988 for the range test, and oe->id instead is the number
4989 of the basic block at whose end the GIMPLE_COND
4990 is. */
4991 oe->id = bb->index;
4992 oe->count = 1;
4993 oe->stmt_to_insert = NULL;
4994 ops.safe_push (oe);
4995 bb_ent.op = NULL;
4996 bb_ent.last_idx++;
4998 else if (ops.length () > bb_ent.first_idx)
5000 bb_ent.op = lhs;
5001 bb_ent.last_idx = ops.length ();
5003 bbinfo.safe_push (bb_ent);
5004 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
5005 ops[i]->id = bb->index;
5006 if (bb == first_bb)
5007 break;
5009 if (ops.length () > 1)
5010 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5011 if (any_changes)
5013 unsigned int idx, max_idx = 0;
5014 /* update_ops relies on the has_single_use predicates returning the
5015 same values as they did during get_ops earlier. Additionally it
5016 never removes statements, only adds new ones, and it should walk
5017 from the single imm use and check the predicate already before
5018 making those changes.
5019 On the other hand, the handling of GIMPLE_COND directly can turn
5020 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5021 it needs to be done in a separate loop afterwards. */
5022 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5024 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5025 && bbinfo[idx].op != NULL_TREE)
5027 tree new_op;
5029 max_idx = idx;
5030 stmt = last_nondebug_stmt (bb);
5031 new_op = update_ops (bbinfo[idx].op,
5032 (enum tree_code)
5033 ops[bbinfo[idx].first_idx]->rank,
5034 ops, &bbinfo[idx].first_idx,
5035 loop_containing_stmt (stmt));
5036 if (new_op == NULL_TREE)
5038 gcc_assert (bb == last_bb);
5039 new_op = ops[bbinfo[idx].first_idx++]->op;
5041 if (bbinfo[idx].op != new_op)
5043 imm_use_iterator iter;
5044 use_operand_p use_p;
5045 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5047 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5048 if (is_gimple_debug (use_stmt))
5049 continue;
5050 else if (gimple_code (use_stmt) == GIMPLE_COND
5051 || gimple_code (use_stmt) == GIMPLE_PHI)
5052 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5053 SET_USE (use_p, new_op);
5054 else if ((is_gimple_assign (use_stmt)
5055 && (TREE_CODE_CLASS
5056 (gimple_assign_rhs_code (use_stmt))
5057 == tcc_comparison)))
5058 cast_or_tcc_cmp_stmt = use_stmt;
5059 else if (gimple_assign_cast_p (use_stmt))
5060 cast_or_tcc_cmp_stmt = use_stmt;
5061 else
5062 gcc_unreachable ();
5064 if (cast_or_tcc_cmp_stmt)
5066 gcc_assert (bb == last_bb);
5067 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5068 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5069 enum tree_code rhs_code
5070 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5071 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5072 : CONVERT_EXPR;
5073 gassign *g;
5074 if (is_gimple_min_invariant (new_op))
5076 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5077 g = gimple_build_assign (new_lhs, new_op);
5079 else
5080 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5081 gimple_stmt_iterator gsi
5082 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5083 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5084 gimple_set_visited (g, true);
5085 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5086 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5087 if (is_gimple_debug (use_stmt))
5088 continue;
5089 else if (gimple_code (use_stmt) == GIMPLE_COND
5090 || gimple_code (use_stmt) == GIMPLE_PHI)
5091 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5092 SET_USE (use_p, new_lhs);
5093 else
5094 gcc_unreachable ();
5098 if (bb == first_bb)
5099 break;
5101 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5103 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5104 && bbinfo[idx].op == NULL_TREE
5105 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5107 gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5109 if (idx > max_idx)
5110 max_idx = idx;
5112 /* If we collapse the conditional to a true/false
5113 condition, then bubble that knowledge up to our caller. */
5114 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5116 gimple_cond_make_false (cond_stmt);
5117 cfg_cleanup_needed = true;
5119 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5121 gimple_cond_make_true (cond_stmt);
5122 cfg_cleanup_needed = true;
5124 else
5126 gimple_cond_set_code (cond_stmt, NE_EXPR);
5127 gimple_cond_set_lhs (cond_stmt,
5128 ops[bbinfo[idx].first_idx]->op);
5129 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5131 update_stmt (cond_stmt);
5133 if (bb == first_bb)
5134 break;
5137 /* The above changes could result in basic blocks after the first
5138 modified one, up to and including last_bb, to be executed even if
5139 they would not be in the original program. If the value ranges of
5140 assignment lhs' in those bbs were dependent on the conditions
5141 guarding those basic blocks which now can change, the VRs might
5142 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5143 are only used within the same bb, it should be not a big deal if
5144 we just reset all the VRs in those bbs. See PR68671. */
5145 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5146 reset_flow_sensitive_info_in_bb (bb);
5148 return cfg_cleanup_needed;
5151 /* Remove the def stmt of VAR if VAR has zero uses, and if so recurse
5152 on its rhs1 operand. */
5154 static void
5155 remove_visited_stmt_chain (tree var)
5157 gimple *stmt;
5158 gimple_stmt_iterator gsi;
5160 while (1)
5162 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5163 return;
5164 stmt = SSA_NAME_DEF_STMT (var);
5165 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5167 var = gimple_assign_rhs1 (stmt);
5168 gsi = gsi_for_stmt (stmt);
5169 reassoc_remove_stmt (&gsi);
5170 release_defs (stmt);
5172 else
5173 return;
5177 /* This function checks three consecutive operands in
5178 the passed operand vector OPS starting from OPINDEX and
5179 swaps two operands if it is profitable for the binary operation
5180 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5182 We pair ops with the same rank if possible. */
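/* For example (ranks are illustrative): with OPS[OPINDEX..] having
   ranks {3, 3, 5}, oe1 and oe3 are swapped to give {5, 3, 3}, so the
   binary statement consuming the last two operands pairs the two
   equal-rank entries and exposes them to redundancy elimination.  */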
5184 static void
5185 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5186 unsigned int opindex)
5188 operand_entry *oe1, *oe2, *oe3;
5190 oe1 = ops[opindex];
5191 oe2 = ops[opindex + 1];
5192 oe3 = ops[opindex + 2];
5194 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5195 std::swap (*oe1, *oe3);
5196 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5197 std::swap (*oe1, *oe2);
5200 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5201 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5202 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5203 after the returned stmt. */
5205 static inline gimple *
5206 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5208 insert_before = true;
5209 if (TREE_CODE (rhs1) == SSA_NAME
5210 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5212 stmt = SSA_NAME_DEF_STMT (rhs1);
5213 insert_before = false;
5215 if (TREE_CODE (rhs2) == SSA_NAME
5216 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5218 stmt = SSA_NAME_DEF_STMT (rhs2);
5219 insert_before = false;
5221 return stmt;
5224 /* If the stmt that defines operand has to be inserted, insert it
5225 before the use. */
5226 static void
5227 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5229 gcc_assert (is_gimple_assign (stmt_to_insert));
5230 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5231 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5232 bool insert_before;
5233 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5234 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5235 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5237 /* If the insert point is not stmt, then insert_point would be
5238 the point where operand rhs1 or rhs2 is defined. In this case,
5239 stmt_to_insert has to be inserted afterwards. This would
5240 only happen when the stmt insertion point is flexible. */
5241 if (insert_before)
5242 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5243 else
5244 insert_stmt_after (stmt_to_insert, insert_point);
5248 /* Recursively rewrite our linearized statements so that the operators
5249 match those in OPS[OPINDEX], putting the computation in rank
5250 order. Return new lhs.
5251 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5252 the current stmt and during recursive invocations.
5253 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5254 recursive invocations. */
5256 static tree
5257 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5258 const vec<operand_entry *> &ops, bool changed,
5259 bool next_changed)
5261 tree rhs1 = gimple_assign_rhs1 (stmt);
5262 tree rhs2 = gimple_assign_rhs2 (stmt);
5263 tree lhs = gimple_assign_lhs (stmt);
5264 operand_entry *oe;
5266 /* The final recursion case for this function is that you have
5267 exactly two operations left.
5268 If we had exactly one op in the entire list to start with, we
5269 would have never called this function, and the tail recursion
5270 rewrites them one at a time. */
5271 if (opindex + 2 == ops.length ())
5273 operand_entry *oe1, *oe2;
5275 oe1 = ops[opindex];
5276 oe2 = ops[opindex + 1];
5278 if (rhs1 != oe1->op || rhs2 != oe2->op)
5280 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5281 unsigned int uid = gimple_uid (stmt);
5283 if (dump_file && (dump_flags & TDF_DETAILS))
5285 fprintf (dump_file, "Transforming ");
5286 print_gimple_stmt (dump_file, stmt, 0);
5289 /* If the stmt that defines operand has to be inserted, insert it
5290 before the use. */
5291 if (oe1->stmt_to_insert)
5292 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5293 if (oe2->stmt_to_insert)
5294 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5295 /* Even when changed is false, reassociation could have e.g. removed
5296 some redundant operations, so unless we are just swapping the
5297 arguments or unless there is no change at all (then we just
5298 return lhs), force creation of a new SSA_NAME. */
5299 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5301 bool insert_before;
5302 gimple *insert_point
5303 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5304 lhs = make_ssa_name (TREE_TYPE (lhs));
5305 stmt
5306 = gimple_build_assign (lhs, rhs_code,
5307 oe1->op, oe2->op);
5308 gimple_set_uid (stmt, uid);
5309 gimple_set_visited (stmt, true);
5310 if (insert_before)
5311 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5312 else
5313 insert_stmt_after (stmt, insert_point);
5315 else
5317 bool insert_before;
5318 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5319 insert_before)
5320 == stmt);
5321 gimple_assign_set_rhs1 (stmt, oe1->op);
5322 gimple_assign_set_rhs2 (stmt, oe2->op);
5323 update_stmt (stmt);
5326 if (rhs1 != oe1->op && rhs1 != oe2->op)
5327 remove_visited_stmt_chain (rhs1);
5329 if (dump_file && (dump_flags & TDF_DETAILS))
5331 fprintf (dump_file, " into ");
5332 print_gimple_stmt (dump_file, stmt, 0);
5335 return lhs;
5338 /* If we hit here, we should have 3 or more ops left. */
5339 gcc_assert (opindex + 2 < ops.length ());
5341 /* Rewrite the next operator. */
5342 oe = ops[opindex];
5344 /* If the stmt that defines operand has to be inserted, insert it
5345 before the use. */
5346 if (oe->stmt_to_insert)
5347 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5349 /* Recurse on the LHS of the binary operator, which is guaranteed to
5350 be the non-leaf side. */
5351 tree new_rhs1
5352 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5353 changed || oe->op != rhs2 || next_changed,
5354 false);
5356 if (oe->op != rhs2 || new_rhs1 != rhs1)
5358 if (dump_file && (dump_flags & TDF_DETAILS))
5360 fprintf (dump_file, "Transforming ");
5361 print_gimple_stmt (dump_file, stmt, 0);
5364 /* If changed is false, this is either opindex == 0
5365 or all outer rhs2's were equal to corresponding oe->op,
5366 and powi_result is NULL.
5367 That means lhs is equivalent before and after reassociation.
5368 Otherwise ensure the old lhs SSA_NAME is not reused and
5369 create a new stmt as well, so that any debug stmts will be
5370 properly adjusted. */
5371 if (changed)
5373 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5374 unsigned int uid = gimple_uid (stmt);
5375 bool insert_before;
5376 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5377 insert_before);
5379 lhs = make_ssa_name (TREE_TYPE (lhs));
5380 stmt = gimple_build_assign (lhs, rhs_code,
5381 new_rhs1, oe->op);
5382 gimple_set_uid (stmt, uid);
5383 gimple_set_visited (stmt, true);
5384 if (insert_before)
5385 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5386 else
5387 insert_stmt_after (stmt, insert_point);
5389 else
5391 bool insert_before;
5392 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5393 insert_before)
5394 == stmt);
5395 gimple_assign_set_rhs1 (stmt, new_rhs1);
5396 gimple_assign_set_rhs2 (stmt, oe->op);
5397 update_stmt (stmt);
5400 if (dump_file && (dump_flags & TDF_DETAILS))
5402 fprintf (dump_file, " into ");
5403 print_gimple_stmt (dump_file, stmt, 0);
5406 return lhs;
5409 /* Find out how many cycles we need to compute a statement chain.
5410 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5411 maximum number of independent statements we may execute per cycle. */
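/* A worked example of the computation below: for ops_num == 8 and
   cpu_width == 2, res = 8 / (2 * 2) = 2 cycles are spent reducing the
   operand count by cpu_width per cycle; the remaining
   rest = 8 - 2 * 2 = 4 operands are then halved each cycle, adding
   exact_log2 (4) = 2 more cycles, for a total of 4 cycles.  */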
5413 static int
5414 get_required_cycles (int ops_num, int cpu_width)
5416 int res;
5417 int elog;
5418 unsigned int rest;
5420 /* While we have more than 2 * cpu_width operands
5421 we may reduce the number of operands by cpu_width
5422 per cycle. */
5423 res = ops_num / (2 * cpu_width);
5425 /* The remaining operand count may be halved each cycle
5426 until we have only one operand left. */
5427 rest = (unsigned)(ops_num - res * cpu_width);
5428 elog = exact_log2 (rest);
5429 if (elog >= 0)
5430 res += elog;
5431 else
5432 res += floor_log2 (rest) + 1;
5434 return res;
5437 /* Given that the target fully pipelines FMA instructions, return the latency
5438 of MULT_EXPRs that can't be hidden by the FMAs. WIDTH is the number of
5439 pipes. */
5441 static inline int
5442 get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
5444 gcc_checking_assert (mult_num && mult_num <= ops_num);
5446 /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
5447 e.g:
5449 A * B + C * D
5451 _1 = A * B;
5452 _2 = .FMA (C, D, _1);
5454 Otherwise there's latency(MULT)*1 in the first FMA. */
5455 return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
5458 /* Returns an optimal number of registers to use for computation of
5459 given statements.
5461 LHS is the result ssa name of OPS. MULT_NUM is number of sub-expressions
5462 that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs. */
5464 static int
5465 get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
5466 enum tree_code opc, machine_mode mode)
5468 int param_width = param_tree_reassoc_width;
5469 int width;
5470 int width_min;
5471 int cycles_best;
5472 int ops_num = ops->length ();
5474 if (param_width > 0)
5475 width = param_width;
5476 else
5477 width = targetm.sched.reassociation_width (opc, mode);
5479 if (width == 1)
5480 return width;
5482 /* Get the minimal time required for sequence computation. */
5483 cycles_best = get_required_cycles (ops_num, width);
5485 /* Check if we may use less width and still compute sequence for
5486 the same time. It will allow us to reduce registers usage.
5487 get_required_cycles is monotonically increasing with lower width
5488 so we can perform a binary search for the minimal width that still
5489 results in the optimal cycle count. */
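/* Sketch of the search with illustrative numbers: for ops_num == 6
   and an initial width of 4, cycles_best is 3; trying width_mid == 2
   still gives get_required_cycles (6, 2) == 3, so width drops to 2,
   while width 1 would need 5 cycles, so the search stops at 2.  */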
5490 width_min = 1;
5492 /* If the target fully pipelines FMA instruction, the multiply part can start
5493 already if its operands are ready. Assuming symmetric pipes are used for
5494 FMUL/FADD/FMA, then for a sequence of FMA like:
5496 _8 = .FMA (_2, _3, _1);
5497 _9 = .FMA (_5, _4, _8);
5498 _10 = .FMA (_7, _6, _9);
5500 , if width=1, the latency is latency(MULT) + latency(ADD)*3.
5501 While with width=2:
5503 _8 = _4 * _5;
5504 _9 = .FMA (_2, _3, _1);
5505 _10 = .FMA (_6, _7, _8);
5506 _11 = _9 + _10;
5508 , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >=
5509 latency(ADD), the first variant is preferred.
5511 Find out if we can get a smaller width considering FMA. */
5512 if (width > 1 && mult_num && param_fully_pipelined_fma)
5514 /* When param_fully_pipelined_fma is set, assume FMUL and FMA use the
5515 same units that can also do FADD. For other scenarios, such as when
5516 FMUL and FADD are using separate units, the following code may not
5517 apply. */
5518 int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
5519 gcc_checking_assert (width_mult <= width);
5521 /* Latency of MULT_EXPRs. */
5522 int lat_mul
5523 = get_mult_latency_consider_fma (ops_num, mult_num, width_mult);
5525 /* Quick search might not apply. So start from 1. */
5526 for (int i = 1; i < width_mult; i++)
5528 int lat_mul_new
5529 = get_mult_latency_consider_fma (ops_num, mult_num, i);
5530 int lat_add_new = get_required_cycles (ops_num, i);
5532 /* Assume latency(MULT) >= latency(ADD). */
5533 if (lat_mul - lat_mul_new >= lat_add_new - cycles_best)
5535 width = i;
5536 break;
5540 else
5542 while (width > width_min)
5544 int width_mid = (width + width_min) / 2;
5546 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5547 width = width_mid;
5548 else if (width_min < width_mid)
5549 width_min = width_mid;
5550 else
5551 break;
5555 /* If there's a loop-dependent FMA result, return width=2 to avoid it. This is
5556 better than skipping these FMA candidates in widening_mul. */
5557 if (width == 1
5558 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
5559 param_avoid_fma_max_bits))
5561 /* Look for cross backedge dependency:
5562 1. LHS is a phi argument in the same basic block in which it is defined.
5563 2. And the result of the phi node is used in OPS. */
5564 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));
5566 use_operand_p use_p;
5567 imm_use_iterator iter;
5568 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5569 if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p)))
5571 if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
5572 != bb)
5573 continue;
5574 tree phi_result = gimple_phi_result (phi);
5575 operand_entry *oe;
5576 unsigned int j;
5577 FOR_EACH_VEC_ELT (*ops, j, oe)
5579 if (TREE_CODE (oe->op) != SSA_NAME)
5580 continue;
5582 /* The result of the phi is an operand of the PLUS_EXPR. */
5583 if (oe->op == phi_result)
5584 return 2;
5586 /* Check if the result of the phi is an operand of a MULT_EXPR. */
5587 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5588 if (is_gimple_assign (def_stmt)
5589 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
5591 tree rhs = gimple_assign_rhs1 (def_stmt);
5592 if (TREE_CODE (rhs) == SSA_NAME)
5594 if (rhs == phi_result)
5595 return 2;
5596 def_stmt = SSA_NAME_DEF_STMT (rhs);
5599 if (is_gimple_assign (def_stmt)
5600 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
5602 if (gimple_assign_rhs1 (def_stmt) == phi_result
5603 || gimple_assign_rhs2 (def_stmt) == phi_result)
5604 return 2;
5610 return width;
5613 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5614 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5615 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5617 /* Rewrite statements with a dependency chain with regard to the chance to
5618 generate FMA.
5619 For a chain with FMA: try to keep the FMA opportunity as much as possible.
5620 For a chain without FMA: put the computation in rank order and try
5621 to allow operations to be executed in parallel.
5622 E.g.
5623 e + f + a * b + c * d;
5625 ssa1 = e + a * b;
5626 ssa2 = f + c * d;
5627 ssa3 = ssa1 + ssa2;
5629 This reassociation approach preserves the chance of fma generation as much
5630 as possible.
5632 Another thing is to avoid adding loop-carried ops to long chains, otherwise
5633 the whole chain will have dependencies across the loop iteration. Just keep
5634 loop-carried ops in a separate chain.
5635 E.g.
5636 x_1 = phi (x_0, x_2)
5637 y_1 = phi (y_0, y_2)
5639 a + b + c + d + e + x1 + y1
5641 SSA1 = a + b;
5642 SSA2 = c + d;
5643 SSA3 = SSA1 + e;
5644 SSA4 = SSA3 + SSA2;
5645 SSA5 = x1 + y1;
5646 SSA6 = SSA4 + SSA5;
5648 static void
5649 rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5650 const vec<operand_entry *> &ops)
5652 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5653 int op_num = ops.length ();
5654 int op_normal_num = op_num;
5655 gcc_assert (op_num > 0);
5656 int stmt_num = op_num - 1;
5657 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5658 int i = 0, j = 0;
5659 tree tmp_op[2], op1;
5660 operand_entry *oe;
5661 gimple *stmt1 = NULL;
5662 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5663 int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5664 int width_active = 0, width_count = 0;
5665 bool has_biased = false, ops_changed = false;
5666 auto_vec<operand_entry *> ops_normal;
5667 auto_vec<operand_entry *> ops_biased;
5668 vec<operand_entry *> *ops1;
5670 /* We start expression rewriting from the top statements.
5671 So, in this loop we create a full list of statements
5672 we will work with. */
5673 stmts[stmt_num - 1] = stmt;
5674 for (i = stmt_num - 2; i >= 0; i--)
5675 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5677 /* Avoid adding loop-carried ops to long chains; first filter out the
5678 loop-carried ones. But we need to make sure that the length of the
5679 remainder is not less than 4, which is the smallest number of ops for
5680 which we can break the dependency. */
5681 FOR_EACH_VEC_ELT (ops, i, oe)
5683 if (TREE_CODE (oe->op) == SSA_NAME
5684 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5685 && op_normal_num > 4)
5687 ops_biased.safe_push (oe);
5688 has_biased = true;
5689 op_normal_num --;
5691 else
5692 ops_normal.safe_push (oe);
5695 /* Width should not be larger than ops length / 2, since we cannot create
5696 more parallel dependency chains than that. */
5697 int width_normal = op_normal_num / 2;
5698 int width_biased = (op_num - op_normal_num) / 2;
5699 width_normal = width <= width_normal ? width : width_normal;
5700 width_biased = width <= width_biased ? width : width_biased;
5702 ops1 = &ops_normal;
5703 width_count = width_active = width_normal;
5705 /* Build parallel dependency chain according to width. */
5706 for (i = 0; i < stmt_num; i++)
5708 if (dump_file && (dump_flags & TDF_DETAILS))
5710 fprintf (dump_file, "Transforming ");
5711 print_gimple_stmt (dump_file, stmts[i], 0);
5714 /* When the work of normal ops is over, but the loop is not over,
5715 continue to do biased ops. */
5716 if (width_count == 0 && ops1 == &ops_normal)
5718 ops1 = &ops_biased;
5719 width_count = width_active = width_biased;
5720 ops_changed = true;
5723 /* Swap the operands if no FMA in the chain. */
5724 if (ops1->length () > 2 && !has_fma)
5725 swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5727 if (i < width_active
5728 || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5730 for (j = 0; j < 2; j++)
5732 oe = ops1->pop ();
5733 tmp_op[j] = oe->op;
5734 /* If the stmt that defines operand has to be inserted, insert it
5735 before the use. */
5736 stmt1 = oe->stmt_to_insert;
5737 if (stmt1)
5738 insert_stmt_before_use (stmts[i], stmt1);
5739 stmt1 = NULL;
5741 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5742 tmp_op[1],
5743 tmp_op[0],
5744 opcode);
5745 gimple_set_visited (stmts[i], true);
5748 else
5750 /* We keep original statement only for the last one. All others are
5751 recreated. */
5752 if (!ops1->length ())
5754 /* For biased length equal to 2. */
5755 if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5756 last_rhs2_stmt_index = i - 1;
5758 /* When width_count == 2 and there is no biased, just finish. */
5759 if (width_count == NORMAL_END_STMT && !has_biased)
5761 last_rhs1_stmt_index = i - 1;
5762 last_rhs2_stmt_index = i - 2;
5764 if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5766 /* We keep original statement only for the last one. All
5767 others are recreated. */
5768 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5769 (stmts[last_rhs1_stmt_index]));
5770 gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5771 (stmts[last_rhs2_stmt_index]));
5772 update_stmt (stmts[i]);
5774 else
5776 stmts[i] =
5777 build_and_add_sum (TREE_TYPE (last_rhs1),
5778 gimple_assign_lhs (stmts[i-width_count]),
5779 gimple_assign_lhs
5780 (stmts[i-width_count+1]),
5781 opcode);
5782 gimple_set_visited (stmts[i], true);
5783 width_count--;
5785 /* It is the end of normal or biased ops.
5786 last_rhs1_stmt_index is used to record the last stmt index
5787 for normal ops. last_rhs2_stmt_index is used to record the
5788 last stmt index for biased ops. */
5789 if (width_count == BIASED_END_STMT)
5791 gcc_assert (has_biased);
5792 if (ops_biased.length ())
5793 last_rhs1_stmt_index = i;
5794 else
5795 last_rhs2_stmt_index = i;
5796 width_count--;
5800 else
5802 /* Attach the remaining ops to the parallel dependency chain. */
5803 oe = ops1->pop ();
5804 op1 = oe->op;
5805 stmt1 = oe->stmt_to_insert;
5806 if (stmt1)
5807 insert_stmt_before_use (stmts[i], stmt1);
5808 stmt1 = NULL;
5810 /* For the case of a single biased op. */
5811 if (width_count == SPECIAL_BIASED_END_STMT)
5813 /* We keep original statement only for the last one. All
5814 others are recreated. */
5815 gcc_assert (has_biased);
5816 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5817 (stmts[last_rhs1_stmt_index]));
5818 gimple_assign_set_rhs2 (stmts[i], op1);
5819 update_stmt (stmts[i]);
5821 else
5823 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5824 gimple_assign_lhs
5825 (stmts[i-width_active]),
5826 op1,
5827 opcode);
5828 gimple_set_visited (stmts[i], true);
5833 if (dump_file && (dump_flags & TDF_DETAILS))
5835 fprintf (dump_file, " into ");
5836 print_gimple_stmt (dump_file, stmts[i], 0);
5840 remove_visited_stmt_chain (last_rhs1);
5843 /* Transform STMT, which is really (A + B) + (C + D), into the left
5844 linear form, ((A + B) + C) + D.
5845 Recurse on D if necessary. */
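/* A minimal GIMPLE sketch with hypothetical SSA names: for
     x = a + b;  y = c + d;  s = x + y;
   a new statement t = x + d is inserted and S is rewritten into
   s = t + c, i.e. ((a + b) + d) + c, after which the now-dead
   definition of y is removed. */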
5847 static void
5848 linearize_expr (gimple *stmt)
5850 gimple_stmt_iterator gsi;
5851 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5852 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5853 gimple *oldbinrhs = binrhs;
5854 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5855 gimple *newbinrhs = NULL;
5856 class loop *loop = loop_containing_stmt (stmt);
5857 tree lhs = gimple_assign_lhs (stmt);
5859 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5860 && is_reassociable_op (binrhs, rhscode, loop));
5862 gsi = gsi_for_stmt (stmt);
5864 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5865 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5866 gimple_assign_rhs_code (binrhs),
5867 gimple_assign_lhs (binlhs),
5868 gimple_assign_rhs2 (binrhs));
5869 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5870 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5871 gimple_set_uid (binrhs, gimple_uid (stmt));
5873 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5874 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5876 if (dump_file && (dump_flags & TDF_DETAILS))
5878 fprintf (dump_file, "Linearized: ");
5879 print_gimple_stmt (dump_file, stmt, 0);
5882 reassociate_stats.linearized++;
5883 update_stmt (stmt);
5885 gsi = gsi_for_stmt (oldbinrhs);
5886 reassoc_remove_stmt (&gsi);
5887 release_defs (oldbinrhs);
5889 gimple_set_visited (stmt, true);
5890 gimple_set_visited (binlhs, true);
5891 gimple_set_visited (binrhs, true);
5893 /* Tail recurse on the new rhs if it still needs reassociation. */
5894 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5895 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5896 want to change the algorithm while converting to tuples. */
5897 linearize_expr (stmt);
5900 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5901 it. Otherwise, return NULL. */
5903 static gimple *
5904 get_single_immediate_use (tree lhs)
5906 use_operand_p immuse;
5907 gimple *immusestmt;
5909 if (TREE_CODE (lhs) == SSA_NAME
5910 && single_imm_use (lhs, &immuse, &immusestmt)
5911 && is_gimple_assign (immusestmt))
5912 return immusestmt;
5914 return NULL;
5917 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5918 representing the negated value. Insertions of any necessary
5919 instructions go before GSI.
5920 This function is recursive in that, if you hand it "a_5" as the
5921 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5922 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
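/* A sketch of the result with hypothetical SSA names: negating a_5
   where a_5 = b_3 + b_4 emits something like
     t_6 = -b_3;
     t_7 = -b_4;
     a_8 = t_6 + t_7;
   and returns a_8; the old definition of a_5 is marked visited and
   left for later clean-up. */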
5924 static tree
5925 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5927 gimple *negatedefstmt = NULL;
5928 tree resultofnegate;
5929 gimple_stmt_iterator gsi;
5930 unsigned int uid;
5932 /* If we are trying to negate a name, defined by an add, negate the
5933 add operands instead. */
5934 if (TREE_CODE (tonegate) == SSA_NAME)
5935 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5936 if (TREE_CODE (tonegate) == SSA_NAME
5937 && is_gimple_assign (negatedefstmt)
5938 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5939 && has_single_use (gimple_assign_lhs (negatedefstmt))
5940 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5942 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5943 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5944 tree lhs = gimple_assign_lhs (negatedefstmt);
5945 gimple *g;
5947 gsi = gsi_for_stmt (negatedefstmt);
5948 rhs1 = negate_value (rhs1, &gsi);
5950 gsi = gsi_for_stmt (negatedefstmt);
5951 rhs2 = negate_value (rhs2, &gsi);
5953 gsi = gsi_for_stmt (negatedefstmt);
5954 lhs = make_ssa_name (TREE_TYPE (lhs));
5955 gimple_set_visited (negatedefstmt, true);
5956 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5957 gimple_set_uid (g, gimple_uid (negatedefstmt));
5958 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5959 return lhs;
5962 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5963 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5964 NULL_TREE, true, GSI_SAME_STMT);
5965 gsi = *gsip;
5966 uid = gimple_uid (gsi_stmt (gsi));
5967 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5969 gimple *stmt = gsi_stmt (gsi);
5970 if (gimple_uid (stmt) != 0)
5971 break;
5972 gimple_set_uid (stmt, uid);
5974 return resultofnegate;
5977 /* Return true if we should break up the subtract in STMT into an add
5978 with negate. This is true when the subtract operands are really
5979 adds, or the subtract itself is used in an add expression. In
5980 either case, breaking up the subtract into an add with negate
5981 exposes the adds to reassociation. */
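/* For example, with hypothetical statements
     c = a - b;
     d = c + e;
   C is used only in an addition, so the subtract is worth breaking up
   into c = a + -b, exposing the add to reassociation. */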
5983 static bool
5984 should_break_up_subtract (gimple *stmt)
5986 tree lhs = gimple_assign_lhs (stmt);
5987 tree binlhs = gimple_assign_rhs1 (stmt);
5988 tree binrhs = gimple_assign_rhs2 (stmt);
5989 gimple *immusestmt;
5990 class loop *loop = loop_containing_stmt (stmt);
5992 if (TREE_CODE (binlhs) == SSA_NAME
5993 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5994 return true;
5996 if (TREE_CODE (binrhs) == SSA_NAME
5997 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5998 return true;
6000 if (TREE_CODE (lhs) == SSA_NAME
6001 && (immusestmt = get_single_immediate_use (lhs))
6002 && is_gimple_assign (immusestmt)
6003 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
6004 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
6005 && gimple_assign_rhs1 (immusestmt) == lhs)
6006 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
6007 return true;
6008 return false;
6011 /* Transform STMT from A - B into A + -B. */
6013 static void
6014 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
6016 tree rhs1 = gimple_assign_rhs1 (stmt);
6017 tree rhs2 = gimple_assign_rhs2 (stmt);
6019 if (dump_file && (dump_flags & TDF_DETAILS))
6021 fprintf (dump_file, "Breaking up subtract ");
6022 print_gimple_stmt (dump_file, stmt, 0);
6025 rhs2 = negate_value (rhs2, gsip);
6026 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
6027 update_stmt (stmt);
6030 /* Determine whether STMT is a builtin call that raises an SSA name
6031 to an integer power and has only one use. If so, and this is early
6032 reassociation and unsafe math optimizations are permitted, place
6033 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
6034 If any of these conditions does not hold, return FALSE. */
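/* Hypothetical examples: pow (x_1, 4.0) yields *BASE = x_1 and
   *EXPONENT = 4; pow (x_1, 3.5) is rejected because the exponent is
   not integral; powi (x_1, 1) is rejected because exponents below 2
   are not worth expanding here. */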
6036 static bool
6037 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
6039 tree arg1;
6040 REAL_VALUE_TYPE c, cint;
6042 switch (gimple_call_combined_fn (stmt))
6044 CASE_CFN_POW:
6045 if (flag_errno_math)
6046 return false;
6048 *base = gimple_call_arg (stmt, 0);
6049 arg1 = gimple_call_arg (stmt, 1);
6051 if (TREE_CODE (arg1) != REAL_CST)
6052 return false;
6054 c = TREE_REAL_CST (arg1);
6056 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
6057 return false;
6059 *exponent = real_to_integer (&c);
6060 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
6061 if (!real_identical (&c, &cint))
6062 return false;
6064 break;
6066 CASE_CFN_POWI:
6067 *base = gimple_call_arg (stmt, 0);
6068 arg1 = gimple_call_arg (stmt, 1);
6070 if (!tree_fits_shwi_p (arg1))
6071 return false;
6073 *exponent = tree_to_shwi (arg1);
6074 break;
6076 default:
6077 return false;
6080 /* Expanding negative exponents is generally unproductive, so we don't
6081 complicate matters with those. Exponents of zero and one should
6082 have been handled by expression folding. */
6083 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
6084 return false;
6086 return true;
6089 /* Try to derive and add operand entry for OP to *OPS. Return false if
6090 unsuccessful. */
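/* Two illustrative cases with hypothetical operands: in a MULT_EXPR
   chain, an operand defined by t_3 = __builtin_powi (x_1, 3) is (with
   powi insertion and unsafe math optimizations enabled, as the
   conditions below require) recorded as repeated factor x_1 with
   count 3, and an operand defined by t_4 = -y_2 is replaced by the
   pair { y_2, -1 } so the negation can be reassociated like any other
   factor. */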
6092 static bool
6093 try_special_add_to_ops (vec<operand_entry *> *ops,
6094 enum tree_code code,
6095 tree op, gimple* def_stmt)
6097 tree base = NULL_TREE;
6098 HOST_WIDE_INT exponent = 0;
6100 if (TREE_CODE (op) != SSA_NAME
6101 || ! has_single_use (op))
6102 return false;
6104 if (code == MULT_EXPR
6105 && reassoc_insert_powi_p
6106 && flag_unsafe_math_optimizations
6107 && is_gimple_call (def_stmt)
6108 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
6110 add_repeat_to_ops_vec (ops, base, exponent);
6111 gimple_set_visited (def_stmt, true);
6112 return true;
6114 else if (code == MULT_EXPR
6115 && is_gimple_assign (def_stmt)
6116 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6117 && !HONOR_SNANS (TREE_TYPE (op))
6118 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
6119 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
6120 && (!FLOAT_TYPE_P (TREE_TYPE (op))
6121 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
6123 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6124 tree cst = build_minus_one_cst (TREE_TYPE (op));
6125 add_to_ops_vec (ops, rhs1);
6126 add_to_ops_vec (ops, cst);
6127 gimple_set_visited (def_stmt, true);
6128 return true;
6131 return false;
6134 /* Recursively linearize a binary expression that is the RHS of STMT.
6135 Place the operands of the expression tree in the vector named OPS. */
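/* E.g., with hypothetical SSA names, for s = x + y where x = a + b is
   reassociable and y is a leaf, the recursion walks the reassociable
   rhs1 chain and OPS ends up holding the leaves a, b and y; the caller
   sorts OPS by rank afterwards. */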
6137 static void
6138 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
6139 bool is_associative, bool set_visited)
6141 tree binlhs = gimple_assign_rhs1 (stmt);
6142 tree binrhs = gimple_assign_rhs2 (stmt);
6143 gimple *binlhsdef = NULL, *binrhsdef = NULL;
6144 bool binlhsisreassoc = false;
6145 bool binrhsisreassoc = false;
6146 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
6147 class loop *loop = loop_containing_stmt (stmt);
6149 if (set_visited)
6150 gimple_set_visited (stmt, true);
6152 if (TREE_CODE (binlhs) == SSA_NAME)
6154 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
6155 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
6156 && !stmt_could_throw_p (cfun, binlhsdef));
6159 if (TREE_CODE (binrhs) == SSA_NAME)
6161 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
6162 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
6163 && !stmt_could_throw_p (cfun, binrhsdef));
6166 /* If the LHS is not reassociable, but the RHS is, we need to swap
6167 them. If neither is reassociable, there is nothing we can do, so
6168 just put them in the ops vector. If the LHS is reassociable,
6169 linearize it. If both are reassociable, then linearize the RHS
6170 and the LHS. */
6172 if (!binlhsisreassoc)
6174 /* If this is not an associative operation, like division, give up. */
6175 if (!is_associative)
6177 add_to_ops_vec (ops, binrhs);
6178 return;
6181 if (!binrhsisreassoc)
6183 bool swap = false;
6184 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6185 /* If we add ops for the rhs we expect to be able to recurse
6186 to it via the lhs during expression rewrite, so swap
6187 the operands. */
6188 swap = true;
6189 else
6190 add_to_ops_vec (ops, binrhs);
6192 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
6193 add_to_ops_vec (ops, binlhs);
6195 if (!swap)
6196 return;
6199 if (dump_file && (dump_flags & TDF_DETAILS))
6201 fprintf (dump_file, "swapping operands of ");
6202 print_gimple_stmt (dump_file, stmt, 0);
6205 swap_ssa_operands (stmt,
6206 gimple_assign_rhs1_ptr (stmt),
6207 gimple_assign_rhs2_ptr (stmt));
6208 update_stmt (stmt);
6210 if (dump_file && (dump_flags & TDF_DETAILS))
6212 fprintf (dump_file, " is now ");
6213 print_gimple_stmt (dump_file, stmt, 0);
6215 if (!binrhsisreassoc)
6216 return;
6218 /* We want to make it so the lhs is always the reassociative op,
6219 so swap. */
6220 std::swap (binlhs, binrhs);
6222 else if (binrhsisreassoc)
6224 linearize_expr (stmt);
6225 binlhs = gimple_assign_rhs1 (stmt);
6226 binrhs = gimple_assign_rhs2 (stmt);
6229 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6230 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6231 rhscode, loop));
6232 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6233 is_associative, set_visited);
6235 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6236 add_to_ops_vec (ops, binrhs);
6239 /* Repropagate the negates back into subtracts, since no other pass
6240 currently does it. */
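/* For instance, with hypothetical statements, x = -a; y = b + x
   becomes y = b - a, while x = -a; y = x - b is turned into
   t = a + b; y = -t so that the negate keeps sinking and may combine
   with a later operation. */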
6242 static void
6243 repropagate_negates (void)
6245 unsigned int i = 0;
6246 tree negate;
6248 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6250 gimple *user = get_single_immediate_use (negate);
6251 if (!user || !is_gimple_assign (user))
6252 continue;
6254 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6255 if (TREE_CODE (negateop) == SSA_NAME
6256 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6257 continue;
6259 /* The negate operand can be either operand of a PLUS_EXPR
6260 (it can be the LHS if the RHS is a constant for example).
6262 Force the negate operand to the RHS of the PLUS_EXPR, then
6263 transform the PLUS_EXPR into a MINUS_EXPR. */
6264 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6266 /* If the negated operand appears on the LHS of the
6267 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6268 to force the negated operand to the RHS of the PLUS_EXPR. */
6269 if (gimple_assign_rhs1 (user) == negate)
6271 swap_ssa_operands (user,
6272 gimple_assign_rhs1_ptr (user),
6273 gimple_assign_rhs2_ptr (user));
6276 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6277 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6278 if (gimple_assign_rhs2 (user) == negate)
6280 tree rhs1 = gimple_assign_rhs1 (user);
6281 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6282 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6283 negateop);
6284 update_stmt (user);
6287 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6289 if (gimple_assign_rhs1 (user) == negate)
6291 /* We have
6292 x = -negateop
6293 y = x - b
6294 which we transform into
6295 x = negateop + b
6296 y = -x .
6297 This pushes down the negate which we possibly can merge
6298 into some other operation, hence insert it into the
6299 plus_negates vector. */
6300 gimple *feed = SSA_NAME_DEF_STMT (negate);
6301 tree b = gimple_assign_rhs2 (user);
6302 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6303 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6304 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6305 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6306 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6307 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6308 user = gsi_stmt (gsi2);
6309 update_stmt (user);
6310 reassoc_remove_stmt (&gsi);
6311 release_defs (feed);
6312 plus_negates.safe_push (gimple_assign_lhs (user));
6314 else
6316 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6317 getting rid of one operation. */
6318 tree rhs1 = gimple_assign_rhs1 (user);
6319 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6320 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6321 update_stmt (gsi_stmt (gsi));
6327 /* Break up subtract operations in block BB.
6329 We do this top down because we don't know whether the subtract is
6330 part of a possible chain of reassociation except at the top.
6332 IE given
6333 d = f + g
6334 c = a + e
6335 b = c - d
6336 q = b - r
6337 k = t - q
6339 we want to break up k = t - q, but we won't until we've transformed q
6340 = b - r, which won't be broken up until we transform b = c - d.
6342 En passant, clear the GIMPLE visited flag on every statement
6343 and set UIDs within each basic block. */
6345 static void
6346 break_up_subtract_bb (basic_block bb)
6348 gimple_stmt_iterator gsi;
6349 basic_block son;
6350 unsigned int uid = 1;
6352 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6354 gimple *stmt = gsi_stmt (gsi);
6355 gimple_set_visited (stmt, false);
6356 gimple_set_uid (stmt, uid++);
6358 if (!is_gimple_assign (stmt)
6359 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6360 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6361 continue;
6363 /* Look for simple gimple subtract operations. */
6364 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6366 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6367 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6368 continue;
6370 /* Check for a subtract used only in an addition. If this
6371 is the case, transform it into add of a negate for better
6372 reassociation. IE transform C = A-B into C = A + -B if C
6373 is only used in an addition. */
6374 if (should_break_up_subtract (stmt))
6375 break_up_subtract (stmt, &gsi);
6377 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6378 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6379 plus_negates.safe_push (gimple_assign_lhs (stmt));
6381 for (son = first_dom_son (CDI_DOMINATORS, bb);
6382 son;
6383 son = next_dom_son (CDI_DOMINATORS, son))
6384 break_up_subtract_bb (son);
6387 /* Used for repeated factor analysis. */
6388 struct repeat_factor
6390 /* An SSA name that occurs in a multiply chain. */
6391 tree factor;
6393 /* Cached rank of the factor. */
6394 unsigned rank;
6396 /* Number of occurrences of the factor in the chain. */
6397 HOST_WIDE_INT count;
6399 /* An SSA name representing the product of this factor and
6400 all factors appearing later in the repeated factor vector. */
6401 tree repr;
6405 static vec<repeat_factor> repeat_factor_vec;
6407 /* Used for sorting the repeat factor vector. Sort primarily by
6408 ascending occurrence count, secondarily by descending rank. */
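/* E.g., hypothetical factors {x: count 2, rank 5}, {y: count 3, rank 1}
   and {z: count 2, rank 9} sort to z, x, y: z and x share the lower
   count, and z has the higher rank. */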
6410 static int
6411 compare_repeat_factors (const void *x1, const void *x2)
6413 const repeat_factor *rf1 = (const repeat_factor *) x1;
6414 const repeat_factor *rf2 = (const repeat_factor *) x2;
6416 if (rf1->count != rf2->count)
6417 return rf1->count - rf2->count;
6419 return rf2->rank - rf1->rank;
6422 /* Look for repeated operands in OPS in the multiply tree rooted at
6423 STMT. Replace them with an optimal sequence of multiplies and powi
6424 builtin calls, and remove the used operands from OPS. Return an
6425 SSA name representing the value of the replacement sequence. */
6427 static tree
6428 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6430 unsigned i, j, vec_len;
6431 int ii;
6432 operand_entry *oe;
6433 repeat_factor *rf1, *rf2;
6434 repeat_factor rfnew;
6435 tree result = NULL_TREE;
6436 tree target_ssa, iter_result;
6437 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6438 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6439 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6440 gimple *mul_stmt, *pow_stmt;
6442 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6443 target, unless type is integral. */
6444 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6445 return NULL_TREE;
6447 /* Allocate the repeated factor vector. */
6448 repeat_factor_vec.create (10);
6450 /* Scan the OPS vector for all SSA names in the product and build
6451 up a vector of occurrence counts for each factor. */
6452 FOR_EACH_VEC_ELT (*ops, i, oe)
6454 if (TREE_CODE (oe->op) == SSA_NAME)
6456 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6458 if (rf1->factor == oe->op)
6460 rf1->count += oe->count;
6461 break;
6465 if (j >= repeat_factor_vec.length ())
6467 rfnew.factor = oe->op;
6468 rfnew.rank = oe->rank;
6469 rfnew.count = oe->count;
6470 rfnew.repr = NULL_TREE;
6471 repeat_factor_vec.safe_push (rfnew);
6476 /* Sort the repeated factor vector by (a) increasing occurrence count,
6477 and (b) decreasing rank. */
6478 repeat_factor_vec.qsort (compare_repeat_factors);
6480 /* It is generally best to combine as many base factors as possible
6481 into a product before applying __builtin_powi to the result.
6482 However, the sort order chosen for the repeated factor vector
6483 allows us to cache partial results for the product of the base
6484 factors for subsequent use. When we already have a cached partial
6485 result from a previous iteration, it is best to make use of it
6486 before looking for another __builtin_pow opportunity.
6488 As an example, consider x * x * y * y * y * z * z * z * z.
6489 We want to first compose the product x * y * z, raise it to the
6490 second power, then multiply this by y * z, and finally multiply
6491 by z. This can be done in 5 multiplies provided we cache y * z
6492 for use in both expressions:
6494 t1 = y * z
6495 t2 = t1 * x
6496 t3 = t2 * t2
6497 t4 = t1 * t3
6498 result = t4 * z
6500 If we instead ignored the cached y * z and first multiplied by
6501 the __builtin_pow opportunity z * z, we would get the inferior:
6503 t1 = y * z
6504 t2 = t1 * x
6505 t3 = t2 * t2
6506 t4 = z * z
6507 t5 = t3 * t4
6508 result = t5 * y */
6510 vec_len = repeat_factor_vec.length ();
6512 /* Repeatedly look for opportunities to create a builtin_powi call. */
6513 while (true)
6515 HOST_WIDE_INT power;
6517 /* First look for the largest cached product of factors from
6518 preceding iterations. If found, create a builtin_powi for
6519 it if the minimum occurrence count for its factors is at
6520 least 2, or just use this cached product as our next
6521 multiplicand if the minimum occurrence count is 1. */
6522 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6524 if (rf1->repr && rf1->count > 0)
6525 break;
6528 if (j < vec_len)
6530 power = rf1->count;
6532 if (power == 1)
6534 iter_result = rf1->repr;
6536 if (dump_file && (dump_flags & TDF_DETAILS))
6538 unsigned elt;
6539 repeat_factor *rf;
6540 fputs ("Multiplying by cached product ", dump_file);
6541 for (elt = j; elt < vec_len; elt++)
6543 rf = &repeat_factor_vec[elt];
6544 print_generic_expr (dump_file, rf->factor);
6545 if (elt < vec_len - 1)
6546 fputs (" * ", dump_file);
6548 fputs ("\n", dump_file);
6551 else
6553 if (INTEGRAL_TYPE_P (type))
6555 gcc_assert (power > 1);
6556 gimple_stmt_iterator gsip = gsi;
6557 gsi_prev (&gsip);
6558 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6559 rf1->repr, power);
6560 gimple_stmt_iterator gsic = gsi;
6561 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6563 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6564 gimple_set_visited (gsi_stmt (gsic), true);
6565 gsi_prev (&gsic);
6568 else
6570 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6571 pow_stmt
6572 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6573 build_int_cst (integer_type_node,
6574 power));
6575 gimple_call_set_lhs (pow_stmt, iter_result);
6576 gimple_set_location (pow_stmt, gimple_location (stmt));
6577 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6578 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6581 if (dump_file && (dump_flags & TDF_DETAILS))
6583 unsigned elt;
6584 repeat_factor *rf;
6585 fputs ("Building __builtin_pow call for cached product (",
6586 dump_file);
6587 for (elt = j; elt < vec_len; elt++)
6589 rf = &repeat_factor_vec[elt];
6590 print_generic_expr (dump_file, rf->factor);
6591 if (elt < vec_len - 1)
6592 fputs (" * ", dump_file);
6594 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6595 power);
6599 else
6601 /* Otherwise, find the first factor in the repeated factor
6602 vector whose occurrence count is at least 2. If no such
6603 factor exists, there are no builtin_powi opportunities
6604 remaining. */
6605 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6607 if (rf1->count >= 2)
6608 break;
6611 if (j >= vec_len)
6612 break;
6614 power = rf1->count;
6616 if (dump_file && (dump_flags & TDF_DETAILS))
6618 unsigned elt;
6619 repeat_factor *rf;
6620 fputs ("Building __builtin_pow call for (", dump_file);
6621 for (elt = j; elt < vec_len; elt++)
6623 rf = &repeat_factor_vec[elt];
6624 print_generic_expr (dump_file, rf->factor);
6625 if (elt < vec_len - 1)
6626 fputs (" * ", dump_file);
6628 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6631 reassociate_stats.pows_created++;
6633 /* Visit each element of the vector in reverse order (so that
6634 high-occurrence elements are visited first, and within the
6635 same occurrence count, lower-ranked elements are visited
6636 first). Form a linear product of all elements in this order
6637 whose occurrence count is at least that of element J.
6638 Record the SSA name representing the product of each element
6639 with all subsequent elements in the vector. */
6640 if (j == vec_len - 1)
6641 rf1->repr = rf1->factor;
6642 else
6644 for (ii = vec_len - 2; ii >= (int)j; ii--)
6646 tree op1, op2;
6648 rf1 = &repeat_factor_vec[ii];
6649 rf2 = &repeat_factor_vec[ii + 1];
6651 /* Init the last factor's representative to be itself. */
6652 if (!rf2->repr)
6653 rf2->repr = rf2->factor;
6655 op1 = rf1->factor;
6656 op2 = rf2->repr;
6658 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6659 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6660 op1, op2);
6661 gimple_set_location (mul_stmt, gimple_location (stmt));
6662 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6663 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6664 rf1->repr = target_ssa;
6666 /* Don't reprocess the multiply we just introduced. */
6667 gimple_set_visited (mul_stmt, true);
6671 /* Form a call to __builtin_powi for the maximum product
6672 just formed, raised to the power obtained earlier. */
6673 rf1 = &repeat_factor_vec[j];
6674 if (INTEGRAL_TYPE_P (type))
6676 gcc_assert (power > 1);
6677 gimple_stmt_iterator gsip = gsi;
6678 gsi_prev (&gsip);
6679 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6680 rf1->repr, power);
6681 gimple_stmt_iterator gsic = gsi;
6682 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6684 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6685 gimple_set_visited (gsi_stmt (gsic), true);
6686 gsi_prev (&gsic);
6689 else
6691 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6692 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6693 build_int_cst (integer_type_node,
6694 power));
6695 gimple_call_set_lhs (pow_stmt, iter_result);
6696 gimple_set_location (pow_stmt, gimple_location (stmt));
6697 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6698 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6702 /* If we previously formed at least one other builtin_powi call,
6703 form the product of this one and those others. */
6704 if (result)
6706 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6707 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6708 result, iter_result);
6709 gimple_set_location (mul_stmt, gimple_location (stmt));
6710 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6711 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6712 gimple_set_visited (mul_stmt, true);
6713 result = new_result;
6715 else
6716 result = iter_result;
6718 /* Decrement the occurrence count of each element in the product
6719 by the count found above, and remove this many copies of each
6720 factor from OPS. */
6721 for (i = j; i < vec_len; i++)
6723 unsigned k = power;
6724 unsigned n;
6726 rf1 = &repeat_factor_vec[i];
6727 rf1->count -= power;
6729 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6731 if (oe->op == rf1->factor)
6733 if (oe->count <= k)
6735 ops->ordered_remove (n);
6736 k -= oe->count;
6738 if (k == 0)
6739 break;
6741 else
6743 oe->count -= k;
6744 break;
6751 /* At this point all elements in the repeated factor vector have a
6752 remaining occurrence count of 0 or 1, and those with a count of 1
6753 don't have cached representatives. Re-sort the ops vector and
6754 clean up. */
6755 ops->qsort (sort_by_operand_rank);
6756 repeat_factor_vec.release ();
6758 /* Return the final product computed herein. Note that there may
6759 still be some elements with single occurrence count left in OPS;
6760 those will be handled by the normal reassociation logic. */
6761 return result;
6764 /* Attempt to optimize
6765 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6766 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
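/* A worked example with hypothetical constants: 2.5 * copysign (3.0, y)
   becomes copysign (7.5, y); if the multiplier were negative, e.g. -2.5,
   the rebuilt copysign call would additionally be negated. */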
6768 static void
6769 attempt_builtin_copysign (vec<operand_entry *> *ops)
6771 operand_entry *oe;
6772 unsigned int i;
6773 unsigned int length = ops->length ();
6774 tree cst = ops->last ()->op;
6776 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6777 return;
6779 FOR_EACH_VEC_ELT (*ops, i, oe)
6781 if (TREE_CODE (oe->op) == SSA_NAME
6782 && has_single_use (oe->op))
6784 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6785 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6787 tree arg0, arg1;
6788 switch (gimple_call_combined_fn (old_call))
6790 CASE_CFN_COPYSIGN:
6791 CASE_CFN_COPYSIGN_FN:
6792 arg0 = gimple_call_arg (old_call, 0);
6793 arg1 = gimple_call_arg (old_call, 1);
6794 /* The first argument of copysign must be a constant,
6795 otherwise there's nothing to do. */
6796 if (TREE_CODE (arg0) == REAL_CST)
6798 tree type = TREE_TYPE (arg0);
6799 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6800 /* If we couldn't fold to a single constant, skip it.
6801 That happens e.g. for inexact multiplication when
6802 -frounding-math. */
6803 if (mul == NULL_TREE)
6804 break;
6805 /* Instead of adjusting OLD_CALL, let's build a new
6806 call to not leak the LHS and prevent keeping bogus
6807 debug statements. DCE will clean up the old call. */
6808 gcall *new_call;
6809 if (gimple_call_internal_p (old_call))
6810 new_call = gimple_build_call_internal
6811 (IFN_COPYSIGN, 2, mul, arg1);
6812 else
6813 new_call = gimple_build_call
6814 (gimple_call_fndecl (old_call), 2, mul, arg1);
6815 tree lhs = make_ssa_name (type);
6816 gimple_call_set_lhs (new_call, lhs);
6817 gimple_set_location (new_call,
6818 gimple_location (old_call));
6819 insert_stmt_after (new_call, old_call);
6820 /* We've used the constant, get rid of it. */
6821 ops->pop ();
6822 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6823 /* Handle the CST1 < 0 case by negating the result. */
6824 if (cst1_neg)
6826 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6827 gimple *negate_stmt
6828 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6829 insert_stmt_after (negate_stmt, new_call);
6830 oe->op = negrhs;
6832 else
6833 oe->op = lhs;
6834 if (dump_file && (dump_flags & TDF_DETAILS))
6836 fprintf (dump_file, "Optimizing copysign: ");
6837 print_generic_expr (dump_file, cst);
6838 fprintf (dump_file, " * COPYSIGN (");
6839 print_generic_expr (dump_file, arg0);
6840 fprintf (dump_file, ", ");
6841 print_generic_expr (dump_file, arg1);
6842 fprintf (dump_file, ") into %sCOPYSIGN (",
6843 cst1_neg ? "-" : "");
6844 print_generic_expr (dump_file, mul);
6845 fprintf (dump_file, ", ");
6846 print_generic_expr (dump_file, arg1);
6847 fprintf (dump_file, ")\n");
6849 return;
6851 break;
6852 default:
6853 break;
6860 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6862 static void
6863 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6865 tree rhs1;
6867 if (dump_file && (dump_flags & TDF_DETAILS))
6869 fprintf (dump_file, "Transforming ");
6870 print_gimple_stmt (dump_file, stmt, 0);
6873 rhs1 = gimple_assign_rhs1 (stmt);
6874 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6875 update_stmt (stmt);
6876 remove_visited_stmt_chain (rhs1);
6878 if (dump_file && (dump_flags & TDF_DETAILS))
6880 fprintf (dump_file, " into ");
6881 print_gimple_stmt (dump_file, stmt, 0);
6885 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6887 static void
6888 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6889 tree rhs1, tree rhs2)
6891 if (dump_file && (dump_flags & TDF_DETAILS))
6893 fprintf (dump_file, "Transforming ");
6894 print_gimple_stmt (dump_file, stmt, 0);
6897 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6898 update_stmt (gsi_stmt (*gsi));
6899 remove_visited_stmt_chain (rhs1);
6901 if (dump_file && (dump_flags & TDF_DETAILS))
6903 fprintf (dump_file, " into ");
6904 print_gimple_stmt (dump_file, stmt, 0);
6908 /* Rearranging the ops can expose more FMAs when the chain contains more
6909 than 2 FMAs. Put non-mult ops and mult ops alternately at the end of
6910 the queue, which is conducive to generating more FMAs and reducing the
6911 loss of FMAs when breaking the chain.
6912 E.g.
6913 a * b + c * d + e generates:
6915 _4 = c_9(D) * d_10(D);
6916 _12 = .FMA (a_7(D), b_8(D), _4);
6917 _11 = e_6(D) + _12;
6919 Rearrange ops to -> e + a * b + c * d generates:
6921 _4 = .FMA (c_7(D), d_8(D), _3);
6922 _11 = .FMA (a_5(D), b_6(D), _4);
6924 Return the number of MULT_EXPRs in the chain. */
6925 static int
6926 rank_ops_for_fma (vec<operand_entry *> *ops)
6928 operand_entry *oe;
6929 unsigned int i;
6930 unsigned int ops_length = ops->length ();
6931 auto_vec<operand_entry *> ops_mult;
6932 auto_vec<operand_entry *> ops_others;
6934 FOR_EACH_VEC_ELT (*ops, i, oe)
6936 if (TREE_CODE (oe->op) == SSA_NAME)
6938 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6939 if (is_gimple_assign (def_stmt))
6941 if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6942 ops_mult.safe_push (oe);
6943 /* A negate on the multiplication leads to FNMA. */
6944 else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6945 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
6947 gimple *neg_def_stmt
6948 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
6949 if (is_gimple_assign (neg_def_stmt)
6950 && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
6951 && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
6952 ops_mult.safe_push (oe);
6953 else
6954 ops_others.safe_push (oe);
6956 else
6957 ops_others.safe_push (oe);
6959 else
6960 ops_others.safe_push (oe);
6962 else
6963 ops_others.safe_push (oe);
6965 /* 1. When ops_mult.length == 2, as in the following case,
6967 a * b + c * d + e,
6969 we need to rearrange the ops.
6971 Putting ops that are not defined by a mult in front can generate more FMAs.
6973 2. If all ops are defined by mults, we don't need to rearrange them. */
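/* A sketch with hypothetical entries: for ops_mult = {m1, m2} and
   ops_others = {o1, o2, o3}, the code below rebuilds OPS as
   {o1, m1, o2, m2, o3}, so the end of the queue alternates between
   non-mult and mult operands as described above. */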
6974 unsigned mult_num = ops_mult.length ();
6975 if (mult_num >= 2 && mult_num != ops_length)
6977 /* Put non-mult ops and mult ops alternately at the end of the
6978 queue, which is conducive to generating more FMAs and reducing the
6979 loss of FMAs when breaking the chain. */
6980 ops->truncate (0);
6981 ops->splice (ops_mult);
6982 int j, opindex = ops->length ();
6983 int others_length = ops_others.length ();
6984 for (j = 0; j < others_length; j++)
6986 oe = ops_others.pop ();
6987 ops->quick_insert (opindex, oe);
6988 if (opindex > 0)
6989 opindex--;
6992 return mult_num;
6994 /* Reassociate expressions in basic block BB and its children in the
6995 post-dominator tree.
6997 Bubble up return status from maybe_optimize_range_tests. */
6999 static bool
7000 reassociate_bb (basic_block bb)
7002 gimple_stmt_iterator gsi;
7003 basic_block son;
7004 gimple *stmt = last_nondebug_stmt (bb);
7005 bool cfg_cleanup_needed = false;
7007 if (stmt && !gimple_visited_p (stmt))
7008 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
7010 bool do_prev = false;
7011 for (gsi = gsi_last_bb (bb);
7012 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
7014 do_prev = true;
7015 stmt = gsi_stmt (gsi);
7017 if (is_gimple_assign (stmt)
7018 && !stmt_could_throw_p (cfun, stmt))
7020 tree lhs, rhs1, rhs2;
7021 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
7023 /* If this was part of an already processed statement,
7024 we don't need to touch it again. */
7025 if (gimple_visited_p (stmt))
7027 /* This statement might have become dead because of previous
7028 reassociations. */
7029 if (has_zero_uses (gimple_get_lhs (stmt)))
7031 reassoc_remove_stmt (&gsi);
7032 release_defs (stmt);
7033 /* We might end up removing the last stmt above which
7034 places the iterator to the end of the sequence.
7035 Reset it to the last stmt in this case and make sure
7036 we don't do gsi_prev in that case. */
7037 if (gsi_end_p (gsi))
7039 gsi = gsi_last_bb (bb);
7040 do_prev = false;
7043 continue;
7046 /* If this is not a gimple binary expression, there is
7047 nothing for us to do with it. */
7048 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
7049 continue;
7051 lhs = gimple_assign_lhs (stmt);
7052 rhs1 = gimple_assign_rhs1 (stmt);
7053 rhs2 = gimple_assign_rhs2 (stmt);
7055 /* For non-bit or min/max operations we can't associate
7056 all types. Verify that here. */
7057 if ((rhs_code != BIT_IOR_EXPR
7058 && rhs_code != BIT_AND_EXPR
7059 && rhs_code != BIT_XOR_EXPR
7060 && rhs_code != MIN_EXPR
7061 && rhs_code != MAX_EXPR
7062 && !can_reassociate_type_p (TREE_TYPE (lhs)))
7063 || !can_reassociate_op_p (rhs1)
7064 || !can_reassociate_op_p (rhs2))
7065 continue;
7067 if (associative_tree_code (rhs_code))
7069 auto_vec<operand_entry *> ops;
7070 tree powi_result = NULL_TREE;
7071 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
7073 /* There may be no immediate uses left by the time we
7074 get here because we may have eliminated them all. */
7075 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
7076 continue;
7078 gimple_set_visited (stmt, true);
7079 linearize_expr_tree (&ops, stmt, true, true);
7080 ops.qsort (sort_by_operand_rank);
7081 int orig_len = ops.length ();
7082 optimize_ops_list (rhs_code, &ops);
7083 if (undistribute_ops_list (rhs_code, &ops,
7084 loop_containing_stmt (stmt)))
7086 ops.qsort (sort_by_operand_rank);
7087 optimize_ops_list (rhs_code, &ops);
7089 if (undistribute_bitref_for_vector (rhs_code, &ops,
7090 loop_containing_stmt (stmt)))
7092 ops.qsort (sort_by_operand_rank);
7093 optimize_ops_list (rhs_code, &ops);
7095 if (rhs_code == PLUS_EXPR
7096 && transform_add_to_multiply (&ops))
7097 ops.qsort (sort_by_operand_rank);
7099 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
7101 if (is_vector)
7102 optimize_vec_cond_expr (rhs_code, &ops);
7103 else
7104 optimize_range_tests (rhs_code, &ops, NULL);
7107 if (rhs_code == MULT_EXPR && !is_vector)
7109 attempt_builtin_copysign (&ops);
7111 if (reassoc_insert_powi_p
7112 && (flag_unsafe_math_optimizations
7113 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
7114 powi_result = attempt_builtin_powi (stmt, &ops);
7117 operand_entry *last;
7118 bool negate_result = false;
7119 if (ops.length () > 1
7120 && rhs_code == MULT_EXPR)
7122 last = ops.last ();
7123 if ((integer_minus_onep (last->op)
7124 || real_minus_onep (last->op))
7125 && !HONOR_SNANS (TREE_TYPE (lhs))
7126 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
7127 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
7129 ops.pop ();
7130 negate_result = true;
7134 tree new_lhs = lhs;
7135 /* If the operand vector is now empty, all operands were
7136 consumed by the __builtin_powi optimization. */
7137 if (ops.length () == 0)
7138 transform_stmt_to_copy (&gsi, stmt, powi_result);
7139 else if (ops.length () == 1)
7141 tree last_op = ops.last ()->op;
7143 /* If the stmt that defines operand has to be inserted, insert it
7144 before the use. */
7145 if (ops.last ()->stmt_to_insert)
7146 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
7147 if (powi_result)
7148 transform_stmt_to_multiply (&gsi, stmt, last_op,
7149 powi_result);
7150 else
7151 transform_stmt_to_copy (&gsi, stmt, last_op);
7153 else
7155 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7156 int ops_num = ops.length ();
7157 int width = 0;
7158 int mult_num = 0;
7160 /* For binary bit operations, if there are at least 3
7161 operands and the last operand in OPS is a constant,
7162 move it to the front. This helps ensure that we generate
7163 (X & Y) & C rather than (X & C) & Y. The former will
7164 often match a canonical bit test when we get to RTL. */
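/* E.g., with hypothetical operands, OPS = {x, y, 0x10} for a
   BIT_AND_EXPR is reordered by the swap below to {0x10, y, x},
   steering the rewrite towards (x & y) & 0x10. */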
7165 if (ops.length () > 2
7166 && (rhs_code == BIT_AND_EXPR
7167 || rhs_code == BIT_IOR_EXPR
7168 || rhs_code == BIT_XOR_EXPR)
7169 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7170 std::swap (*ops[0], *ops[ops_num - 1]);
7172 optimization_type opt_type = bb_optimization_type (bb);
7174 /* If the target supports FMA, rank_ops_for_fma will detect whether
7175 the chain has FMAs and rearrange the ops if so. */
7176 if (direct_internal_fn_supported_p (IFN_FMA,
7177 TREE_TYPE (lhs),
7178 opt_type)
7179 && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7181 mult_num = rank_ops_for_fma (&ops);
7184 /* Only rewrite the expression tree to parallel in the
7185 last reassoc pass to avoid useless work back-and-forth
7186 with initial linearization. */
7187 bool has_fma = mult_num >= 2 && mult_num != ops_num;
7188 if (!reassoc_insert_powi_p
7189 && ops.length () > 3
7190 && (width = get_reassociation_width (&ops, mult_num, lhs,
7191 rhs_code, mode))
7192 > 1)
7194 if (dump_file && (dump_flags & TDF_DETAILS))
7195 fprintf (dump_file,
7196 "Width = %d was chosen for reassociation\n",
7197 width);
7198 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7199 width,
7200 has_fma,
7201 ops);
7203 else
7205 /* When there are three operands left, we want
7206 to make sure the ones that get the double
7207 binary op are chosen wisely. */
7208 int len = ops.length ();
7209 if (len >= 3
7210 && (!has_fma
7211 /* width > 1 means ranking ops results in better
7212 parallelism. Check current value to avoid
7213 calling get_reassociation_width again. */
7214 || (width != 1
7215 && get_reassociation_width (
7216 &ops, mult_num, lhs, rhs_code, mode)
7217 > 1)))
7218 swap_ops_for_binary_stmt (ops, len - 3);
7220 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7221 powi_result != NULL
7222 || negate_result,
7223 len != orig_len);
7226 /* If we combined some repeated factors into a
7227 __builtin_powi call, multiply that result by the
7228 reassociated operands. */
7229 if (powi_result)
7231 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7232 tree type = TREE_TYPE (lhs);
7233 tree target_ssa = make_temp_ssa_name (type, NULL,
7234 "reassocpow");
7235 gimple_set_lhs (lhs_stmt, target_ssa);
7236 update_stmt (lhs_stmt);
7237 if (lhs != new_lhs)
7239 target_ssa = new_lhs;
7240 new_lhs = lhs;
7242 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7243 powi_result, target_ssa);
7244 gimple_set_location (mul_stmt, gimple_location (stmt));
7245 gimple_set_uid (mul_stmt, gimple_uid (stmt));
7246 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7250 if (negate_result)
7252 stmt = SSA_NAME_DEF_STMT (lhs);
7253 tree tmp = make_ssa_name (TREE_TYPE (lhs));
7254 gimple_set_lhs (stmt, tmp);
7255 if (lhs != new_lhs)
7256 tmp = new_lhs;
7257 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7258 tmp);
7259 gimple_set_uid (neg_stmt, gimple_uid (stmt));
7260 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7261 update_stmt (stmt);
7266 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
7267 son;
7268 son = next_dom_son (CDI_POST_DOMINATORS, son))
7269 cfg_cleanup_needed |= reassociate_bb (son);
7271 return cfg_cleanup_needed;
7274 /* Add jumps around shifts for range tests turned into bit tests.
7275 For each SSA_NAME VAR we have code like:
7276 VAR = ...; // final stmt of range comparison
7277 // bit test here...;
7278 OTHERVAR = ...; // final stmt of the bit test sequence
7279 RES = VAR | OTHERVAR;
7280 Turn the above into:
7281 VAR = ...;
7282 if (VAR != 0)
7283 goto <l3>;
7284 else
7285 goto <l2>;
7286 <l2>:
7287 // bit test here...;
7288 OTHERVAR = ...;
7289 <l3>:
7290 # RES = PHI<1(l1), OTHERVAR(l2)>; */
7292 static void
7293 branch_fixup (void)
7295 tree var;
7296 unsigned int i;
7298 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7300 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7301 gimple *use_stmt;
7302 use_operand_p use;
7303 bool ok = single_imm_use (var, &use, &use_stmt);
7304 gcc_assert (ok
7305 && is_gimple_assign (use_stmt)
7306 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7307 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7309 basic_block cond_bb = gimple_bb (def_stmt);
7310 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7311 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7313 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7314 gimple *g = gimple_build_cond (NE_EXPR, var,
7315 build_zero_cst (TREE_TYPE (var)),
7316 NULL_TREE, NULL_TREE);
7317 location_t loc = gimple_location (use_stmt);
7318 gimple_set_location (g, loc);
7319 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7321 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7322 etrue->probability = profile_probability::even ();
7323 edge efalse = find_edge (cond_bb, then_bb);
7324 efalse->flags = EDGE_FALSE_VALUE;
7325 efalse->probability -= etrue->probability;
7326 then_bb->count -= etrue->count ();
7328 tree othervar = NULL_TREE;
7329 if (gimple_assign_rhs1 (use_stmt) == var)
7330 othervar = gimple_assign_rhs2 (use_stmt);
7331 else if (gimple_assign_rhs2 (use_stmt) == var)
7332 othervar = gimple_assign_rhs1 (use_stmt);
7333 else
7334 gcc_unreachable ();
7335 tree lhs = gimple_assign_lhs (use_stmt);
7336 gphi *phi = create_phi_node (lhs, merge_bb);
7337 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7338 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7339 gsi = gsi_for_stmt (use_stmt);
7340 gsi_remove (&gsi, true);
7342 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7343 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7345 reassoc_branch_fixups.release ();
7348 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7349 void debug_ops_vector (vec<operand_entry *> ops);
7351 /* Dump the operand entry vector OPS to FILE. */
7353 void
7354 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7356 operand_entry *oe;
7357 unsigned int i;
7359 FOR_EACH_VEC_ELT (ops, i, oe)
7361 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7362 print_generic_expr (file, oe->op);
7363 fprintf (file, "\n");
7367 /* Dump the operand entry vector OPS to STDERR. */
7369 DEBUG_FUNCTION void
7370 debug_ops_vector (vec<operand_entry *> ops)
7372 dump_ops_vector (stderr, ops);
7375 /* Bubble up return status from reassociate_bb. */
7377 static bool
7378 do_reassoc (void)
7380 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7381 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7384 /* Initialize the reassociation pass. */
7386 static void
7387 init_reassoc (void)
7389 int i;
7390 int64_t rank = 2;
7391 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7393 /* Find the loops, so that we can prevent moving calculations in
7394 them. */
7395 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7397 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7399 next_operand_entry_id = 0;
7401 /* Reverse RPO (Reverse Post Order) will give us something where
7402 deeper loops come later. */
7403 pre_and_rev_post_order_compute (NULL, bbs, false);
7404 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7405 operand_rank = new hash_map<tree, int64_t>;
7407 /* Give each default definition a distinct rank. This includes
7408 parameters and the static chain. Walk backwards over all
7409 SSA names so that we get proper rank ordering according
7410 to tree_swap_operands_p. */
7411 for (i = num_ssa_names - 1; i > 0; --i)
7413 tree name = ssa_name (i);
7414 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7415 insert_operand_rank (name, ++rank);
7418 /* Set up rank for each BB */
7419 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7420 bb_rank[bbs[i]] = ++rank << 16;
7422 free (bbs);
7423 calculate_dominance_info (CDI_POST_DOMINATORS);
7424 plus_negates = vNULL;
7425 mark_ssa_maybe_undefs ();
7428 /* Cleanup after the reassociation pass, and print stats if
7429 requested. */
7431 static void
7432 fini_reassoc (void)
7434 statistics_counter_event (cfun, "Linearized",
7435 reassociate_stats.linearized);
7436 statistics_counter_event (cfun, "Constants eliminated",
7437 reassociate_stats.constants_eliminated);
7438 statistics_counter_event (cfun, "Ops eliminated",
7439 reassociate_stats.ops_eliminated);
7440 statistics_counter_event (cfun, "Statements rewritten",
7441 reassociate_stats.rewritten);
7442 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7443 reassociate_stats.pows_encountered);
7444 statistics_counter_event (cfun, "Built-in powi calls created",
7445 reassociate_stats.pows_created);
7447 delete operand_rank;
7448 bitmap_clear (biased_names);
7449 operand_entry_pool.release ();
7450 free (bb_rank);
7451 plus_negates.release ();
7452 free_dominance_info (CDI_POST_DOMINATORS);
7453 loop_optimizer_finalize ();
7456 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7457 insertion of __builtin_powi calls.
7459 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7460 optimization of a gimple conditional. Otherwise returns zero. */
7462 static unsigned int
7463 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7465 reassoc_insert_powi_p = insert_powi_p;
7466 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7468 init_reassoc ();
7470 bool cfg_cleanup_needed;
7471 cfg_cleanup_needed = do_reassoc ();
7472 repropagate_negates ();
7473 branch_fixup ();
7475 fini_reassoc ();
7476 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7479 namespace {
7481 const pass_data pass_data_reassoc =
7483 GIMPLE_PASS, /* type */
7484 "reassoc", /* name */
7485 OPTGROUP_NONE, /* optinfo_flags */
7486 TV_TREE_REASSOC, /* tv_id */
7487 ( PROP_cfg | PROP_ssa ), /* properties_required */
7488 0, /* properties_provided */
7489 0, /* properties_destroyed */
7490 0, /* todo_flags_start */
7491 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7494 class pass_reassoc : public gimple_opt_pass
7496 public:
7497 pass_reassoc (gcc::context *ctxt)
7498 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7501 /* opt_pass methods: */
7502 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7503 void set_pass_param (unsigned int n, bool param) final override
7505 gcc_assert (n == 0);
7506 insert_powi_p = param;
7507 bias_loop_carried_phi_ranks_p = !param;
7509 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7510 unsigned int execute (function *) final override
7512 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7515 private:
7516 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7517 point 3a in the pass header comment. */
7518 bool insert_powi_p;
7519 bool bias_loop_carried_phi_ranks_p;
7520 }; // class pass_reassoc
7522 } // anon namespace
7524 gimple_opt_pass *
7525 make_pass_reassoc (gcc::context *ctxt)
7527 return new pass_reassoc (ctxt);