gcc/tree-ssa-reassoc.cc
1 /* Reassociation for trees.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57 #include "internal-fn.h"
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
63 It consists of five steps:
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_*
73 3. Optimization of the operand lists, eliminating things like a +
74 -a, a & a, etc.
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
83 5. Repropagate negates, as nothing else will clean it up ATM.
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
104 a (1), b (1), c (1), d (2), e (2)
107 We start with our merge worklist empty, and the ops list with all of
108 those on it.
110 You want to first merge all leaves of the same rank, as much as
111 possible.
113 So first build a binary op of
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
125 Then build a binary op of d + e
126 mergetmp2 = d + e
128 and put mergetmp2 on the merge worklist.
130 so merge worklist = {mergetmp, c, mergetmp2}
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
135 So we have
137 build binary op
138 mergetmp3 = mergetmp + c
140 worklist = {mergetmp2, mergetmp3}
142 mergetmp4 = mergetmp2 + mergetmp3
144 worklist = {mergetmp4}
146    Because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
155 So why don't we do this?
157 Because it's expensive, and rarely will help. Most trees we are
158    reassociating have 3 or fewer ops.  If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
163 mergetmp = op1 + op2
164 newstmt = mergetmp + op3
166 instead of
167 mergetmp = op2 + op3
168 newstmt = mergetmp + op1
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
172 can do.
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
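/* Illustrative, standalone sketch of the "merge equal-rank leaves
   first" strategy described above; it is hypothetical and not used by
   the pass (real operands are operand_entry *, not ints).  Equal-rank
   operands are paired first so the resulting binary ops expose as much
   redundancy as possible to later elimination passes.  */

#include <algorithm>
#include <cstddef>
#include <vector>

struct example_op { int rank; int value; };

static example_op
example_merge_by_rank (std::vector<example_op> worklist)
{
  while (worklist.size () > 1)
    {
      /* Bring operands of equal rank next to each other.  */
      std::stable_sort (worklist.begin (), worklist.end (),
			[] (const example_op &a, const example_op &b)
			{ return a.rank < b.rank; });
      std::vector<example_op> next;
      for (std::size_t i = 0; i < worklist.size (); i += 2)
	{
	  if (i + 1 < worklist.size ())
	    /* The merged temporary keeps the larger of the two ranks.  */
	    next.push_back ({ std::max (worklist[i].rank,
					worklist[i + 1].rank),
			      worklist[i].value + worklist[i + 1].value });
	  else
	    next.push_back (worklist[i]);
	}
      worklist = next;
    }
  return worklist[0];
}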
180 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 point 3a in the pass header comment. */
182 static bool reassoc_insert_powi_p;
184 /* Enable biasing ranks of loop accumulators. We don't want this before
185 vectorization, since it interferes with reduction chains. */
186 static bool reassoc_bias_loop_carried_phi_ranks_p;
188 /* Statistics */
189 static struct
191 int linearized;
192 int constants_eliminated;
193 int ops_eliminated;
194 int rewritten;
195 int pows_encountered;
196 int pows_created;
197 } reassociate_stats;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static unsigned int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static int64_t *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, int64_t> *operand_rank;
215 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 biased. */
217 static auto_bitmap biased_names;
219 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
220 all basic blocks the CFG should be adjusted - basic blocks
221 split right after that SSA_NAME's definition statement and before
222 the only use, which must be a bit ior. */
223 static vec<tree> reassoc_branch_fixups;
225 /* Forward decls. */
226 static int64_t get_rank (tree);
227 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
229 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 possibly added by gsi_remove. */
232 static bool
233 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
235 gimple *stmt = gsi_stmt (*gsi);
237 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 return gsi_remove (gsi, true);
240 gimple_stmt_iterator prev = *gsi;
241 gsi_prev (&prev);
242 unsigned uid = gimple_uid (stmt);
243 basic_block bb = gimple_bb (stmt);
244 bool ret = gsi_remove (gsi, true);
245 if (!gsi_end_p (prev))
246 gsi_next (&prev);
247 else
248 prev = gsi_start_bb (bb);
249 gimple *end_stmt = gsi_stmt (*gsi);
250 while ((stmt = gsi_stmt (prev)) != end_stmt)
252 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 gimple_set_uid (stmt, uid);
254 gsi_next (&prev);
256 return ret;
259 /* Bias amount for loop-carried phis. We want this to be larger than
260 the depth of any reassociation tree we can see, but not larger than
261 the rank difference between two blocks. */
262 #define PHI_LOOP_BIAS (1 << 15)
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
266 like this:
268 x_1 = phi(x_0, x_2)
269 a = x_1 | 1
270 b = a ^ 2
271 .MEM = b
272 c = b + d
273 x_2 = c + e
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
278 static bool
279 propagate_bias_p (gimple *stmt)
281 use_operand_p use;
282 imm_use_iterator use_iter;
283 gimple *single_use_stmt = NULL;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 return false;
288 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
290 gimple *current_use_stmt = USE_STMT (use);
292 if (is_gimple_assign (current_use_stmt)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
295 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 return false;
297 single_use_stmt = current_use_stmt;
301 if (single_use_stmt == NULL)
302 return false;
304 if (gimple_bb (stmt)->loop_father
305 != gimple_bb (single_use_stmt)->loop_father)
306 return false;
308 return true;
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
318 static int64_t
319 phi_rank (gimple *stmt)
321 basic_block bb = gimple_bb (stmt);
322 class loop *father = bb->loop_father;
323 tree res;
324 unsigned i;
325 use_operand_p use;
326 gimple *use_stmt;
328 if (!reassoc_bias_loop_carried_phi_ranks_p)
329 return bb_rank[bb->index];
331 /* We only care about real loops (those with a latch). */
332 if (!father->latch)
333 return bb_rank[bb->index];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb != father->header
337 || father->inner)
338 return bb_rank[bb->index];
340 /* Ignore virtual SSA_NAMEs. */
341 res = gimple_phi_result (stmt);
342 if (virtual_operand_p (res))
343 return bb_rank[bb->index];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res, &use, &use_stmt)
348 || gimple_bb (use_stmt)->loop_father != father)
349 return bb_rank[bb->index];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i = 0; i < gimple_phi_num_args (stmt); i++)
354 tree arg = gimple_phi_arg_def (stmt, i);
355 if (TREE_CODE (arg) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg))
358 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 if (gimple_bb (def_stmt)->loop_father == father)
360 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
364 /* Must be an uninteresting phi. */
365 return bb_rank[bb->index];
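/* Standalone illustration (hypothetical, not part of the pass) of why
   a loop-carried accumulator phi gets the extra PHI_LOOP_BIAS: ranking
   the accumulator highest makes it the last operand to be added, so
   the per-iteration work no longer forms one long dependence chain.
   For doubles this rewrite is only licensed under -fassociative-math.  */

/* Before reassociation: every addition depends on the previous one.  */
static double
example_accumulate_serial (const double *a, const double *d,
			   const double *e, int n)
{
  double x = 0.0;
  for (int i = 0; i < n; i++)
    x = ((x + a[i]) + d[i]) + e[i];
  return x;
}

/* After reassociation with the biased phi rank: a[i] + d[i] + e[i] is
   independent of X, so unrolled iterations can overlap; only the final
   addition carries the loop dependence.  */
static double
example_accumulate_biased (const double *a, const double *d,
			   const double *e, int n)
{
  double x = 0.0;
  for (int i = 0; i < n; i++)
    x = x + ((a[i] + d[i]) + e[i]);
  return x;
}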
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
372 static int64_t
373 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
375 int64_t op_rank;
377 op_rank = get_rank (op);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op) == SSA_NAME
382 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
384 if (maybe_biased_p == NULL)
385 return rank;
386 *maybe_biased_p = true;
389 return MAX (rank, op_rank);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e)
397 int64_t *slot = operand_rank->get (e);
398 return slot ? *slot : -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
403 static inline void
404 insert_operand_rank (tree e, int64_t rank)
406 gcc_assert (rank > 0);
407 gcc_assert (!operand_rank->put (e, rank));
410 /* Given an expression E, return the rank of the expression. */
412 static int64_t
413 get_rank (tree e)
415   /* SSA_NAME's have the rank of the expression they are the result
416      of.
417      For globals and uninitialized values, the rank is 0.
418 For function arguments, use the pre-setup rank.
419 For PHI nodes, stores, asm statements, etc, we use the rank of
420 the BB.
421 For simple operations, the rank is the maximum rank of any of
422 its operands, or the bb_rank, whichever is less.
423 I make no claims that this is optimal, however, it gives good
424 results. */
426 /* We make an exception to the normal ranking system to break
427 dependences of accumulator variables in loops. Suppose we
428 have a simple one-block loop containing:
430 x_1 = phi(x_0, x_2)
431 b = a + x_1
432 c = b + d
433 x_2 = c + e
435 As shown, each iteration of the calculation into x is fully
436 dependent upon the iteration before it. We would prefer to
437 see this in the form:
439 x_1 = phi(x_0, x_2)
440 b = a + d
441 c = b + e
442 x_2 = c + x_1
444 If the loop is unrolled, the calculations of b and c from
445 different iterations can be interleaved.
447 To obtain this result during reassociation, we bias the rank
448 of the phi definition x_1 upward, when it is recognized as an
449 accumulator pattern. The artificial rank causes it to be
450 added last, providing the desired independence. */
452 if (TREE_CODE (e) == SSA_NAME)
454 ssa_op_iter iter;
455 gimple *stmt;
456 int64_t rank;
457 tree op;
459 /* If we already have a rank for this expression, use that. */
460 rank = find_operand_rank (e);
461 if (rank != -1)
462 return rank;
464 stmt = SSA_NAME_DEF_STMT (e);
465 if (gimple_code (stmt) == GIMPLE_PHI)
467 rank = phi_rank (stmt);
468 if (rank != bb_rank[gimple_bb (stmt)->index])
469 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
472 else if (!is_gimple_assign (stmt))
473 rank = bb_rank[gimple_bb (stmt)->index];
475 else
477 bool biased_p = false;
478 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
480 /* Otherwise, find the maximum rank for the operands. As an
481 exception, remove the bias from loop-carried phis when propagating
482 the rank so that dependent operations are not also biased. */
483       /* Simply walk over all SSA uses - this takes advantage of the
484 fact that non-SSA operands are is_gimple_min_invariant and
485 thus have rank 0. */
486 rank = 0;
487 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
488 rank = propagate_rank (rank, op, maybe_biased_p);
490 rank += 1;
491 if (biased_p)
492 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
495 if (dump_file && (dump_flags & TDF_DETAILS))
497 fprintf (dump_file, "Rank for ");
498 print_generic_expr (dump_file, e);
499 fprintf (dump_file, " is %" PRId64 "\n", rank);
502 /* Note the rank in the hashtable so we don't recompute it. */
503 insert_operand_rank (e, rank);
504 return rank;
507 /* Constants, globals, etc., are rank 0 */
508 return 0;
512 /* We want integer ones to end up last no matter what, since they are
513 the ones we can do the most with. */
514 #define INTEGER_CONST_TYPE 1 << 4
515 #define FLOAT_ONE_CONST_TYPE 1 << 3
516 #define FLOAT_CONST_TYPE 1 << 2
517 #define OTHER_CONST_TYPE 1 << 1
519 /* Classify an invariant tree into integer, float, or other, so that
520 we can sort them to be near other constants of the same type. */
521 static inline int
522 constant_type (tree t)
524 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
525 return INTEGER_CONST_TYPE;
526 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
528       /* Sort -1.0 and 1.0 constants last: while in some cases
529	 const_binop can't optimize some inexact operations, multiplication
530	 by -1.0 or 1.0 can always be merged with other operands.  */
531 if (real_onep (t) || real_minus_onep (t))
532 return FLOAT_ONE_CONST_TYPE;
533 return FLOAT_CONST_TYPE;
535 else
536 return OTHER_CONST_TYPE;
539 /* qsort comparison function to sort operand entries PA and PB by rank
540 so that the sorted array is ordered by rank in decreasing order. */
541 static int
542 sort_by_operand_rank (const void *pa, const void *pb)
544 const operand_entry *oea = *(const operand_entry *const *)pa;
545 const operand_entry *oeb = *(const operand_entry *const *)pb;
547 if (oeb->rank != oea->rank)
548 return oeb->rank > oea->rank ? 1 : -1;
550 /* It's nicer for optimize_expression if constants that are likely
551 to fold when added/multiplied/whatever are put next to each
552 other. Since all constants have rank 0, order them by type. */
553 if (oea->rank == 0)
555 if (constant_type (oeb->op) != constant_type (oea->op))
556 return constant_type (oea->op) - constant_type (oeb->op);
557 else
558 /* To make sorting result stable, we use unique IDs to determine
559 order. */
560 return oeb->id > oea->id ? 1 : -1;
563 if (TREE_CODE (oea->op) != SSA_NAME)
565 if (TREE_CODE (oeb->op) != SSA_NAME)
566 return oeb->id > oea->id ? 1 : -1;
567 else
568 return 1;
570 else if (TREE_CODE (oeb->op) != SSA_NAME)
571 return -1;
573 /* Lastly, make sure the versions that are the same go next to each
574 other. */
575 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
578 versions of removed SSA_NAMEs, so if possible, prefer to sort
579 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
580 See PR60418. */
581 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
582 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
583 basic_block bba = gimple_bb (stmta);
584 basic_block bbb = gimple_bb (stmtb);
585 if (bbb != bba)
587 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
588 but the other might not. */
589 if (!bba)
590 return 1;
591 if (!bbb)
592 return -1;
593 /* If neither is, compare bb_rank. */
594 if (bb_rank[bbb->index] != bb_rank[bba->index])
595 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
598 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600 if (da != db)
601 return da ? 1 : -1;
603 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
606 return oeb->id > oea->id ? 1 : -1;
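/* Minimal standalone sketch (hypothetical types, not the pass's
   operand_entry) of the ordering sort_by_operand_rank implements:
   descending rank first, rank-0 constants grouped by constant_type,
   and the unique id as the final tie-break so qsort results are
   stable across hosts.  The real comparator additionally orders
   SSA_NAMEs by definition block and dominance.  */

struct example_entry { long rank; int const_type; unsigned id; };

static int
example_rank_cmp (const void *pa, const void *pb)
{
  const example_entry *a = (const example_entry *) pa;
  const example_entry *b = (const example_entry *) pb;
  if (a->rank != b->rank)
    return b->rank > a->rank ? 1 : -1;		/* higher rank sorts first  */
  if (a->rank == 0 && a->const_type != b->const_type)
    return a->const_type - b->const_type;	/* group like constants  */
  return b->id > a->id ? 1 : -1;		/* deterministic tie-break  */
}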
609 /* Add an operand entry to *OPS for the tree operand OP. */
611 static void
612 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
614 operand_entry *oe = operand_entry_pool.allocate ();
616 oe->op = op;
617 oe->rank = get_rank (op);
618 oe->id = next_operand_entry_id++;
619 oe->count = 1;
620 oe->stmt_to_insert = stmt_to_insert;
621 ops->safe_push (oe);
624 /* Add an operand entry to *OPS for the tree operand OP with repeat
625 count REPEAT. */
627 static void
628 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
629 HOST_WIDE_INT repeat)
631 operand_entry *oe = operand_entry_pool.allocate ();
633 oe->op = op;
634 oe->rank = get_rank (op);
635 oe->id = next_operand_entry_id++;
636 oe->count = repeat;
637 oe->stmt_to_insert = NULL;
638 ops->safe_push (oe);
640 reassociate_stats.pows_encountered++;
643 /* Returns true if we can associate the SSA def OP. */
645 static bool
646 can_reassociate_op_p (tree op)
648 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
649 return false;
650 /* Make sure asm goto outputs do not participate in reassociation since
651 we have no way to find an insertion place after asm goto. */
652 if (TREE_CODE (op) == SSA_NAME
653 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
654 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
655 return false;
656 return true;
659 /* Returns true if we can reassociate operations of TYPE.
660 That is for integral or non-saturating fixed-point types, and for
661 floating point type when associative-math is enabled. */
663 static bool
664 can_reassociate_type_p (tree type)
666 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
667 || NON_SAT_FIXED_POINT_TYPE_P (type)
668 || (flag_associative_math && FLOAT_TYPE_P (type)))
669 return true;
670 return false;
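/* Standalone illustration of the type test above: wrapping (unsigned)
   integer arithmetic may be reassociated freely, whereas floating
   point needs -fassociative-math because (a + b) + c and a + (b + c)
   can round differently.  */

static unsigned
example_reassociable_unsigned (unsigned a, unsigned b, unsigned c)
{
  /* Always 0, even when the intermediate sums wrap.  */
  return ((a + b) + c) - (a + (b + c));
}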
673 /* Return true if STMT is a reassociable operation containing a binary
674 operation with tree code CODE, and is inside LOOP. */
676 static bool
677 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
679 basic_block bb = gimple_bb (stmt);
681 if (gimple_bb (stmt) == NULL)
682 return false;
684 if (!flow_bb_inside_loop_p (loop, bb))
685 return false;
687 if (is_gimple_assign (stmt)
688 && gimple_assign_rhs_code (stmt) == code
689 && has_single_use (gimple_assign_lhs (stmt)))
691 tree rhs1 = gimple_assign_rhs1 (stmt);
692 tree rhs2 = gimple_assign_rhs2 (stmt);
693 if (!can_reassociate_op_p (rhs1)
694 || (rhs2 && !can_reassociate_op_p (rhs2)))
695 return false;
696 return true;
699 return false;
703 /* Return true if STMT is a nop-conversion. */
705 static bool
706 gimple_nop_conversion_p (gimple *stmt)
708 if (gassign *ass = dyn_cast <gassign *> (stmt))
710 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
711 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
712 TREE_TYPE (gimple_assign_rhs1 (ass))))
713 return true;
715 return false;
718 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
719    operand of that unary operation.  Otherwise, return NULL_TREE.  */
721 static tree
722 get_unary_op (tree name, enum tree_code opcode)
724 gimple *stmt = SSA_NAME_DEF_STMT (name);
726 /* Look through nop conversions (sign changes). */
727 if (gimple_nop_conversion_p (stmt)
728 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
729 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
731 if (!is_gimple_assign (stmt))
732 return NULL_TREE;
734 if (gimple_assign_rhs_code (stmt) == opcode)
735 return gimple_assign_rhs1 (stmt);
736 return NULL_TREE;
739 /* Return true if OP1 and OP2 have the same value if cast to either type.  */
741 static bool
742 ops_equal_values_p (tree op1, tree op2)
744 if (op1 == op2)
745 return true;
747 tree orig_op1 = op1;
748 if (TREE_CODE (op1) == SSA_NAME)
750 gimple *stmt = SSA_NAME_DEF_STMT (op1);
751 if (gimple_nop_conversion_p (stmt))
753 op1 = gimple_assign_rhs1 (stmt);
754 if (op1 == op2)
755 return true;
759 if (TREE_CODE (op2) == SSA_NAME)
761 gimple *stmt = SSA_NAME_DEF_STMT (op2);
762 if (gimple_nop_conversion_p (stmt))
764 op2 = gimple_assign_rhs1 (stmt);
765 if (op1 == op2
766 || orig_op1 == op2)
767 return true;
771 return false;
775 /* If CURR and LAST are a pair of ops that OPCODE allows us to
776 eliminate through equivalences, do so, remove them from OPS, and
777 return true. Otherwise, return false. */
779 static bool
780 eliminate_duplicate_pair (enum tree_code opcode,
781 vec<operand_entry *> *ops,
782 bool *all_done,
783 unsigned int i,
784 operand_entry *curr,
785 operand_entry *last)
788   /* If we have two of the same op, and the opcode is &, |, min, or max,
789 we can eliminate one of them.
790 If we have two of the same op, and the opcode is ^, we can
791 eliminate both of them. */
793 if (last && last->op == curr->op)
795 switch (opcode)
797 case MAX_EXPR:
798 case MIN_EXPR:
799 case BIT_IOR_EXPR:
800 case BIT_AND_EXPR:
801 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "Equivalence: ");
804 print_generic_expr (dump_file, curr->op);
805 fprintf (dump_file, " [&|minmax] ");
806 print_generic_expr (dump_file, last->op);
807 fprintf (dump_file, " -> ");
808 print_generic_stmt (dump_file, last->op);
811 ops->ordered_remove (i);
812 reassociate_stats.ops_eliminated ++;
814 return true;
816 case BIT_XOR_EXPR:
817 if (dump_file && (dump_flags & TDF_DETAILS))
819 fprintf (dump_file, "Equivalence: ");
820 print_generic_expr (dump_file, curr->op);
821 fprintf (dump_file, " ^ ");
822 print_generic_expr (dump_file, last->op);
823 fprintf (dump_file, " -> nothing\n");
826 reassociate_stats.ops_eliminated += 2;
828 if (ops->length () == 2)
830 ops->truncate (0);
831 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
832 *all_done = true;
834 else
836 ops->ordered_remove (i-1);
837 ops->ordered_remove (i-1);
840 return true;
842 default:
843 break;
846 return false;
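/* The identities exploited above, spelled out as a standalone
   illustration: a duplicated operand collapses to a single copy for
   MIN, MAX, & and |, and a duplicated pair cancels entirely for ^.  */

static unsigned
example_duplicate_pairs (unsigned a, unsigned b)
{
  unsigned and_dup = (a & a) & b;	/* == a & b  */
  unsigned ior_dup = (a | a) | b;	/* == a | b  */
  unsigned xor_dup = (a ^ a) ^ b;	/* == b, since a ^ a == 0  */
  return and_dup + ior_dup + xor_dup;
}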
849 static vec<tree> plus_negates;
851 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
852 expression, look in OPS for a corresponding positive operation to cancel
853 it out. If we find one, remove the other from OPS, replace
854 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
855 return false. */
857 static bool
858 eliminate_plus_minus_pair (enum tree_code opcode,
859 vec<operand_entry *> *ops,
860 unsigned int currindex,
861 operand_entry *curr)
863 tree negateop;
864 tree notop;
865 unsigned int i;
866 operand_entry *oe;
868 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
869 return false;
871 negateop = get_unary_op (curr->op, NEGATE_EXPR);
872 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
873 if (negateop == NULL_TREE && notop == NULL_TREE)
874 return false;
876 /* Any non-negated version will have a rank that is one less than
877 the current rank. So once we hit those ranks, if we don't find
878 one, we can stop. */
880 for (i = currindex + 1;
881 ops->iterate (i, &oe)
882 && oe->rank >= curr->rank - 1 ;
883 i++)
885 if (negateop
886 && ops_equal_values_p (oe->op, negateop))
888 if (dump_file && (dump_flags & TDF_DETAILS))
890 fprintf (dump_file, "Equivalence: ");
891 print_generic_expr (dump_file, negateop);
892 fprintf (dump_file, " + -");
893 print_generic_expr (dump_file, oe->op);
894 fprintf (dump_file, " -> 0\n");
897 ops->ordered_remove (i);
898 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
899 ops->ordered_remove (currindex);
900 reassociate_stats.ops_eliminated ++;
902 return true;
904 else if (notop
905 && ops_equal_values_p (oe->op, notop))
907 tree op_type = TREE_TYPE (oe->op);
909 if (dump_file && (dump_flags & TDF_DETAILS))
911 fprintf (dump_file, "Equivalence: ");
912 print_generic_expr (dump_file, notop);
913 fprintf (dump_file, " + ~");
914 print_generic_expr (dump_file, oe->op);
915 fprintf (dump_file, " -> -1\n");
918 ops->ordered_remove (i);
919 add_to_ops_vec (ops, build_all_ones_cst (op_type));
920 ops->ordered_remove (currindex);
921 reassociate_stats.ops_eliminated ++;
923 return true;
927 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
928 save it for later inspection in repropagate_negates(). */
929 if (negateop != NULL_TREE
930 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
931 plus_negates.safe_push (curr->op);
933 return false;
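/* Standalone illustration of the pairs cancelled above in a PLUS_EXPR
   chain: a + (-a) folds to 0 and a + ~a folds to all ones.  Unsigned
   arithmetic is used here so the example has no undefined overflow.  */

static unsigned
example_plus_minus_pairs (unsigned a, unsigned b)
{
  unsigned negate_pair = (b + a) + -a;	/* == b, since a + -a == 0  */
  unsigned not_pair = (b + a) + ~a;	/* == b - 1, since a + ~a == ~0u  */
  return negate_pair + not_pair;
}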
936 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
937 bitwise not expression, look in OPS for a corresponding operand to
938 cancel it out. If we find one, remove the other from OPS, replace
939 OPS[CURRINDEX] with 0, and return true. Otherwise, return
940 false. */
942 static bool
943 eliminate_not_pairs (enum tree_code opcode,
944 vec<operand_entry *> *ops,
945 unsigned int currindex,
946 operand_entry *curr)
948 tree notop;
949 unsigned int i;
950 operand_entry *oe;
952 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
953 || TREE_CODE (curr->op) != SSA_NAME)
954 return false;
956 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
957 if (notop == NULL_TREE)
958 return false;
960 /* Any non-not version will have a rank that is one less than
961 the current rank. So once we hit those ranks, if we don't find
962 one, we can stop. */
964 for (i = currindex + 1;
965 ops->iterate (i, &oe)
966 && oe->rank >= curr->rank - 1;
967 i++)
969 if (oe->op == notop)
971 if (dump_file && (dump_flags & TDF_DETAILS))
973 fprintf (dump_file, "Equivalence: ");
974 print_generic_expr (dump_file, notop);
975 if (opcode == BIT_AND_EXPR)
976 fprintf (dump_file, " & ~");
977 else if (opcode == BIT_IOR_EXPR)
978 fprintf (dump_file, " | ~");
979 print_generic_expr (dump_file, oe->op);
980 if (opcode == BIT_AND_EXPR)
981 fprintf (dump_file, " -> 0\n");
982 else if (opcode == BIT_IOR_EXPR)
983 fprintf (dump_file, " -> -1\n");
986 if (opcode == BIT_AND_EXPR)
987 oe->op = build_zero_cst (TREE_TYPE (oe->op));
988 else if (opcode == BIT_IOR_EXPR)
989 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
991 reassociate_stats.ops_eliminated += ops->length () - 1;
992 ops->truncate (0);
993 ops->quick_push (oe);
994 return true;
998 return false;
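/* Standalone illustration of the not-pair identities used above: once
   A and ~A both appear in a BIT_AND_EXPR (or BIT_IOR_EXPR) chain, the
   whole chain collapses to 0 (respectively all ones), regardless of
   the other operands.  */

static unsigned
example_not_pairs (unsigned a, unsigned b, unsigned c)
{
  unsigned and_chain = (b & a) & (c & ~a);	/* == 0  */
  unsigned ior_chain = (b | a) | (c | ~a);	/* == ~0u  */
  return and_chain ^ ior_chain;
}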
1001 /* Use constant value that may be present in OPS to try to eliminate
1002 operands. Note that this function is only really used when we've
1003 eliminated ops for other reasons, or merged constants. Across
1004 single statements, fold already does all of this, plus more. There
1005 is little point in duplicating logic, so I've only included the
1006 identities that I could ever construct testcases to trigger. */
1008 static void
1009 eliminate_using_constants (enum tree_code opcode,
1010 vec<operand_entry *> *ops)
1012 operand_entry *oelast = ops->last ();
1013 tree type = TREE_TYPE (oelast->op);
1015 if (oelast->rank == 0
1016 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1018 switch (opcode)
1020 case BIT_AND_EXPR:
1021 if (integer_zerop (oelast->op))
1023 if (ops->length () != 1)
1025 if (dump_file && (dump_flags & TDF_DETAILS))
1026 fprintf (dump_file, "Found & 0, removing all other ops\n");
1028 reassociate_stats.ops_eliminated += ops->length () - 1;
1030 ops->truncate (0);
1031 ops->quick_push (oelast);
1032 return;
1035 else if (integer_all_onesp (oelast->op))
1037 if (ops->length () != 1)
1039 if (dump_file && (dump_flags & TDF_DETAILS))
1040 fprintf (dump_file, "Found & -1, removing\n");
1041 ops->pop ();
1042 reassociate_stats.ops_eliminated++;
1045 break;
1046 case BIT_IOR_EXPR:
1047 if (integer_all_onesp (oelast->op))
1049 if (ops->length () != 1)
1051 if (dump_file && (dump_flags & TDF_DETAILS))
1052 fprintf (dump_file, "Found | -1, removing all other ops\n");
1054 reassociate_stats.ops_eliminated += ops->length () - 1;
1056 ops->truncate (0);
1057 ops->quick_push (oelast);
1058 return;
1061 else if (integer_zerop (oelast->op))
1063 if (ops->length () != 1)
1065 if (dump_file && (dump_flags & TDF_DETAILS))
1066 fprintf (dump_file, "Found | 0, removing\n");
1067 ops->pop ();
1068 reassociate_stats.ops_eliminated++;
1071 break;
1072 case MULT_EXPR:
1073 if (integer_zerop (oelast->op)
1074 || (FLOAT_TYPE_P (type)
1075 && !HONOR_NANS (type)
1076 && !HONOR_SIGNED_ZEROS (type)
1077 && real_zerop (oelast->op)))
1079 if (ops->length () != 1)
1081 if (dump_file && (dump_flags & TDF_DETAILS))
1082 fprintf (dump_file, "Found * 0, removing all other ops\n");
1084 reassociate_stats.ops_eliminated += ops->length () - 1;
1085 ops->truncate (0);
1086 ops->quick_push (oelast);
1087 return;
1090 else if (integer_onep (oelast->op)
1091 || (FLOAT_TYPE_P (type)
1092 && !HONOR_SNANS (type)
1093 && real_onep (oelast->op)))
1095 if (ops->length () != 1)
1097 if (dump_file && (dump_flags & TDF_DETAILS))
1098 fprintf (dump_file, "Found * 1, removing\n");
1099 ops->pop ();
1100 reassociate_stats.ops_eliminated++;
1101 return;
1104 break;
1105 case BIT_XOR_EXPR:
1106 case PLUS_EXPR:
1107 case MINUS_EXPR:
1108 if (integer_zerop (oelast->op)
1109 || (FLOAT_TYPE_P (type)
1110 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1111 && fold_real_zero_addition_p (type, 0, oelast->op,
1112 opcode == MINUS_EXPR)))
1114 if (ops->length () != 1)
1116 if (dump_file && (dump_flags & TDF_DETAILS))
1117 fprintf (dump_file, "Found [|^+] 0, removing\n");
1118 ops->pop ();
1119 reassociate_stats.ops_eliminated++;
1120 return;
1123 break;
1124 default:
1125 break;
1131 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1132 bool, bool);
1134 /* Structure for tracking and counting operands. */
1135 struct oecount {
1136 unsigned int cnt;
1137 unsigned int id;
1138 enum tree_code oecode;
1139 tree op;
1143 /* The heap for the oecount hashtable and the sorted list of operands. */
1144 static vec<oecount> cvec;
1147 /* Oecount hashtable helpers. */
1149 struct oecount_hasher : int_hash <int, 0, 1>
1151 static inline hashval_t hash (int);
1152 static inline bool equal (int, int);
1155 /* Hash function for oecount. */
1157 inline hashval_t
1158 oecount_hasher::hash (int p)
1160 const oecount *c = &cvec[p - 42];
1161 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1164 /* Comparison function for oecount. */
1166 inline bool
1167 oecount_hasher::equal (int p1, int p2)
1169 const oecount *c1 = &cvec[p1 - 42];
1170 const oecount *c2 = &cvec[p2 - 42];
1171 return c1->oecode == c2->oecode && c1->op == c2->op;
1174 /* Comparison function for qsort sorting oecount elements by count. */
1176 static int
1177 oecount_cmp (const void *p1, const void *p2)
1179 const oecount *c1 = (const oecount *)p1;
1180 const oecount *c2 = (const oecount *)p2;
1181 if (c1->cnt != c2->cnt)
1182 return c1->cnt > c2->cnt ? 1 : -1;
1183 else
1184 /* If counts are identical, use unique IDs to stabilize qsort. */
1185 return c1->id > c2->id ? 1 : -1;
1188 /* Return TRUE iff STMT represents a builtin call that raises OP
1189 to some exponent. */
1191 static bool
1192 stmt_is_power_of_op (gimple *stmt, tree op)
1194 if (!is_gimple_call (stmt))
1195 return false;
1197 switch (gimple_call_combined_fn (stmt))
1199 CASE_CFN_POW:
1200 CASE_CFN_POWI:
1201 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1203 default:
1204 return false;
1208 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1209 in place and return the result. Assumes that stmt_is_power_of_op
1210 was previously called for STMT and returned TRUE. */
1212 static HOST_WIDE_INT
1213 decrement_power (gimple *stmt)
1215 REAL_VALUE_TYPE c, cint;
1216 HOST_WIDE_INT power;
1217 tree arg1;
1219 switch (gimple_call_combined_fn (stmt))
1221 CASE_CFN_POW:
1222 arg1 = gimple_call_arg (stmt, 1);
1223 c = TREE_REAL_CST (arg1);
1224 power = real_to_integer (&c) - 1;
1225 real_from_integer (&cint, VOIDmode, power, SIGNED);
1226 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1227 return power;
1229 CASE_CFN_POWI:
1230 arg1 = gimple_call_arg (stmt, 1);
1231 power = TREE_INT_CST_LOW (arg1) - 1;
1232 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1233 return power;
1235 default:
1236 gcc_unreachable ();
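/* Standalone arithmetic illustration (not the GCC API) of the fact
   decrement_power relies on: one factor OP can be peeled out of a
   __builtin_pow/__builtin_powi call, since pow (x, n) == x * pow (x, n - 1)
   for the integral exponents the pass handles.  */

#include <cmath>

static double
example_peel_one_factor (double x, int n)
{
  /* The pass rewrites the call to use exponent N - 1 and accounts for
     the peeled factor X in the surrounding multiplication chain.  */
  return x * std::pow (x, n - 1);
}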
1240 /* Replace the SSA name defined by STMT with a new SSA name, replace all
1241    uses of the old name with the new one, and return the new SSA name.  */
1243 static tree
1244 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1246 gimple *use_stmt;
1247 use_operand_p use;
1248 imm_use_iterator iter;
1249 tree new_lhs, new_debug_lhs = NULL_TREE;
1250 tree lhs = gimple_get_lhs (stmt);
1252 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1253 gimple_set_lhs (stmt, new_lhs);
1255 /* Also need to update GIMPLE_DEBUGs. */
1256 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1258 tree repl = new_lhs;
1259 if (is_gimple_debug (use_stmt))
1261 if (new_debug_lhs == NULL_TREE)
1263 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1264 gdebug *def_temp
1265 = gimple_build_debug_bind (new_debug_lhs,
1266 build2 (opcode, TREE_TYPE (lhs),
1267 new_lhs, op),
1268 stmt);
1269 gimple_set_uid (def_temp, gimple_uid (stmt));
1270 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1271 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1273 repl = new_debug_lhs;
1275 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1276 SET_USE (use, repl);
1277 update_stmt (use_stmt);
1279 return new_lhs;
1282 /* Replace all SSA names defined by the stmts in STMTS_TO_FIX with new
1283    SSA names and update their uses.  Also do this for the stmt that
1284    defines *DEF if *DEF is not OP.  */
1286 static void
1287 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1288 vec<gimple *> &stmts_to_fix)
1290 unsigned i;
1291 gimple *stmt;
1293 if (*def != op
1294 && TREE_CODE (*def) == SSA_NAME
1295 && (stmt = SSA_NAME_DEF_STMT (*def))
1296 && gimple_code (stmt) != GIMPLE_NOP)
1297 *def = make_new_ssa_for_def (stmt, opcode, op);
1299 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1300 make_new_ssa_for_def (stmt, opcode, op);
1303 /* Find the single immediate use of STMT's LHS, and replace it
1304 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1305 replace *DEF with OP as well. */
1307 static void
1308 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1310 tree lhs;
1311 gimple *use_stmt;
1312 use_operand_p use;
1313 gimple_stmt_iterator gsi;
1315 if (is_gimple_call (stmt))
1316 lhs = gimple_call_lhs (stmt);
1317 else
1318 lhs = gimple_assign_lhs (stmt);
1320 gcc_assert (has_single_use (lhs));
1321 single_imm_use (lhs, &use, &use_stmt);
1322 if (lhs == *def)
1323 *def = op;
1324 SET_USE (use, op);
1325 if (TREE_CODE (op) != SSA_NAME)
1326 update_stmt (use_stmt);
1327 gsi = gsi_for_stmt (stmt);
1328 unlink_stmt_vdef (stmt);
1329 reassoc_remove_stmt (&gsi);
1330 release_defs (stmt);
1333 /* Walks the linear chain with result *DEF searching for an operation
1334 with operand OP and code OPCODE removing that from the chain. *DEF
1335 is updated if there is only one operand but no operation left. */
1337 static void
1338 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1340 tree orig_def = *def;
1341 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1342 /* PR72835 - Record the stmt chain that has to be updated such that
1343      we don't use the same LHS when the values computed are different.  */
1344 auto_vec<gimple *, 64> stmts_to_fix;
1348 tree name;
1350 if (opcode == MULT_EXPR)
1352 if (stmt_is_power_of_op (stmt, op))
1354 if (decrement_power (stmt) == 1)
1356 if (stmts_to_fix.length () > 0)
1357 stmts_to_fix.pop ();
1358 propagate_op_to_single_use (op, stmt, def);
1360 break;
1362 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1364 if (gimple_assign_rhs1 (stmt) == op)
1366 tree cst = build_minus_one_cst (TREE_TYPE (op));
1367 if (stmts_to_fix.length () > 0)
1368 stmts_to_fix.pop ();
1369 propagate_op_to_single_use (cst, stmt, def);
1370 break;
1372 else if (integer_minus_onep (op)
1373 || real_minus_onep (op))
1375 gimple_assign_set_rhs_code
1376 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1377 break;
1382 name = gimple_assign_rhs1 (stmt);
1384 /* If this is the operation we look for and one of the operands
1385 is ours simply propagate the other operand into the stmts
1386 single use. */
1387 if (gimple_assign_rhs_code (stmt) == opcode
1388 && (name == op
1389 || gimple_assign_rhs2 (stmt) == op))
1391 if (name == op)
1392 name = gimple_assign_rhs2 (stmt);
1393 if (stmts_to_fix.length () > 0)
1394 stmts_to_fix.pop ();
1395 propagate_op_to_single_use (name, stmt, def);
1396 break;
1399 /* We might have a multiply of two __builtin_pow* calls, and
1400 the operand might be hiding in the rightmost one. Likewise
1401 this can happen for a negate. */
1402 if (opcode == MULT_EXPR
1403 && gimple_assign_rhs_code (stmt) == opcode
1404 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1405 && has_single_use (gimple_assign_rhs2 (stmt)))
1407 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1408 if (stmt_is_power_of_op (stmt2, op))
1410 if (decrement_power (stmt2) == 1)
1411 propagate_op_to_single_use (op, stmt2, def);
1412 else
1413 stmts_to_fix.safe_push (stmt2);
1414 break;
1416 else if (is_gimple_assign (stmt2)
1417 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1419 if (gimple_assign_rhs1 (stmt2) == op)
1421 tree cst = build_minus_one_cst (TREE_TYPE (op));
1422 propagate_op_to_single_use (cst, stmt2, def);
1423 break;
1425 else if (integer_minus_onep (op)
1426 || real_minus_onep (op))
1428 stmts_to_fix.safe_push (stmt2);
1429 gimple_assign_set_rhs_code
1430 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1431 break;
1436 /* Continue walking the chain. */
1437 gcc_assert (name != op
1438 && TREE_CODE (name) == SSA_NAME);
1439 stmt = SSA_NAME_DEF_STMT (name);
1440 stmts_to_fix.safe_push (stmt);
1442 while (1);
1444 if (stmts_to_fix.length () > 0 || *def == orig_def)
1445 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
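/* Standalone before/after sketch (not the GCC API) of what
   zero_one_operation achieves on a multiplication chain: exactly one
   occurrence of OP is removed from the chain computing *DEF, leaving
   the product of the remaining factors.  */

static double
example_zero_one_operation (double a, double b, double op)
{
  double chain = (a * b) * op;	/* original chain defining *DEF  */
  double stripped = a * b;	/* same chain with one OP removed  */
  return chain - stripped * op;	/* 0 up to rounding  */
}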
1448 /* Returns true if statement S1 dominates statement S2. Like
1449 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1451 static bool
1452 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1454 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1456 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1457 SSA_NAME. Assume it lives at the beginning of function and
1458 thus dominates everything. */
1459 if (!bb1 || s1 == s2)
1460 return true;
1462 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1463 if (!bb2)
1464 return false;
1466 if (bb1 == bb2)
1468 /* PHIs in the same basic block are assumed to be
1469 executed all in parallel, if only one stmt is a PHI,
1470 it dominates the other stmt in the same basic block. */
1471 if (gimple_code (s1) == GIMPLE_PHI)
1472 return true;
1474 if (gimple_code (s2) == GIMPLE_PHI)
1475 return false;
1477 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1479 if (gimple_uid (s1) < gimple_uid (s2))
1480 return true;
1482 if (gimple_uid (s1) > gimple_uid (s2))
1483 return false;
1485 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1486 unsigned int uid = gimple_uid (s1);
1487 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1489 gimple *s = gsi_stmt (gsi);
1490 if (gimple_uid (s) != uid)
1491 break;
1492 if (s == s2)
1493 return true;
1496 return false;
1499 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1502 /* Insert STMT after INSERT_POINT. */
1504 static void
1505 insert_stmt_after (gimple *stmt, gimple *insert_point)
1507 gimple_stmt_iterator gsi;
1508 basic_block bb;
1510 if (gimple_code (insert_point) == GIMPLE_PHI)
1511 bb = gimple_bb (insert_point);
1512 else if (!stmt_ends_bb_p (insert_point))
1514 gsi = gsi_for_stmt (insert_point);
1515 gimple_set_uid (stmt, gimple_uid (insert_point));
1516 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1517 return;
1519 else if (gimple_code (insert_point) == GIMPLE_ASM
1520 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1521 /* We have no idea where to insert - it depends on where the
1522 uses will be placed. */
1523 gcc_unreachable ();
1524 else
1525 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1526 thus if it must end a basic block, it should be a call that can
1527 throw, or some assignment that can throw. If it throws, the LHS
1528 of it will not be initialized though, so only valid places using
1529 the SSA_NAME should be dominated by the fallthru edge. */
1530 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1531 gsi = gsi_after_labels (bb);
1532 if (gsi_end_p (gsi))
1534 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1535 gimple_set_uid (stmt,
1536 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1538 else
1539 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1540 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1543 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1544 the result. Places the statement after the definition of either
1545 OP1 or OP2. Returns the new statement. */
1547 static gimple *
1548 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1550 gimple *op1def = NULL, *op2def = NULL;
1551 gimple_stmt_iterator gsi;
1552 tree op;
1553 gassign *sum;
1555 /* Create the addition statement. */
1556 op = make_ssa_name (type);
1557 sum = gimple_build_assign (op, opcode, op1, op2);
1559 /* Find an insertion place and insert. */
1560 if (TREE_CODE (op1) == SSA_NAME)
1561 op1def = SSA_NAME_DEF_STMT (op1);
1562 if (TREE_CODE (op2) == SSA_NAME)
1563 op2def = SSA_NAME_DEF_STMT (op2);
1564 if ((!op1def || gimple_nop_p (op1def))
1565 && (!op2def || gimple_nop_p (op2def)))
1567 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1568 if (!gsi_end_p (gsi)
1569 && is_gimple_call (gsi_stmt (gsi))
1570 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1572 /* Don't add statements before a returns_twice call at the start
1573 of a function. */
1574 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1575 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1577 if (gsi_end_p (gsi))
1579 gimple_stmt_iterator gsi2
1580 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1581 gimple_set_uid (sum,
1582 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1584 else
1585 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1586 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1588 else
1590 gimple *insert_point;
1591 if ((!op1def || gimple_nop_p (op1def))
1592 || (op2def && !gimple_nop_p (op2def)
1593 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1594 insert_point = op2def;
1595 else
1596 insert_point = op1def;
1597 insert_stmt_after (sum, insert_point);
1599 update_stmt (sum);
1601 return sum;
1604 /* Perform un-distribution of divisions and multiplications.
1605 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1606 to (A + B) / X for real X.
1608 The algorithm is organized as follows.
1610 - First we walk the addition chain *OPS looking for summands that
1611 are defined by a multiplication or a real division. This results
1612 in the candidates bitmap with relevant indices into *OPS.
1614 - Second we build the chains of multiplications or divisions for
1615 these candidates, counting the number of occurrences of (operand, code)
1616 pairs in all of the candidates chains.
1618 - Third we sort the (operand, code) pairs by number of occurrence and
1619 process them starting with the pair with the most uses.
1621 * For each such pair we walk the candidates again to build a
1622 second candidate bitmap noting all multiplication/division chains
1623 that have at least one occurrence of (operand, code).
1625 * We build an alternate addition chain only covering these
1626 candidates with one (operand, code) operation removed from their
1627 multiplication/division chain.
1629 * The first candidate gets replaced by the alternate addition chain
1630 multiplied/divided by the operand.
1632 * All candidate chains get disabled for further processing and
1633 processing of (operand, code) pairs continues.
1635 The alternate addition chains built are re-processed by the main
1636 reassociation algorithm which allows optimizing a * x * y + b * y * x
1637 to (a + b ) * x * y in one invocation of the reassociation pass. */
1639 static bool
1640 undistribute_ops_list (enum tree_code opcode,
1641 vec<operand_entry *> *ops, class loop *loop)
1643 unsigned int length = ops->length ();
1644 operand_entry *oe1;
1645 unsigned i, j;
1646 unsigned nr_candidates, nr_candidates2;
1647 sbitmap_iterator sbi0;
1648 vec<operand_entry *> *subops;
1649 bool changed = false;
1650 unsigned int next_oecount_id = 0;
1652 if (length <= 1
1653 || opcode != PLUS_EXPR)
1654 return false;
1656 /* Build a list of candidates to process. */
1657 auto_sbitmap candidates (length);
1658 bitmap_clear (candidates);
1659 nr_candidates = 0;
1660 FOR_EACH_VEC_ELT (*ops, i, oe1)
1662 enum tree_code dcode;
1663 gimple *oe1def;
1665 if (TREE_CODE (oe1->op) != SSA_NAME)
1666 continue;
1667 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1668 if (!is_gimple_assign (oe1def))
1669 continue;
1670 dcode = gimple_assign_rhs_code (oe1def);
1671 if ((dcode != MULT_EXPR
1672 && dcode != RDIV_EXPR)
1673 || !is_reassociable_op (oe1def, dcode, loop))
1674 continue;
1676 bitmap_set_bit (candidates, i);
1677 nr_candidates++;
1680 if (nr_candidates < 2)
1681 return false;
1683 if (dump_file && (dump_flags & TDF_DETAILS))
1685 fprintf (dump_file, "searching for un-distribute opportunities ");
1686 print_generic_expr (dump_file,
1687 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1688 fprintf (dump_file, " %d\n", nr_candidates);
1691 /* Build linearized sub-operand lists and the counting table. */
1692 cvec.create (0);
1694 hash_table<oecount_hasher> ctable (15);
1696 /* ??? Macro arguments cannot have multi-argument template types in
1697      them.  This typedef is needed to work around that limitation.  */
1698 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1699 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1700 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1702 gimple *oedef;
1703 enum tree_code oecode;
1704 unsigned j;
1706 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1707 oecode = gimple_assign_rhs_code (oedef);
1708 linearize_expr_tree (&subops[i], oedef,
1709 associative_tree_code (oecode), false);
1711 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1713 oecount c;
1714 int *slot;
1715 int idx;
1716 c.oecode = oecode;
1717 c.cnt = 1;
1718 c.id = next_oecount_id++;
1719 c.op = oe1->op;
1720 cvec.safe_push (c);
1721 idx = cvec.length () + 41;
1722 slot = ctable.find_slot (idx, INSERT);
1723 if (!*slot)
1725 *slot = idx;
1727 else
1729 cvec.pop ();
1730 cvec[*slot - 42].cnt++;
1735 /* Sort the counting table. */
1736 cvec.qsort (oecount_cmp);
1738 if (dump_file && (dump_flags & TDF_DETAILS))
1740 oecount *c;
1741 fprintf (dump_file, "Candidates:\n");
1742 FOR_EACH_VEC_ELT (cvec, j, c)
1744 fprintf (dump_file, " %u %s: ", c->cnt,
1745 c->oecode == MULT_EXPR
1746 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1747 print_generic_expr (dump_file, c->op);
1748 fprintf (dump_file, "\n");
1752 /* Process the (operand, code) pairs in order of most occurrence. */
1753 auto_sbitmap candidates2 (length);
1754 while (!cvec.is_empty ())
1756 oecount *c = &cvec.last ();
1757 if (c->cnt < 2)
1758 break;
1760 /* Now collect the operands in the outer chain that contain
1761 the common operand in their inner chain. */
1762 bitmap_clear (candidates2);
1763 nr_candidates2 = 0;
1764 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1766 gimple *oedef;
1767 enum tree_code oecode;
1768 unsigned j;
1769 tree op = (*ops)[i]->op;
1771 /* If we undistributed in this chain already this may be
1772 a constant. */
1773 if (TREE_CODE (op) != SSA_NAME)
1774 continue;
1776 oedef = SSA_NAME_DEF_STMT (op);
1777 oecode = gimple_assign_rhs_code (oedef);
1778 if (oecode != c->oecode)
1779 continue;
1781 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1783 if (oe1->op == c->op)
1785 bitmap_set_bit (candidates2, i);
1786 ++nr_candidates2;
1787 break;
1792 if (nr_candidates2 >= 2)
1794 operand_entry *oe1, *oe2;
1795 gimple *prod;
1796 int first = bitmap_first_set_bit (candidates2);
1798 /* Build the new addition chain. */
1799 oe1 = (*ops)[first];
1800 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, "Building (");
1803 print_generic_expr (dump_file, oe1->op);
1805 zero_one_operation (&oe1->op, c->oecode, c->op);
1806 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1808 gimple *sum;
1809 oe2 = (*ops)[i];
1810 if (dump_file && (dump_flags & TDF_DETAILS))
1812 fprintf (dump_file, " + ");
1813 print_generic_expr (dump_file, oe2->op);
1815 zero_one_operation (&oe2->op, c->oecode, c->op);
1816 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1817 oe1->op, oe2->op, opcode);
1818 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1819 oe2->rank = 0;
1820 oe1->op = gimple_get_lhs (sum);
1823 /* Apply the multiplication/division. */
1824 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1825 oe1->op, c->op, c->oecode);
1826 if (dump_file && (dump_flags & TDF_DETAILS))
1828 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1829 print_generic_expr (dump_file, c->op);
1830 fprintf (dump_file, "\n");
1833 /* Record it in the addition chain and disable further
1834 undistribution with this op. */
1835 oe1->op = gimple_assign_lhs (prod);
1836 oe1->rank = get_rank (oe1->op);
1837 subops[first].release ();
1839 changed = true;
1842 cvec.pop ();
1845 for (i = 0; i < ops->length (); ++i)
1846 subops[i].release ();
1847 free (subops);
1848 cvec.release ();
1850 return changed;
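/* Standalone before/after illustration (hypothetical helper, not used
   by the pass) of the un-distribution performed above: the common
   factor X is pulled out of the addition chain, and the new chain
   (A + B) * X is fed back to reassociation.  For doubles this rewrite
   is only licensed under -fassociative-math.  */

static double
example_undistribute (double a, double b, double x)
{
  double before = a * x + b * x;	/* two multiplies feeding one add  */
  double after = (a + b) * x;		/* one add feeding one multiply  */
  return before - after;		/* 0 up to rounding  */
}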
1853 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1854 first: element index for each relevant BIT_FIELD_REF.
1855 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1856 typedef std::pair<unsigned, unsigned> v_info_elem;
1857 struct v_info {
1858 tree vec_type;
1859 auto_vec<v_info_elem, 32> vec;
1861 typedef v_info *v_info_ptr;
1863 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1864 static int
1865 sort_by_mach_mode (const void *p_i, const void *p_j)
1867 const tree tr1 = *((const tree *) p_i);
1868 const tree tr2 = *((const tree *) p_j);
1869 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1870 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1871 if (mode1 > mode2)
1872 return 1;
1873 else if (mode1 < mode2)
1874 return -1;
1875 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1876 return -1;
1877 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1878 return 1;
1879 return 0;
1882 /* Cleanup hash map for VECTOR information. */
1883 static void
1884 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1886 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1887 it != info_map.end (); ++it)
1889 v_info_ptr info = (*it).second;
1890 delete info;
1891 (*it).second = NULL;
1895 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1896 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1897 is transformed to
1898 Vs = (V1 + V2 + ... + Vn)
1899 Vs[0] + Vs[1] + ... + Vs[k]
1901 The basic steps are listed below:
1903    1) Check the addition chain *OPS by looking for summands coming from
1904       VECTOR BIT_FIELD_REFs on VECTOR type.  Put the information into
1905       v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1907    2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs
1908       are contiguous and cover the whole VECTOR perfectly without any holes.
1909       Obtain one VECTOR list which contains candidates to be transformed.
1911    3) Sort the VECTOR list by machine mode of VECTOR type; for each group of
1912       candidates with the same mode, build the addition statements for them
1913       and generate BIT_FIELD_REFs accordingly.
1915    TODO:
1916       The current implementation requires that the whole VECTOR is fully
1917       covered.  It could be extended to support partial coverage, i.e.
1918       adjacent elements that do not fill the whole VECTOR, but that would
1919       need some cost model to decide where to draw the boundary.  */
1921 static bool
1922 undistribute_bitref_for_vector (enum tree_code opcode,
1923 vec<operand_entry *> *ops, struct loop *loop)
1925 if (ops->length () <= 1)
1926 return false;
1928 if (opcode != PLUS_EXPR
1929 && opcode != MULT_EXPR
1930 && opcode != BIT_XOR_EXPR
1931 && opcode != BIT_IOR_EXPR
1932 && opcode != BIT_AND_EXPR)
1933 return false;
1935 hash_map<tree, v_info_ptr> v_info_map;
1936 operand_entry *oe1;
1937 unsigned i;
1939 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1940 information into map. */
1941 FOR_EACH_VEC_ELT (*ops, i, oe1)
1943 enum tree_code dcode;
1944 gimple *oe1def;
1946 if (TREE_CODE (oe1->op) != SSA_NAME)
1947 continue;
1948 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1949 if (!is_gimple_assign (oe1def))
1950 continue;
1951 dcode = gimple_assign_rhs_code (oe1def);
1952 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1953 continue;
1955 tree rhs = gimple_assign_rhs1 (oe1def);
1956 tree vec = TREE_OPERAND (rhs, 0);
1957 tree vec_type = TREE_TYPE (vec);
1959 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1960 continue;
1962 /* Ignore it if target machine can't support this VECTOR type. */
1963 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1964 continue;
1966 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1967 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1968 continue;
1970 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1971 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1972 continue;
1974 /* The type of BIT_FIELD_REF might not be equal to the element type of
1975 the vector. We want to use a vector type with element type the
1976 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1977 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1979 machine_mode simd_mode;
1980 unsigned HOST_WIDE_INT size, nunits;
1981 unsigned HOST_WIDE_INT elem_size
1982 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1983 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1984 continue;
1985 if (size <= elem_size || (size % elem_size) != 0)
1986 continue;
1987 nunits = size / elem_size;
1988 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1989 nunits).exists (&simd_mode))
1990 continue;
1991 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1993 /* Ignore it if target machine can't support this VECTOR type. */
1994 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1995 continue;
1997 /* Check const vector type, constrain BIT_FIELD_REF offset and
1998 size. */
1999 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2000 continue;
2002 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2003 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2004 continue;
2007 tree elem_type = TREE_TYPE (vec_type);
2008 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2009 if (maybe_ne (bit_field_size (rhs), elem_size))
2010 continue;
2012 unsigned idx;
2013 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2014 continue;
2016 /* Ignore it if target machine can't support this type of VECTOR
2017 operation. */
2018 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2019 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2020 continue;
2022 bool existed;
2023 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2024 if (!existed)
2026 info = new v_info;
2027 info->vec_type = vec_type;
2029 else if (!types_compatible_p (vec_type, info->vec_type))
2030 continue;
2031 info->vec.safe_push (std::make_pair (idx, i));
2034 /* We need at least two VECTORs to combine. */
2035 if (v_info_map.elements () <= 1)
2037 cleanup_vinfo_map (v_info_map);
2038 return false;
2041 /* Verify all VECTOR candidates by checking two conditions:
2042 1) sorted offsets are adjacent, no holes.
2043 2) can fill the whole VECTOR perfectly.
2044 And add the valid candidates to a vector for further handling. */
2045 auto_vec<tree> valid_vecs (v_info_map.elements ());
2046 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2047 it != v_info_map.end (); ++it)
2049 tree cand_vec = (*it).first;
2050 v_info_ptr cand_info = (*it).second;
2051 unsigned int num_elems
2052 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2053 if (cand_info->vec.length () != num_elems)
2054 continue;
2055 sbitmap holes = sbitmap_alloc (num_elems);
2056 bitmap_ones (holes);
2057 bool valid = true;
2058 v_info_elem *curr;
2059 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2061 if (!bitmap_bit_p (holes, curr->first))
2063 valid = false;
2064 break;
2066 else
2067 bitmap_clear_bit (holes, curr->first);
2069 if (valid && bitmap_empty_p (holes))
2070 valid_vecs.quick_push (cand_vec);
2071 sbitmap_free (holes);
2074 /* We need at least two VECTORs to combine. */
2075 if (valid_vecs.length () <= 1)
2077 cleanup_vinfo_map (v_info_map);
2078 return false;
2081 valid_vecs.qsort (sort_by_mach_mode);
2082 /* Go through all candidates in machine mode order; after the sort,
2083 candidates with the same mode are adjacent, and modes with only a single candidate are skipped. */
2084 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2086 tree tvec = valid_vecs[i];
2087 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2089 /* Skip modes with only a single candidate. */
2090 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2091 continue;
2093 unsigned int idx, j;
2094 gimple *sum = NULL;
2095 tree sum_vec = tvec;
2096 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2097 v_info_elem *elem;
2098 tree vec_type = info_ptr->vec_type;
2100 /* Build the sum for all candidates with same mode. */
2103 sum = build_and_add_sum (vec_type, sum_vec,
2104 valid_vecs[i + 1], opcode);
2105 /* Update the operands only after build_and_add_sum,
2106 so that we don't have to repeat the placement algorithm
2107 of build_and_add_sum. */
2108 if (sum_vec == tvec
2109 && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2111 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2112 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2113 tree lhs = make_ssa_name (vec_type);
2114 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2115 gimple_set_uid (g, gimple_uid (sum));
2116 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2117 gimple_assign_set_rhs1 (sum, lhs);
2118 update_stmt (sum);
2120 if (!useless_type_conversion_p (vec_type,
2121 TREE_TYPE (valid_vecs[i + 1])))
2123 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2124 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2125 valid_vecs[i + 1]);
2126 tree lhs = make_ssa_name (vec_type);
2127 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2128 gimple_set_uid (g, gimple_uid (sum));
2129 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2130 gimple_assign_set_rhs2 (sum, lhs);
2131 update_stmt (sum);
2133 sum_vec = gimple_get_lhs (sum);
2134 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2135 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2136 /* Update those related ops of current candidate VECTOR. */
2137 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2139 idx = elem->second;
2140 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2142 /* Set this so that the op definition will get DCEd later. */
2142 gimple_set_visited (def, true);
2143 if (opcode == PLUS_EXPR
2144 || opcode == BIT_XOR_EXPR
2145 || opcode == BIT_IOR_EXPR)
2146 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2147 else if (opcode == MULT_EXPR)
2148 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2149 else
2151 gcc_assert (opcode == BIT_AND_EXPR);
2152 (*ops)[idx]->op
2153 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2155 (*ops)[idx]->rank = 0;
2157 if (dump_file && (dump_flags & TDF_DETAILS))
2159 fprintf (dump_file, "Generating addition -> ");
2160 print_gimple_stmt (dump_file, sum, 0);
2162 i++;
2164 while ((i < valid_vecs.length () - 1)
2165 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2167 /* Referring to first valid VECTOR with this mode, generate the
2168 BIT_FIELD_REF statements accordingly. */
2169 info_ptr = *(v_info_map.get (tvec));
2170 gcc_assert (sum);
2171 tree elem_type = TREE_TYPE (vec_type);
2172 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2174 idx = elem->second;
2175 tree dst = make_ssa_name (elem_type);
2176 tree pos = bitsize_int (elem->first
2177 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2178 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2179 TYPE_SIZE (elem_type), pos);
2180 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2181 insert_stmt_after (gs, sum);
2182 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2184 /* Set this so that the op definition will get DCEd later. */
2184 gimple_set_visited (def, true);
2185 (*ops)[idx]->op = gimple_assign_lhs (gs);
2186 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2187 if (dump_file && (dump_flags & TDF_DETAILS))
2189 fprintf (dump_file, "Generating bit_field_ref -> ");
2190 print_gimple_stmt (dump_file, gs, 0);
2195 if (dump_file && (dump_flags & TDF_DETAILS))
2196 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2198 cleanup_vinfo_map (v_info_map);
2200 return true;
2203 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2204 expression, examine the other OPS to see if any of them are comparisons
2205 of the same values, which we may be able to combine or eliminate.
2206 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
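/* Another pair of illustrative cases (integer operands assumed): under
   BIT_AND_EXPR, (a < b) & (a != b) folds to just (a < b), while
   (a < b) & (a > b) folds to the constant 0; in the constant case the
   folded value is appended to the end of the operand list instead of
   replacing the current entry in place, as handled below.  */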
2208 static bool
2209 eliminate_redundant_comparison (enum tree_code opcode,
2210 vec<operand_entry *> *ops,
2211 unsigned int currindex,
2212 operand_entry *curr)
2214 tree op1, op2;
2215 enum tree_code lcode, rcode;
2216 gimple *def1, *def2;
2217 int i;
2218 operand_entry *oe;
2220 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2221 return false;
2223 /* Check that CURR is a comparison. */
2224 if (TREE_CODE (curr->op) != SSA_NAME)
2225 return false;
2226 def1 = SSA_NAME_DEF_STMT (curr->op);
2227 if (!is_gimple_assign (def1))
2228 return false;
2229 lcode = gimple_assign_rhs_code (def1);
2230 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2231 return false;
2232 op1 = gimple_assign_rhs1 (def1);
2233 op2 = gimple_assign_rhs2 (def1);
2235 /* Now look for a similar comparison in the remaining OPS. */
2236 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2238 tree t;
2240 if (TREE_CODE (oe->op) != SSA_NAME)
2241 continue;
2242 def2 = SSA_NAME_DEF_STMT (oe->op);
2243 if (!is_gimple_assign (def2))
2244 continue;
2245 rcode = gimple_assign_rhs_code (def2);
2246 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2247 continue;
2249 /* If we got here, we have a match. See if we can combine the
2250 two comparisons. */
2251 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2252 if (opcode == BIT_IOR_EXPR)
2253 t = maybe_fold_or_comparisons (type,
2254 lcode, op1, op2,
2255 rcode, gimple_assign_rhs1 (def2),
2256 gimple_assign_rhs2 (def2));
2257 else
2258 t = maybe_fold_and_comparisons (type,
2259 lcode, op1, op2,
2260 rcode, gimple_assign_rhs1 (def2),
2261 gimple_assign_rhs2 (def2));
2262 if (!t)
2263 continue;
2265 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2266 always give us a boolean_type_node value back. If the original
2267 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2268 we need to convert. */
2269 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2271 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2272 continue;
2273 t = fold_convert (TREE_TYPE (curr->op), t);
2276 if (TREE_CODE (t) != INTEGER_CST
2277 && !operand_equal_p (t, curr->op, 0))
2279 enum tree_code subcode;
2280 tree newop1, newop2;
2281 if (!COMPARISON_CLASS_P (t))
2282 continue;
2283 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2284 STRIP_USELESS_TYPE_CONVERSION (newop1);
2285 STRIP_USELESS_TYPE_CONVERSION (newop2);
2286 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2287 continue;
2288 if (lcode == TREE_CODE (t)
2289 && operand_equal_p (op1, newop1, 0)
2290 && operand_equal_p (op2, newop2, 0))
2291 t = curr->op;
2292 else if ((TREE_CODE (newop1) == SSA_NAME
2293 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2294 || (TREE_CODE (newop2) == SSA_NAME
2295 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2296 continue;
2299 if (dump_file && (dump_flags & TDF_DETAILS))
2301 fprintf (dump_file, "Equivalence: ");
2302 print_generic_expr (dump_file, curr->op);
2303 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2304 print_generic_expr (dump_file, oe->op);
2305 fprintf (dump_file, " -> ");
2306 print_generic_expr (dump_file, t);
2307 fprintf (dump_file, "\n");
2310 /* Now we can delete oe, as it has been subsumed by the new combined
2311 expression t. */
2312 ops->ordered_remove (i);
2313 reassociate_stats.ops_eliminated ++;
2315 /* If t is the same as curr->op, we're done. Otherwise we must
2316 replace curr->op with t. Special case is if we got a constant
2317 back, in which case we add it to the end instead of in place of
2318 the current entry. */
2319 if (TREE_CODE (t) == INTEGER_CST)
2321 ops->ordered_remove (currindex);
2322 add_to_ops_vec (ops, t);
2324 else if (!operand_equal_p (t, curr->op, 0))
2326 gimple *sum;
2327 enum tree_code subcode;
2328 tree newop1;
2329 tree newop2;
2330 gcc_assert (COMPARISON_CLASS_P (t));
2331 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2332 STRIP_USELESS_TYPE_CONVERSION (newop1);
2333 STRIP_USELESS_TYPE_CONVERSION (newop2);
2334 gcc_checking_assert (is_gimple_val (newop1)
2335 && is_gimple_val (newop2));
2336 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2337 curr->op = gimple_get_lhs (sum);
2339 return true;
2342 return false;
2346 /* Transform repeated addition of the same value into multiplication by a
2347 constant. */
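/* For example (a sketch only): given an operand list x, x, x, y, the three
   repeated x entries are removed and a new statement t = x * 3 is emitted,
   leaving y and t as the operand list.  For floating-point types this is
   only done under -funsafe-math-optimizations, as checked below.  */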
2348 static bool
2349 transform_add_to_multiply (vec<operand_entry *> *ops)
2351 operand_entry *oe;
2352 tree op = NULL_TREE;
2353 int j;
2354 int i, start = -1, end = 0, count = 0;
2355 auto_vec<std::pair <int, int> > indxs;
2356 bool changed = false;
2358 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2359 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2360 || !flag_unsafe_math_optimizations))
2361 return false;
2363 /* Look for repeated operands. */
2364 FOR_EACH_VEC_ELT (*ops, i, oe)
2366 if (start == -1)
2368 count = 1;
2369 op = oe->op;
2370 start = i;
2372 else if (operand_equal_p (oe->op, op, 0))
2374 count++;
2375 end = i;
2377 else
2379 if (count > 1)
2380 indxs.safe_push (std::make_pair (start, end));
2381 count = 1;
2382 op = oe->op;
2383 start = i;
2387 if (count > 1)
2388 indxs.safe_push (std::make_pair (start, end));
2390 for (j = indxs.length () - 1; j >= 0; --j)
2392 /* Convert repeated operand addition to multiplication. */
2393 start = indxs[j].first;
2394 end = indxs[j].second;
2395 op = (*ops)[start]->op;
2396 count = end - start + 1;
2397 for (i = end; i >= start; --i)
2398 ops->unordered_remove (i);
2399 tree tmp = make_ssa_name (TREE_TYPE (op));
2400 tree cst = build_int_cst (integer_type_node, count);
2401 gassign *mul_stmt
2402 = gimple_build_assign (tmp, MULT_EXPR,
2403 op, fold_convert (TREE_TYPE (op), cst));
2404 gimple_set_visited (mul_stmt, true);
2405 add_to_ops_vec (ops, tmp, mul_stmt);
2406 changed = true;
2409 return changed;
2413 /* Perform various identities and other optimizations on the list of
2414 operand entries, stored in OPS. The tree code for the binary
2415 operation between all the operands is OPCODE. */
2417 static void
2418 optimize_ops_list (enum tree_code opcode,
2419 vec<operand_entry *> *ops)
2421 unsigned int length = ops->length ();
2422 unsigned int i;
2423 operand_entry *oe;
2424 operand_entry *oelast = NULL;
2425 bool iterate = false;
2427 if (length == 1)
2428 return;
2430 oelast = ops->last ();
2432 /* If the last two are constants, pop the constants off, merge them
2433 and try the next two. */
2434 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2436 operand_entry *oelm1 = (*ops)[length - 2];
2438 if (oelm1->rank == 0
2439 && is_gimple_min_invariant (oelm1->op)
2440 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2441 TREE_TYPE (oelast->op)))
2443 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2444 oelm1->op, oelast->op);
2446 if (folded && is_gimple_min_invariant (folded))
2448 if (dump_file && (dump_flags & TDF_DETAILS))
2449 fprintf (dump_file, "Merging constants\n");
2451 ops->pop ();
2452 ops->pop ();
2454 add_to_ops_vec (ops, folded);
2455 reassociate_stats.constants_eliminated++;
2457 optimize_ops_list (opcode, ops);
2458 return;
2463 eliminate_using_constants (opcode, ops);
2464 oelast = NULL;
2466 for (i = 0; ops->iterate (i, &oe);)
2468 bool done = false;
2470 if (eliminate_not_pairs (opcode, ops, i, oe))
2471 return;
2472 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2473 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2474 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2476 if (done)
2477 return;
2478 iterate = true;
2479 oelast = NULL;
2480 continue;
2482 oelast = oe;
2483 i++;
2486 if (iterate)
2487 optimize_ops_list (opcode, ops);
2490 /* The following functions are subroutines to optimize_range_tests and allow
2491 it to try to change a logical combination of comparisons into a range
2492 test.
2494 For example, both
2495 X == 2 || X == 5 || X == 3 || X == 4
2496 and
2497 X >= 2 && X <= 5
2498 are converted to
2499 (unsigned) (X - 2) <= 3
2501 For more information see comments above fold_test_range in fold-const.cc,
2502 this implementation is for GIMPLE. */
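/* To see why the example above is an equivalence: for X in {2, 3, 4, 5}
   the value (unsigned) (X - 2) lies in {0, 1, 2, 3}, so the <= 3 test
   succeeds; for X < 2 the unsigned subtraction wraps around to a huge
   value, and for X > 5 the result is at least 4, so the test fails, which
   matches X >= 2 && X <= 5 (and the equivalent || chain) exactly.  */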
2506 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2508 void
2509 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2511 if (!skip_exp)
2512 print_generic_expr (file, r->exp);
2513 fprintf (file, " %c[", r->in_p ? '+' : '-');
2514 print_generic_expr (file, r->low);
2515 fputs (", ", file);
2516 print_generic_expr (file, r->high);
2517 fputc (']', file);
2520 /* Dump the range entry R to STDERR. */
2522 DEBUG_FUNCTION void
2523 debug_range_entry (struct range_entry *r)
2525 dump_range_entry (stderr, r, false);
2526 fputc ('\n', stderr);
2529 /* This is similar to make_range in fold-const.cc, but on top of
2530 GIMPLE instead of trees. If EXP is non-NULL, it should be
2531 an SSA_NAME and STMT argument is ignored, otherwise STMT
2532 argument should be a GIMPLE_COND. */
2534 void
2535 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2537 int in_p;
2538 tree low, high;
2539 bool is_bool, strict_overflow_p;
2541 r->exp = NULL_TREE;
2542 r->in_p = false;
2543 r->strict_overflow_p = false;
2544 r->low = NULL_TREE;
2545 r->high = NULL_TREE;
2546 if (exp != NULL_TREE
2547 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2548 return;
2550 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2551 and see if we can refine the range. Some of the cases below may not
2552 happen, but it doesn't seem worth worrying about this. We "continue"
2553 the outer loop when we've changed something; otherwise we "break"
2554 the switch, which will "break" the while. */
2555 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2556 high = low;
2557 in_p = 0;
2558 strict_overflow_p = false;
2559 is_bool = false;
2560 if (exp == NULL_TREE)
2561 is_bool = true;
2562 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2564 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2565 is_bool = true;
2566 else
2567 return;
2569 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2570 is_bool = true;
2572 while (1)
2574 enum tree_code code;
2575 tree arg0, arg1, exp_type;
2576 tree nexp;
2577 location_t loc;
2579 if (exp != NULL_TREE)
2581 if (TREE_CODE (exp) != SSA_NAME
2582 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2583 break;
2585 stmt = SSA_NAME_DEF_STMT (exp);
2586 if (!is_gimple_assign (stmt))
2587 break;
2589 code = gimple_assign_rhs_code (stmt);
2590 arg0 = gimple_assign_rhs1 (stmt);
2591 arg1 = gimple_assign_rhs2 (stmt);
2592 exp_type = TREE_TYPE (exp);
2594 else
2596 code = gimple_cond_code (stmt);
2597 arg0 = gimple_cond_lhs (stmt);
2598 arg1 = gimple_cond_rhs (stmt);
2599 exp_type = boolean_type_node;
2602 if (TREE_CODE (arg0) != SSA_NAME
2603 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2604 break;
2605 loc = gimple_location (stmt);
2606 switch (code)
2608 case BIT_NOT_EXPR:
2609 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2610 /* Ensure the range is either +[-,0], +[0,0],
2611 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2612 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2613 or similar expression of unconditional true or
2614 false, it should not be negated. */
2615 && ((high && integer_zerop (high))
2616 || (low && integer_onep (low))))
2618 in_p = !in_p;
2619 exp = arg0;
2620 continue;
2622 break;
2623 case SSA_NAME:
2624 exp = arg0;
2625 continue;
2626 CASE_CONVERT:
2627 if (is_bool)
2629 if ((TYPE_PRECISION (exp_type) == 1
2630 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2631 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2632 return;
2634 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2636 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2637 is_bool = true;
2638 else
2639 return;
2641 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2642 is_bool = true;
2643 goto do_default;
2644 case EQ_EXPR:
2645 case NE_EXPR:
2646 case LT_EXPR:
2647 case LE_EXPR:
2648 case GE_EXPR:
2649 case GT_EXPR:
2650 is_bool = true;
2651 /* FALLTHRU */
2652 default:
2653 if (!is_bool)
2654 return;
2655 do_default:
2656 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2657 &low, &high, &in_p,
2658 &strict_overflow_p);
2659 if (nexp != NULL_TREE)
2661 exp = nexp;
2662 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2663 continue;
2665 break;
2667 break;
2669 if (is_bool)
2671 r->exp = exp;
2672 r->in_p = in_p;
2673 r->low = low;
2674 r->high = high;
2675 r->strict_overflow_p = strict_overflow_p;
2679 /* Comparison function for qsort. Sort entries
2680 without SSA_NAME exp first, then with SSA_NAMEs sorted
2681 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2682 by increasing ->low and if ->low is the same, by increasing
2683 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2684 maximum. */
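/* A hypothetical set of entries, shown only to illustrate the ordering: an
   entry coming from a GIMPLE_COND (exp == NULL) sorts before any entry
   with an SSA_NAME exp; among entries for the same name, say _7,
   +[-, 10] (NULL low) sorts before +[2, 5], which sorts before +[2, 8];
   remaining ties fall back to ascending idx, which keeps the sort
   deterministic across qsort implementations.  */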
2686 static int
2687 range_entry_cmp (const void *a, const void *b)
2689 const struct range_entry *p = (const struct range_entry *) a;
2690 const struct range_entry *q = (const struct range_entry *) b;
2692 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2694 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2696 /* Group range_entries for the same SSA_NAME together. */
2697 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2698 return -1;
2699 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2700 return 1;
2701 /* If ->low is different, NULL low goes first, then by
2702 ascending low. */
2703 if (p->low != NULL_TREE)
2705 if (q->low != NULL_TREE)
2707 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2708 p->low, q->low);
2709 if (tem && integer_onep (tem))
2710 return -1;
2711 tem = fold_binary (GT_EXPR, boolean_type_node,
2712 p->low, q->low);
2713 if (tem && integer_onep (tem))
2714 return 1;
2716 else
2717 return 1;
2719 else if (q->low != NULL_TREE)
2720 return -1;
2721 /* If ->high is different, NULL high goes last, before that by
2722 ascending high. */
2723 if (p->high != NULL_TREE)
2725 if (q->high != NULL_TREE)
2727 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2728 p->high, q->high);
2729 if (tem && integer_onep (tem))
2730 return -1;
2731 tem = fold_binary (GT_EXPR, boolean_type_node,
2732 p->high, q->high);
2733 if (tem && integer_onep (tem))
2734 return 1;
2736 else
2737 return -1;
2739 else if (q->high != NULL_TREE)
2740 return 1;
2741 /* If both ranges are the same, sort below by ascending idx. */
2743 else
2744 return 1;
2746 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2747 return -1;
2749 if (p->idx < q->idx)
2750 return -1;
2751 else
2753 gcc_checking_assert (p->idx > q->idx);
2754 return 1;
2758 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2759 insert needed statements BEFORE or after GSI. */
2761 static tree
2762 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2764 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2765 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2766 if (TREE_CODE (ret) != SSA_NAME)
2768 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2769 if (before)
2770 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2771 else
2772 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2773 ret = gimple_assign_lhs (g);
2775 return ret;
2778 /* Helper routine of optimize_range_test.
2779 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2780 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2781 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2782 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2783 an array of COUNT pointers to other ranges. Return
2784 true if the range merge has been successful.
2785 If OPCODE is ERROR_MARK, this is called from within
2786 maybe_optimize_range_tests and is performing inter-bb range optimization.
2787 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2788 oe->rank. */
2790 static bool
2791 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2792 struct range_entry **otherrangep,
2793 unsigned int count, enum tree_code opcode,
2794 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2795 bool in_p, tree low, tree high, bool strict_overflow_p)
2797 unsigned int idx = range->idx;
2798 struct range_entry *swap_with = NULL;
2799 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2800 if (opcode == ERROR_MARK)
2802 /* For inter-bb range test optimization, pick from the range tests
2803 the one which is tested in the earliest condition (one dominating
2804 the others), because otherwise there could be some UB (e.g. signed
2805 overflow) in following bbs that we'd expose which wasn't there in
2806 the original program. See PR104196. */
2807 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2808 basic_block range_bb = orig_range_bb;
2809 for (unsigned int i = 0; i < count; i++)
2811 struct range_entry *this_range;
2812 if (otherrange)
2813 this_range = otherrange + i;
2814 else
2815 this_range = otherrangep[i];
2816 operand_entry *oe = (*ops)[this_range->idx];
2817 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2818 if (range_bb != this_bb
2819 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2821 swap_with = this_range;
2822 range_bb = this_bb;
2823 idx = this_range->idx;
2826 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2827 only defined in later blocks. In this case we can't move the
2828 merged comparison earlier, so instead check if there are any stmts
2829 that might trigger signed integer overflow in between and rewrite
2830 them. But only after we check if the optimization is possible. */
2831 if (seq && swap_with)
2833 rewrite_bb_first = range_bb;
2834 rewrite_bb_last = orig_range_bb;
2835 idx = range->idx;
2836 swap_with = NULL;
2839 operand_entry *oe = (*ops)[idx];
2840 tree op = oe->op;
2841 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2842 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2843 location_t loc = gimple_location (stmt);
2844 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2845 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2846 in_p, low, high);
2847 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2848 gimple_stmt_iterator gsi;
2849 unsigned int i, uid;
2851 if (tem == NULL_TREE)
2852 return false;
2854 /* If op is default def SSA_NAME, there is no place to insert the
2855 new comparison. Give up, unless we can use OP itself as the
2856 range test. */
2857 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2859 if (op == range->exp
2860 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2861 || TREE_CODE (optype) == BOOLEAN_TYPE)
2862 && (op == tem
2863 || (TREE_CODE (tem) == EQ_EXPR
2864 && TREE_OPERAND (tem, 0) == op
2865 && integer_onep (TREE_OPERAND (tem, 1))))
2866 && opcode != BIT_IOR_EXPR
2867 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2869 stmt = NULL;
2870 tem = op;
2872 else
2873 return false;
2876 if (swap_with)
2877 std::swap (range->idx, swap_with->idx);
2879 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2880 warning_at (loc, OPT_Wstrict_overflow,
2881 "assuming signed overflow does not occur "
2882 "when simplifying range test");
2884 if (dump_file && (dump_flags & TDF_DETAILS))
2886 struct range_entry *r;
2887 fprintf (dump_file, "Optimizing range tests ");
2888 dump_range_entry (dump_file, range, false);
2889 for (i = 0; i < count; i++)
2891 if (otherrange)
2892 r = otherrange + i;
2893 else
2894 r = otherrangep[i];
2895 if (r->exp
2896 && r->exp != range->exp
2897 && TREE_CODE (r->exp) == SSA_NAME)
2899 fprintf (dump_file, " and ");
2900 dump_range_entry (dump_file, r, false);
2902 else
2904 fprintf (dump_file, " and");
2905 dump_range_entry (dump_file, r, true);
2908 fprintf (dump_file, "\n into ");
2909 print_generic_expr (dump_file, tem);
2910 fprintf (dump_file, "\n");
2913 /* In inter-bb range optimization mode, if we have a seq, we can't
2914 move the merged comparison to the earliest bb from the comparisons
2915 being replaced, so instead rewrite stmts that could trigger signed
2916 integer overflow. */
2917 for (basic_block bb = rewrite_bb_last;
2918 bb != rewrite_bb_first; bb = single_pred (bb))
2919 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2920 !gsi_end_p (gsi); gsi_next (&gsi))
2922 gimple *stmt = gsi_stmt (gsi);
2923 if (is_gimple_assign (stmt))
2924 if (tree lhs = gimple_assign_lhs (stmt))
2925 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2926 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2927 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2929 enum tree_code code = gimple_assign_rhs_code (stmt);
2930 if (arith_code_with_undefined_signed_overflow (code))
2932 gimple_stmt_iterator gsip = gsi;
2933 gimple_stmt_iterator gsin = gsi;
2934 gsi_prev (&gsip);
2935 gsi_next (&gsin);
2936 rewrite_to_defined_overflow (&gsi);
2937 unsigned uid = gimple_uid (stmt);
2938 if (gsi_end_p (gsip))
2939 gsip = gsi_after_labels (bb);
2940 else
2941 gsi_next (&gsip);
2942 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2943 gsi_next (&gsip))
2944 gimple_set_uid (gsi_stmt (gsip), uid);
2949 if (opcode == BIT_IOR_EXPR
2950 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2951 tem = invert_truthvalue_loc (loc, tem);
2953 tem = fold_convert_loc (loc, optype, tem);
2954 if (stmt)
2956 gsi = gsi_for_stmt (stmt);
2957 uid = gimple_uid (stmt);
2959 else
2961 gsi = gsi_none ();
2962 uid = 0;
2964 if (stmt == NULL)
2965 gcc_checking_assert (tem == op);
2966 /* In rare cases range->exp can be equal to lhs of stmt.
2967 In that case we have to insert after the stmt rather than before
2968 it. If stmt is a PHI, insert it at the start of the basic block. */
2969 else if (op != range->exp)
2971 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2972 tem = force_into_ssa_name (&gsi, tem, true);
2973 gsi_prev (&gsi);
2975 else if (gimple_code (stmt) != GIMPLE_PHI)
2977 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2978 tem = force_into_ssa_name (&gsi, tem, false);
2980 else
2982 gsi = gsi_after_labels (gimple_bb (stmt));
2983 if (!gsi_end_p (gsi))
2984 uid = gimple_uid (gsi_stmt (gsi));
2985 else
2987 gsi = gsi_start_bb (gimple_bb (stmt));
2988 uid = 1;
2989 while (!gsi_end_p (gsi))
2991 uid = gimple_uid (gsi_stmt (gsi));
2992 gsi_next (&gsi);
2995 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2996 tem = force_into_ssa_name (&gsi, tem, true);
2997 if (gsi_end_p (gsi))
2998 gsi = gsi_last_bb (gimple_bb (stmt));
2999 else
3000 gsi_prev (&gsi);
3002 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3003 if (gimple_uid (gsi_stmt (gsi)))
3004 break;
3005 else
3006 gimple_set_uid (gsi_stmt (gsi), uid);
3008 oe->op = tem;
3009 range->exp = exp;
3010 range->low = low;
3011 range->high = high;
3012 range->in_p = in_p;
3013 range->strict_overflow_p = false;
3015 for (i = 0; i < count; i++)
3017 if (otherrange)
3018 range = otherrange + i;
3019 else
3020 range = otherrangep[i];
3021 oe = (*ops)[range->idx];
3022 /* Now change all the other range test immediate uses, so that
3023 those tests will be optimized away. */
3024 if (opcode == ERROR_MARK)
3026 if (oe->op)
3027 oe->op = build_int_cst (TREE_TYPE (oe->op),
3028 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3029 else
3030 oe->op = (oe->rank == BIT_IOR_EXPR
3031 ? boolean_false_node : boolean_true_node);
3033 else
3034 oe->op = error_mark_node;
3035 range->exp = NULL_TREE;
3036 range->low = NULL_TREE;
3037 range->high = NULL_TREE;
3039 return true;
3042 /* Optimize X == CST1 || X == CST2
3043 if popcount (CST1 ^ CST2) == 1 into
3044 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3045 Similarly for ranges. E.g.
3046 X != 2 && X != 3 && X != 10 && X != 11
3047 will be transformed by the previous optimization into
3048 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3049 and this loop can transform that into
3050 !(((X & ~8) - 2U) <= 1U). */
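/* Two small checks of the rule above (values chosen for illustration):
   for X == 4 || X == 6, CST1 ^ CST2 == 2 has a single bit set, so the
   test becomes (X & ~2) == 4, which accepts exactly 4 and 6.  For the
   range form quoted above, lowi = 2, highi = 3, lowj = 10, highj = 11
   give lowxor == highxor == 8, so (X & ~8) is range-tested against
   [2, 3], which covers both of the original ranges.  */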
3052 static bool
3053 optimize_range_tests_xor (enum tree_code opcode, tree type,
3054 tree lowi, tree lowj, tree highi, tree highj,
3055 vec<operand_entry *> *ops,
3056 struct range_entry *rangei,
3057 struct range_entry *rangej)
3059 tree lowxor, highxor, tem, exp;
3060 /* Check lowi ^ lowj == highi ^ highj and
3061 popcount (lowi ^ lowj) == 1. */
3062 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3063 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3064 return false;
3065 if (!integer_pow2p (lowxor))
3066 return false;
3067 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3068 if (!tree_int_cst_equal (lowxor, highxor))
3069 return false;
3071 exp = rangei->exp;
3072 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3073 int prec = GET_MODE_PRECISION (mode);
3074 if (TYPE_PRECISION (type) < prec
3075 || (wi::to_wide (TYPE_MIN_VALUE (type))
3076 != wi::min_value (prec, TYPE_SIGN (type)))
3077 || (wi::to_wide (TYPE_MAX_VALUE (type))
3078 != wi::max_value (prec, TYPE_SIGN (type))))
3080 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3081 exp = fold_convert (type, exp);
3082 lowxor = fold_convert (type, lowxor);
3083 lowi = fold_convert (type, lowi);
3084 highi = fold_convert (type, highi);
3086 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3087 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3088 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3089 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3090 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3091 NULL, rangei->in_p, lowj, highj,
3092 rangei->strict_overflow_p
3093 || rangej->strict_overflow_p))
3094 return true;
3095 return false;
3098 /* Optimize X == CST1 || X == CST2
3099 if popcount (CST2 - CST1) == 1 into
3100 ((X - CST1) & ~(CST2 - CST1)) == 0.
3101 Similarly for ranges. E.g.
3102 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3103 || X == 75 || X == 45
3104 will be transformed by the previous optimization into
3105 (X - 43U) <= 3U || (X - 75U) <= 3U
3106 and this loop can transform that into
3107 ((X - 43U) & ~(75U - 43U)) <= 3U. */
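/* Checking the example above: lowi = 43 and lowj = 75 give
   highi - lowi == highj - lowj == 3 and lowj - lowi == 32, a power of
   two, so the two tests merge into ((X - 43U) & ~32U) <= 3U.  For X in
   [43, 46] the subtraction yields [0, 3] with bit 5 clear, and for X in
   [75, 78] it yields [32, 35], where masking out bit 5 again gives
   [0, 3]; no other X can satisfy the masked test.  */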
3108 static bool
3109 optimize_range_tests_diff (enum tree_code opcode, tree type,
3110 tree lowi, tree lowj, tree highi, tree highj,
3111 vec<operand_entry *> *ops,
3112 struct range_entry *rangei,
3113 struct range_entry *rangej)
3115 tree tem1, tem2, mask;
3116 /* Check highi - lowi == highj - lowj. */
3117 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3118 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3119 return false;
3120 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3121 if (!tree_int_cst_equal (tem1, tem2))
3122 return false;
3123 /* Check popcount (lowj - lowi) == 1. */
3124 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3125 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3126 return false;
3127 if (!integer_pow2p (tem1))
3128 return false;
3130 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3131 int prec = GET_MODE_PRECISION (mode);
3132 if (TYPE_PRECISION (type) < prec
3133 || (wi::to_wide (TYPE_MIN_VALUE (type))
3134 != wi::min_value (prec, TYPE_SIGN (type)))
3135 || (wi::to_wide (TYPE_MAX_VALUE (type))
3136 != wi::max_value (prec, TYPE_SIGN (type))))
3137 type = build_nonstandard_integer_type (prec, 1);
3138 else
3139 type = unsigned_type_for (type);
3140 tem1 = fold_convert (type, tem1);
3141 tem2 = fold_convert (type, tem2);
3142 lowi = fold_convert (type, lowi);
3143 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3144 tem1 = fold_build2 (MINUS_EXPR, type,
3145 fold_convert (type, rangei->exp), lowi);
3146 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3147 lowj = build_int_cst (type, 0);
3148 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3149 NULL, rangei->in_p, lowj, tem2,
3150 rangei->strict_overflow_p
3151 || rangej->strict_overflow_p))
3152 return true;
3153 return false;
3156 /* Perform the checks that are common to optimize_range_tests_xor and
3157 optimize_range_tests_diff.
3158 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3159 Else it calls optimize_range_tests_diff. */
3161 static bool
3162 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3163 bool optimize_xor, vec<operand_entry *> *ops,
3164 struct range_entry *ranges)
3166 int i, j;
3167 bool any_changes = false;
3168 for (i = first; i < length; i++)
3170 tree lowi, highi, lowj, highj, type, tem;
3172 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3173 continue;
3174 type = TREE_TYPE (ranges[i].exp);
3175 if (!INTEGRAL_TYPE_P (type))
3176 continue;
3177 lowi = ranges[i].low;
3178 if (lowi == NULL_TREE)
3179 lowi = TYPE_MIN_VALUE (type);
3180 highi = ranges[i].high;
3181 if (highi == NULL_TREE)
3182 continue;
3183 for (j = i + 1; j < length && j < i + 64; j++)
3185 bool changes;
3186 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3187 continue;
3188 lowj = ranges[j].low;
3189 if (lowj == NULL_TREE)
3190 continue;
3191 highj = ranges[j].high;
3192 if (highj == NULL_TREE)
3193 highj = TYPE_MAX_VALUE (type);
3194 /* Check lowj > highi. */
3195 tem = fold_binary (GT_EXPR, boolean_type_node,
3196 lowj, highi);
3197 if (tem == NULL_TREE || !integer_onep (tem))
3198 continue;
3199 if (optimize_xor)
3200 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3201 highi, highj, ops,
3202 ranges + i, ranges + j);
3203 else
3204 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3205 highi, highj, ops,
3206 ranges + i, ranges + j);
3207 if (changes)
3209 any_changes = true;
3210 break;
3214 return any_changes;
3217 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3218 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3219 EXP on success, NULL otherwise. */
3221 static tree
3222 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3223 wide_int *mask, tree *totallowp)
3225 tree tem = int_const_binop (MINUS_EXPR, high, low);
3226 if (tem == NULL_TREE
3227 || TREE_CODE (tem) != INTEGER_CST
3228 || TREE_OVERFLOW (tem)
3229 || tree_int_cst_sgn (tem) == -1
3230 || compare_tree_int (tem, prec) != -1)
3231 return NULL_TREE;
3233 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3234 *mask = wi::shifted_mask (0, max, false, prec);
3235 if (TREE_CODE (exp) == BIT_AND_EXPR
3236 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3238 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3239 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3240 if (wi::popcount (msk) == 1
3241 && wi::ltu_p (msk, prec - max))
3243 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3244 max += msk.to_uhwi ();
3245 exp = TREE_OPERAND (exp, 0);
3246 if (integer_zerop (low)
3247 && TREE_CODE (exp) == PLUS_EXPR
3248 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3250 tree ret = TREE_OPERAND (exp, 0);
3251 STRIP_NOPS (ret);
3252 widest_int bias
3253 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3254 TYPE_PRECISION (TREE_TYPE (low))));
3255 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3256 if (totallowp)
3258 *totallowp = tbias;
3259 return ret;
3261 else if (!tree_int_cst_lt (totallow, tbias))
3262 return NULL_TREE;
3263 bias = wi::to_widest (tbias);
3264 bias -= wi::to_widest (totallow);
3265 if (bias >= 0 && bias < prec - max)
3267 *mask = wi::lshift (*mask, bias);
3268 return ret;
3273 if (totallowp)
3274 return exp;
3275 if (!tree_int_cst_lt (totallow, low))
3276 return exp;
3277 tem = int_const_binop (MINUS_EXPR, low, totallow);
3278 if (tem == NULL_TREE
3279 || TREE_CODE (tem) != INTEGER_CST
3280 || TREE_OVERFLOW (tem)
3281 || compare_tree_int (tem, prec - max) == 1)
3282 return NULL_TREE;
3284 *mask = wi::lshift (*mask, wi::to_widest (tem));
3285 return exp;
3288 /* Attempt to optimize small range tests using bit test.
3289 E.g.
3290 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3291 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3292 has been optimized by the earlier optimizations into:
3293 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3294 As the whole 43 through 82 range spans fewer than 64 values,
3295 for 64-bit word targets optimize that into:
3296 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
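/* The mask in the example above can be verified by hand: relative to 43,
   the tested values 43, 44, 45, 46, 49, 75, 76, 77, 78 and 82 have
   offsets 0, 1, 2, 3, 6, 32, 33, 34, 35 and 39, and
     (1ULL << 0) | (1ULL << 1) | (1ULL << 2) | (1ULL << 3) | (1ULL << 6)
     | (1ULL << 32) | (1ULL << 33) | (1ULL << 34) | (1ULL << 35)
     | (1ULL << 39)
   equals 0x8F0000004FULL, so for offsets within [0, 63] the == 0 test
   succeeds exactly when X is none of the listed values.  */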
3298 static bool
3299 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3300 vec<operand_entry *> *ops,
3301 struct range_entry *ranges)
3303 int i, j;
3304 bool any_changes = false;
3305 int prec = GET_MODE_BITSIZE (word_mode);
3306 auto_vec<struct range_entry *, 64> candidates;
3308 for (i = first; i < length - 1; i++)
3310 tree lowi, highi, lowj, highj, type;
3312 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3313 continue;
3314 type = TREE_TYPE (ranges[i].exp);
3315 if (!INTEGRAL_TYPE_P (type))
3316 continue;
3317 lowi = ranges[i].low;
3318 if (lowi == NULL_TREE)
3319 lowi = TYPE_MIN_VALUE (type);
3320 highi = ranges[i].high;
3321 if (highi == NULL_TREE)
3322 continue;
3323 wide_int mask;
3324 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3325 highi, &mask, &lowi);
3326 if (exp == NULL_TREE)
3327 continue;
3328 bool strict_overflow_p = ranges[i].strict_overflow_p;
3329 candidates.truncate (0);
3330 int end = MIN (i + 64, length);
3331 for (j = i + 1; j < end; j++)
3333 tree exp2;
3334 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3335 continue;
3336 if (ranges[j].exp == exp)
3338 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3340 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3341 if (exp2 == exp)
3343 else if (TREE_CODE (exp2) == PLUS_EXPR)
3345 exp2 = TREE_OPERAND (exp2, 0);
3346 STRIP_NOPS (exp2);
3347 if (exp2 != exp)
3348 continue;
3350 else
3351 continue;
3353 else
3354 continue;
3355 lowj = ranges[j].low;
3356 if (lowj == NULL_TREE)
3357 continue;
3358 highj = ranges[j].high;
3359 if (highj == NULL_TREE)
3360 highj = TYPE_MAX_VALUE (type);
3361 wide_int mask2;
3362 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3363 highj, &mask2, NULL);
3364 if (exp2 != exp)
3365 continue;
3366 mask |= mask2;
3367 strict_overflow_p |= ranges[j].strict_overflow_p;
3368 candidates.safe_push (&ranges[j]);
3371 /* If every possible relative value of the expression is a valid shift
3372 amount, then we can merge the entry test into the bit test.  In that
3373 case, use the bit test if we would otherwise need 2 or more
3374 comparisons; in the other cases, the threshold is 3 comparisons. */
3375 bool entry_test_needed;
3376 value_range r;
3377 if (TREE_CODE (exp) == SSA_NAME
3378 && get_range_query (cfun)->range_of_expr (r, exp)
3379 && !r.undefined_p ()
3380 && !r.varying_p ()
3381 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3383 wide_int min = r.lower_bound ();
3384 wide_int ilowi = wi::to_wide (lowi);
3385 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3387 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3388 mask = wi::lshift (mask, ilowi - min);
3390 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3392 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3393 mask = wi::lrshift (mask, min - ilowi);
3395 entry_test_needed = false;
3397 else
3398 entry_test_needed = true;
3399 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3401 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3402 wi::to_widest (lowi)
3403 + prec - 1 - wi::clz (mask));
3404 operand_entry *oe = (*ops)[ranges[i].idx];
3405 tree op = oe->op;
3406 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3407 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3408 (cfun, oe->id));
3409 location_t loc = gimple_location (stmt);
3410 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3412 /* See if it isn't cheaper to pretend the minimum value of the
3413 range is 0, if the maximum value is small enough.
3414 We can then avoid the subtraction of the minimum value, but the
3415 mask constant could perhaps be more expensive. */
3416 if (compare_tree_int (lowi, 0) > 0
3417 && compare_tree_int (high, prec) < 0)
3419 int cost_diff;
3420 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3421 rtx reg = gen_raw_REG (word_mode, 10000);
3422 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3423 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3424 GEN_INT (-m)),
3425 word_mode, speed_p);
3426 rtx r = immed_wide_int_const (mask, word_mode);
3427 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3428 word_mode, speed_p);
3429 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3430 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3431 word_mode, speed_p);
3432 if (cost_diff > 0)
3434 mask = wi::lshift (mask, m);
3435 lowi = build_zero_cst (TREE_TYPE (lowi));
3439 tree tem;
3440 if (entry_test_needed)
3442 tem = build_range_check (loc, optype, unshare_expr (exp),
3443 false, lowi, high);
3444 if (tem == NULL_TREE || is_gimple_val (tem))
3445 continue;
3447 else
3448 tem = NULL_TREE;
3449 tree etype = unsigned_type_for (TREE_TYPE (exp));
3450 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3451 fold_convert_loc (loc, etype, exp),
3452 fold_convert_loc (loc, etype, lowi));
3453 exp = fold_convert_loc (loc, integer_type_node, exp);
3454 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3455 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3456 build_int_cst (word_type, 1), exp);
3457 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3458 wide_int_to_tree (word_type, mask));
3459 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3460 build_zero_cst (word_type));
3461 if (is_gimple_val (exp))
3462 continue;
3464 /* The shift might have undefined behavior if TEM is true,
3465 but reassociate_bb isn't prepared to have basic blocks
3466 split when it is running. So, temporarily emit code
3467 with BIT_IOR_EXPR instead of &&, and fix it up in
3468 branch_fixup. */
3469 gimple_seq seq = NULL;
3470 if (tem)
3472 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3473 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3474 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3476 gimple_seq seq2;
3477 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3478 gimple_seq_add_seq_without_update (&seq, seq2);
3479 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3480 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3481 if (tem)
3483 gimple *g = gimple_build_assign (make_ssa_name (optype),
3484 BIT_IOR_EXPR, tem, exp);
3485 gimple_set_location (g, loc);
3486 gimple_seq_add_stmt_without_update (&seq, g);
3487 exp = gimple_assign_lhs (g);
3489 tree val = build_zero_cst (optype);
3490 if (update_range_test (&ranges[i], NULL, candidates.address (),
3491 candidates.length (), opcode, ops, exp,
3492 seq, false, val, val, strict_overflow_p))
3494 any_changes = true;
3495 if (tem)
3496 reassoc_branch_fixups.safe_push (tem);
3498 else
3499 gimple_seq_discard (seq);
3502 return any_changes;
3505 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3506 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3507 Also, handle x < C && y < C && z < C where C is power of two as
3508 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3509 as (x | y | z) < 0. */
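/* For instance (assuming unsigned char operands, so the types satisfy the
   checks below): x < 16 && y < 16 && z < 16 becomes (x | y | z) < 16,
   because the OR has a bit set at position 4 or above iff at least one
   operand does.  The same reasoning gives the (x | y | z) != 0 form for
   the != 0 chain, and since the AND is all-ones only when every operand
   is, the != -1 chain becomes (x & y & z) != -1.  */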
3511 static bool
3512 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3513 vec<operand_entry *> *ops,
3514 struct range_entry *ranges)
3516 int i;
3517 unsigned int b;
3518 bool any_changes = false;
3519 auto_vec<int, 128> buckets;
3520 auto_vec<int, 32> chains;
3521 auto_vec<struct range_entry *, 32> candidates;
3523 for (i = first; i < length; i++)
3525 int idx;
3527 if (ranges[i].exp == NULL_TREE
3528 || TREE_CODE (ranges[i].exp) != SSA_NAME
3529 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3530 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3531 continue;
3533 if (ranges[i].low != NULL_TREE
3534 && ranges[i].high != NULL_TREE
3535 && ranges[i].in_p
3536 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3538 idx = !integer_zerop (ranges[i].low);
3539 if (idx && !integer_all_onesp (ranges[i].low))
3540 continue;
3542 else if (ranges[i].high != NULL_TREE
3543 && TREE_CODE (ranges[i].high) == INTEGER_CST
3544 && ranges[i].in_p)
3546 wide_int w = wi::to_wide (ranges[i].high);
3547 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3548 int l = wi::clz (w);
3549 idx = 2;
3550 if (l <= 0
3551 || l >= prec
3552 || w != wi::mask (prec - l, false, prec))
3553 continue;
3554 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3555 && ranges[i].low == NULL_TREE)
3556 || (ranges[i].low
3557 && integer_zerop (ranges[i].low))))
3558 continue;
3560 else if (ranges[i].high == NULL_TREE
3561 && ranges[i].low != NULL_TREE
3562 /* Perform this optimization only in the last
3563 reassoc pass, as it interferes with the reassociation
3564 itself, and could also interfere with VRP etc., which might not
3565 be able to virtually undo the optimization. */
3566 && !reassoc_insert_powi_p
3567 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3568 && integer_zerop (ranges[i].low))
3569 idx = 3;
3570 else
3571 continue;
3573 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3574 if (buckets.length () <= b)
3575 buckets.safe_grow_cleared (b + 1, true);
3576 if (chains.length () <= (unsigned) i)
3577 chains.safe_grow (i + 1, true);
3578 chains[i] = buckets[b];
3579 buckets[b] = i + 1;
3582 FOR_EACH_VEC_ELT (buckets, b, i)
3583 if (i && chains[i - 1])
3585 int j, k = i;
3586 if ((b % 4) == 2)
3588 /* When ranges[X - 1].high + 1 is a power of two,
3589 we need to process the same bucket up to
3590 precision - 1 times, each time split the entries
3591 with the same high bound into one chain and the
3592 rest into another one to be processed later. */
3593 int this_prev = i;
3594 int other_prev = 0;
3595 for (j = chains[i - 1]; j; j = chains[j - 1])
3597 if (tree_int_cst_equal (ranges[i - 1].high,
3598 ranges[j - 1].high))
3600 chains[this_prev - 1] = j;
3601 this_prev = j;
3603 else if (other_prev == 0)
3605 buckets[b] = j;
3606 other_prev = j;
3608 else
3610 chains[other_prev - 1] = j;
3611 other_prev = j;
3614 chains[this_prev - 1] = 0;
3615 if (other_prev)
3616 chains[other_prev - 1] = 0;
3617 if (chains[i - 1] == 0)
3619 if (other_prev)
3620 b--;
3621 continue;
3624 for (j = chains[i - 1]; j; j = chains[j - 1])
3626 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3627 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3628 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3629 k = j;
3631 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3632 tree type2 = NULL_TREE;
3633 bool strict_overflow_p = false;
3634 candidates.truncate (0);
3635 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3636 type1 = pointer_sized_int_node;
3637 for (j = i; j; j = chains[j - 1])
3639 tree type = TREE_TYPE (ranges[j - 1].exp);
3640 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3641 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3642 type = pointer_sized_int_node;
3643 if ((b % 4) == 3)
3645 /* For the signed < 0 cases, the types should be
3646 really compatible (all signed with the same precision);
3647 ranges whose in_p differs from k's are pushed
3648 first. */
3649 if (!useless_type_conversion_p (type1, type))
3650 continue;
3651 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3652 candidates.safe_push (&ranges[j - 1]);
3653 type2 = type1;
3654 continue;
3656 if (j == k
3657 || useless_type_conversion_p (type1, type))
3659 else if (type2 == NULL_TREE
3660 || useless_type_conversion_p (type2, type))
3662 if (type2 == NULL_TREE)
3663 type2 = type;
3664 candidates.safe_push (&ranges[j - 1]);
3667 unsigned l = candidates.length ();
3668 for (j = i; j; j = chains[j - 1])
3670 tree type = TREE_TYPE (ranges[j - 1].exp);
3671 if (j == k)
3672 continue;
3673 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3674 type = pointer_sized_int_node;
3675 if ((b % 4) == 3)
3677 if (!useless_type_conversion_p (type1, type))
3678 continue;
3679 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3680 candidates.safe_push (&ranges[j - 1]);
3681 continue;
3683 if (useless_type_conversion_p (type1, type))
3685 else if (type2 == NULL_TREE
3686 || useless_type_conversion_p (type2, type))
3687 continue;
3688 candidates.safe_push (&ranges[j - 1]);
3690 gimple_seq seq = NULL;
3691 tree op = NULL_TREE;
3692 unsigned int id;
3693 struct range_entry *r;
3694 candidates.safe_push (&ranges[k - 1]);
3695 FOR_EACH_VEC_ELT (candidates, id, r)
3697 gimple *g;
3698 enum tree_code code;
3699 if (id == 0)
3701 op = r->exp;
3702 continue;
3704 if (id == l
3705 || POINTER_TYPE_P (TREE_TYPE (op))
3706 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3708 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3709 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3710 if (code == BIT_NOT_EXPR
3711 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3713 g = gimple_build_assign (make_ssa_name (type3),
3714 NOP_EXPR, op);
3715 gimple_seq_add_stmt_without_update (&seq, g);
3716 op = gimple_assign_lhs (g);
3718 g = gimple_build_assign (make_ssa_name (type3), code, op);
3719 gimple_seq_add_stmt_without_update (&seq, g);
3720 op = gimple_assign_lhs (g);
3722 tree type = TREE_TYPE (r->exp);
3723 tree exp = r->exp;
3724 if (POINTER_TYPE_P (type)
3725 || TREE_CODE (type) == OFFSET_TYPE
3726 || (id >= l && !useless_type_conversion_p (type1, type)))
3728 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3729 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3730 gimple_seq_add_stmt_without_update (&seq, g);
3731 exp = gimple_assign_lhs (g);
3733 if ((b % 4) == 3)
3734 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3735 else
3736 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3737 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3738 code, op, exp);
3739 gimple_seq_add_stmt_without_update (&seq, g);
3740 op = gimple_assign_lhs (g);
3742 type1 = TREE_TYPE (ranges[k - 1].exp);
3743 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3745 gimple *g
3746 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3747 gimple_seq_add_stmt_without_update (&seq, g);
3748 op = gimple_assign_lhs (g);
3750 candidates.pop ();
3751 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3752 candidates.length (), opcode, ops, op,
3753 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3754 ranges[k - 1].high, strict_overflow_p))
3755 any_changes = true;
3756 else
3757 gimple_seq_discard (seq);
3758 if ((b % 4) == 2 && buckets[b] != i)
3759 /* There is more work to do for this bucket. */
3760 b--;
3763 return any_changes;
3766 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3767 a >= 0 && a < b into (unsigned) a < (unsigned) b
3768 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
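/* Why the pair of tests equals one unsigned comparison (32-bit int
   assumed): when a >= 0, the casts change nothing, so the test is just
   a < b; when a < 0, (unsigned) a is at least 0x80000000 while
   (unsigned) b is at most 0x7fffffff (because b >= 0), so the unsigned
   comparison is false, matching the failed a >= 0 test.  */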
3770 static bool
3771 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3772 vec<operand_entry *> *ops,
3773 struct range_entry *ranges,
3774 basic_block first_bb)
3776 int i;
3777 bool any_changes = false;
3778 hash_map<tree, int> *map = NULL;
3780 for (i = first; i < length; i++)
3782 if (ranges[i].exp == NULL_TREE
3783 || TREE_CODE (ranges[i].exp) != SSA_NAME
3784 || !ranges[i].in_p)
3785 continue;
3787 tree type = TREE_TYPE (ranges[i].exp);
3788 if (!INTEGRAL_TYPE_P (type)
3789 || TYPE_UNSIGNED (type)
3790 || ranges[i].low == NULL_TREE
3791 || !integer_zerop (ranges[i].low)
3792 || ranges[i].high != NULL_TREE)
3793 continue;
3794 /* EXP >= 0 here. */
3795 if (map == NULL)
3796 map = new hash_map <tree, int>;
3797 map->put (ranges[i].exp, i);
3800 if (map == NULL)
3801 return false;
3803 for (i = 0; i < length; i++)
3805 bool in_p = ranges[i].in_p;
3806 if (ranges[i].low == NULL_TREE
3807 || ranges[i].high == NULL_TREE)
3808 continue;
3809 if (!integer_zerop (ranges[i].low)
3810 || !integer_zerop (ranges[i].high))
3812 if (ranges[i].exp
3813 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3814 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3815 && integer_onep (ranges[i].low)
3816 && integer_onep (ranges[i].high))
3817 in_p = !in_p;
3818 else
3819 continue;
3822 gimple *stmt;
3823 tree_code ccode;
3824 tree rhs1, rhs2;
3825 if (ranges[i].exp)
3827 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3828 continue;
3829 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3830 if (!is_gimple_assign (stmt))
3831 continue;
3832 ccode = gimple_assign_rhs_code (stmt);
3833 rhs1 = gimple_assign_rhs1 (stmt);
3834 rhs2 = gimple_assign_rhs2 (stmt);
3836 else
3838 operand_entry *oe = (*ops)[ranges[i].idx];
3839 stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3840 if (gimple_code (stmt) != GIMPLE_COND)
3841 continue;
3842 ccode = gimple_cond_code (stmt);
3843 rhs1 = gimple_cond_lhs (stmt);
3844 rhs2 = gimple_cond_rhs (stmt);
3847 if (TREE_CODE (rhs1) != SSA_NAME
3848 || rhs2 == NULL_TREE
3849 || TREE_CODE (rhs2) != SSA_NAME)
3850 continue;
3852 switch (ccode)
3854 case GT_EXPR:
3855 case GE_EXPR:
3856 case LT_EXPR:
3857 case LE_EXPR:
3858 break;
3859 default:
3860 continue;
3862 if (in_p)
3863 ccode = invert_tree_comparison (ccode, false);
3864 switch (ccode)
3866 case GT_EXPR:
3867 case GE_EXPR:
3868 std::swap (rhs1, rhs2);
3869 ccode = swap_tree_comparison (ccode);
3870 break;
3871 case LT_EXPR:
3872 case LE_EXPR:
3873 break;
3874 default:
3875 gcc_unreachable ();
3878 int *idx = map->get (rhs1);
3879 if (idx == NULL)
3880 continue;
3882 /* maybe_optimize_range_tests allows statements without side-effects
3883 in the basic blocks as long as they are consumed in the same bb.
3884 Make sure rhs2's def stmt is not among them, otherwise we can't
3885 safely use get_nonzero_bits on it. E.g. in:
3886 # RANGE [-83, 1] NONZERO 173
3887 # k_32 = PHI <k_47(13), k_12(9)>
3889 if (k_32 >= 0)
3890 goto <bb 5>; [26.46%]
3891 else
3892 goto <bb 9>; [73.54%]
3894 <bb 5> [local count: 140323371]:
3895 # RANGE [0, 1] NONZERO 1
3896 _5 = (int) k_32;
3897 # RANGE [0, 4] NONZERO 4
3898 _21 = _5 << 2;
3899 # RANGE [0, 4] NONZERO 4
3900 iftmp.0_44 = (char) _21;
3901 if (k_32 < iftmp.0_44)
3902 goto <bb 6>; [84.48%]
3903 else
3904 goto <bb 9>; [15.52%]
3905 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3906 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3907 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3908 those stmts even for negative k_32 and the value ranges would no
3909 longer be guaranteed, so the optimization would be invalid. */
3910 while (opcode == ERROR_MARK)
3912 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3913 basic_block bb2 = gimple_bb (g);
3914 if (bb2
3915 && bb2 != first_bb
3916 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3918 /* As an exception, handle a few common cases. */
3919 if (gimple_assign_cast_p (g)
3920 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3922 tree op0 = gimple_assign_rhs1 (g);
3923 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3924 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3925 > TYPE_PRECISION (TREE_TYPE (op0))))
3926 /* Zero-extension is always ok. */
3927 break;
3928 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3929 == TYPE_PRECISION (TREE_TYPE (op0))
3930 && TREE_CODE (op0) == SSA_NAME)
3932 /* Cast from signed to unsigned or vice versa. Retry
3933 with the op0 as new rhs2. */
3934 rhs2 = op0;
3935 continue;
3938 else if (is_gimple_assign (g)
3939 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3940 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3941 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3942 /* Masking with INTEGER_CST with MSB clear is always ok
3943 too. */
3944 break;
3945 rhs2 = NULL_TREE;
3947 break;
3949 if (rhs2 == NULL_TREE)
3950 continue;
3952 wide_int nz = get_nonzero_bits (rhs2);
3953 if (wi::neg_p (nz))
3954 continue;
3956 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3957 and RHS2 is known to be >= 0. */
3958 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3960 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3961 if ((ranges[*idx].strict_overflow_p
3962 || ranges[i].strict_overflow_p)
3963 && issue_strict_overflow_warning (wc))
3964 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3965 "assuming signed overflow does not occur "
3966 "when simplifying range test");
3968 if (dump_file && (dump_flags & TDF_DETAILS))
3970 struct range_entry *r = &ranges[*idx];
3971 fprintf (dump_file, "Optimizing range test ");
3972 print_generic_expr (dump_file, r->exp);
3973 fprintf (dump_file, " +[");
3974 print_generic_expr (dump_file, r->low);
3975 fprintf (dump_file, ", ");
3976 print_generic_expr (dump_file, r->high);
3977 fprintf (dump_file, "] and comparison ");
3978 print_generic_expr (dump_file, rhs1);
3979 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3980 print_generic_expr (dump_file, rhs2);
3981 fprintf (dump_file, "\n into (");
3982 print_generic_expr (dump_file, utype);
3983 fprintf (dump_file, ") ");
3984 print_generic_expr (dump_file, rhs1);
3985 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3986 print_generic_expr (dump_file, utype);
3987 fprintf (dump_file, ") ");
3988 print_generic_expr (dump_file, rhs2);
3989 fprintf (dump_file, "\n");
3992 operand_entry *oe = (*ops)[ranges[i].idx];
3993 ranges[i].in_p = 0;
3994 if (opcode == BIT_IOR_EXPR
3995 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3997 ranges[i].in_p = 1;
3998 ccode = invert_tree_comparison (ccode, false);
4001 unsigned int uid = gimple_uid (stmt);
4002 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4003 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4004 gimple_set_uid (g, uid);
4005 rhs1 = gimple_assign_lhs (g);
4006 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4007 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4009 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4010 gimple_set_uid (g, uid);
4011 rhs2 = gimple_assign_lhs (g);
4012 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4014 if (tree_swap_operands_p (rhs1, rhs2))
4016 std::swap (rhs1, rhs2);
4017 ccode = swap_tree_comparison (ccode);
4019 if (gimple_code (stmt) == GIMPLE_COND)
4021 gcond *c = as_a <gcond *> (stmt);
4022 gimple_cond_set_code (c, ccode);
4023 gimple_cond_set_lhs (c, rhs1);
4024 gimple_cond_set_rhs (c, rhs2);
4025 update_stmt (stmt);
4027 else
4029 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4030 if (!INTEGRAL_TYPE_P (ctype)
4031 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4032 && TYPE_PRECISION (ctype) != 1))
4033 ctype = boolean_type_node;
4034 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4035 gimple_set_uid (g, uid);
4036 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4037 if (oe->op && ctype != TREE_TYPE (oe->op))
4039 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4040 NOP_EXPR, gimple_assign_lhs (g));
4041 gimple_set_uid (g, uid);
4042 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4044 ranges[i].exp = gimple_assign_lhs (g);
4045 oe->op = ranges[i].exp;
4046 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4047 ranges[i].high = ranges[i].low;
4049 ranges[i].strict_overflow_p = false;
4050 oe = (*ops)[ranges[*idx].idx];
4051 /* Now change all the other range test immediate uses, so that
4052 those tests will be optimized away. */
4053 if (opcode == ERROR_MARK)
4055 if (oe->op)
4056 oe->op = build_int_cst (TREE_TYPE (oe->op),
4057 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4058 else
4059 oe->op = (oe->rank == BIT_IOR_EXPR
4060 ? boolean_false_node : boolean_true_node);
4062 else
4063 oe->op = error_mark_node;
4064 ranges[*idx].exp = NULL_TREE;
4065 ranges[*idx].low = NULL_TREE;
4066 ranges[*idx].high = NULL_TREE;
4067 any_changes = true;
4070 delete map;
4071 return any_changes;
4074 /* Optimize range tests, similarly to how fold_range_test optimizes
4075 them on trees. The tree code for the binary
4076 operation between all the operands is OPCODE.
4077 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4078 maybe_optimize_range_tests for inter-bb range optimization.
4079 In that case if oe->op is NULL, oe->id is the index of the bb whose
4080 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
4081 the actual opcode.
4082 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
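/* Purely illustrative sketch (hypothetical source, not from the pass's
   dumps): a test like

     if (c == 2 || c == 3 || c == 4)

   is a || of three single-value ranges on the same SSA_NAME; after the
   ranges are sorted and merged, update_range_test emits roughly

     if ((unsigned) (c - 2) <= 2)

   so later passes see a single comparison instead of three.  */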
4084 static bool
4085 optimize_range_tests (enum tree_code opcode,
4086 vec<operand_entry *> *ops, basic_block first_bb)
4088 unsigned int length = ops->length (), i, j, first;
4089 operand_entry *oe;
4090 struct range_entry *ranges;
4091 bool any_changes = false;
4093 if (length == 1)
4094 return false;
4096 ranges = XNEWVEC (struct range_entry, length);
4097 for (i = 0; i < length; i++)
4099 oe = (*ops)[i];
4100 ranges[i].idx = i;
4101 init_range_entry (ranges + i, oe->op,
4102 oe->op
4103 ? NULL
4104 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4105 /* For |, invert it now; we will invert it again before emitting
4106 the optimized expression. */
4107 if (opcode == BIT_IOR_EXPR
4108 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4109 ranges[i].in_p = !ranges[i].in_p;
4112 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4113 for (i = 0; i < length; i++)
4114 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4115 break;
4117 /* Try to merge ranges. */
4118 for (first = i; i < length; i++)
4120 tree low = ranges[i].low;
4121 tree high = ranges[i].high;
4122 int in_p = ranges[i].in_p;
4123 bool strict_overflow_p = ranges[i].strict_overflow_p;
4124 int update_fail_count = 0;
4126 for (j = i + 1; j < length; j++)
4128 if (ranges[i].exp != ranges[j].exp)
4129 break;
4130 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4131 ranges[j].in_p, ranges[j].low, ranges[j].high))
4132 break;
4133 strict_overflow_p |= ranges[j].strict_overflow_p;
4136 if (j == i + 1)
4137 continue;
4139 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4140 opcode, ops, ranges[i].exp, NULL, in_p,
4141 low, high, strict_overflow_p))
4143 i = j - 1;
4144 any_changes = true;
4146 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4147 while update_range_test would fail. */
4148 else if (update_fail_count == 64)
4149 i = j - 1;
4150 else
4151 ++update_fail_count;
4154 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4155 ops, ranges);
4157 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4158 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4159 ops, ranges);
4160 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4161 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4162 ops, ranges);
4163 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4164 ranges, first_bb);
4165 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4166 ops, ranges);
4168 if (any_changes && opcode != ERROR_MARK)
4170 j = 0;
4171 FOR_EACH_VEC_ELT (*ops, i, oe)
4173 if (oe->op == error_mark_node)
4174 continue;
4175 else if (i != j)
4176 (*ops)[j] = oe;
4177 j++;
4179 ops->truncate (j);
4182 XDELETEVEC (ranges);
4183 return any_changes;
4186 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4187 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4188 otherwise the comparison code. TYPE is a return value that is set
4189 to the type of the comparison. */
4191 static tree_code
4192 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4193 tree *lhs, tree *rhs, gassign **vcond)
4195 if (TREE_CODE (var) != SSA_NAME)
4196 return ERROR_MARK;
4198 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4199 if (stmt == NULL)
4200 return ERROR_MARK;
4201 if (vcond)
4202 *vcond = stmt;
4204 /* ??? If we start creating more COND_EXPR, we could perform
4205 this same optimization with them. For now, simplify. */
4206 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4207 return ERROR_MARK;
4209 tree cond = gimple_assign_rhs1 (stmt);
4210 tree_code cmp = TREE_CODE (cond);
4211 if (cmp != SSA_NAME)
4212 return ERROR_MARK;
4214 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4215 if (assign == NULL
4216 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4217 return ERROR_MARK;
4219 cmp = gimple_assign_rhs_code (assign);
4220 if (lhs)
4221 *lhs = gimple_assign_rhs1 (assign);
4222 if (rhs)
4223 *rhs = gimple_assign_rhs2 (assign);
4225 /* ??? For now, allow only canonical true and false result vectors.
4226 We could expand this to other constants should the need arise,
4227 but at the moment we don't create them. */
4228 tree t = gimple_assign_rhs2 (stmt);
4229 tree f = gimple_assign_rhs3 (stmt);
4230 bool inv;
4231 if (integer_all_onesp (t))
4232 inv = false;
4233 else if (integer_all_onesp (f))
4235 cmp = invert_tree_comparison (cmp, false);
4236 inv = true;
4238 else
4239 return ERROR_MARK;
4240 if (!integer_zerop (f))
4241 return ERROR_MARK;
4243 /* Success! */
4244 if (rets)
4245 *rets = assign;
4246 if (reti)
4247 *reti = inv;
4248 if (type)
4249 *type = TREE_TYPE (cond);
4250 return cmp;
4253 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4254 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
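/* Illustrative sketch (hypothetical GIMPLE, constants made up): for

     _1 = v_2 < x_3;
     m1_4 = VEC_COND_EXPR <_1, { -1, ... }, { 0, ... }>;
     _5 = v_2 < y_6;
     m2_7 = VEC_COND_EXPR <_5, { -1, ... }, { 0, ... }>;
     m_8 = m1_4 & m2_7;

   maybe_fold_and_comparisons may be able to combine the two comparisons
   (e.g. v < 5 and v < 10 fold to v < 5); if so, the first VEC_COND_EXPR
   gets the combined condition and the second operand is dropped from the
   ops vector.  */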
4256 static bool
4257 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4259 unsigned int length = ops->length (), i, j;
4260 bool any_changes = false;
4262 if (length == 1)
4263 return false;
4265 for (i = 0; i < length; ++i)
4267 tree elt0 = (*ops)[i]->op;
4269 gassign *stmt0, *vcond0;
4270 bool invert;
4271 tree type, lhs0, rhs0;
4272 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4273 &rhs0, &vcond0);
4274 if (cmp0 == ERROR_MARK)
4275 continue;
4277 for (j = i + 1; j < length; ++j)
4279 tree &elt1 = (*ops)[j]->op;
4281 gassign *stmt1, *vcond1;
4282 tree lhs1, rhs1;
4283 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4284 &rhs1, &vcond1);
4285 if (cmp1 == ERROR_MARK)
4286 continue;
4288 tree comb;
4289 if (opcode == BIT_AND_EXPR)
4290 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4291 cmp1, lhs1, rhs1);
4292 else if (opcode == BIT_IOR_EXPR)
4293 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4294 cmp1, lhs1, rhs1);
4295 else
4296 gcc_unreachable ();
4297 if (comb == NULL)
4298 continue;
4300 /* Success! */
4301 if (dump_file && (dump_flags & TDF_DETAILS))
4303 fprintf (dump_file, "Transforming ");
4304 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4305 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4306 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4307 fprintf (dump_file, " into ");
4308 print_generic_expr (dump_file, comb);
4309 fputc ('\n', dump_file);
4312 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4313 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4314 true, GSI_SAME_STMT);
4315 if (invert)
4316 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4317 gimple_assign_rhs3_ptr (vcond0));
4318 gimple_assign_set_rhs1 (vcond0, exp);
4319 update_stmt (vcond0);
4321 elt1 = error_mark_node;
4322 any_changes = true;
4326 if (any_changes)
4328 operand_entry *oe;
4329 j = 0;
4330 FOR_EACH_VEC_ELT (*ops, i, oe)
4332 if (oe->op == error_mark_node)
4333 continue;
4334 else if (i != j)
4335 (*ops)[j] = oe;
4336 j++;
4338 ops->truncate (j);
4341 return any_changes;
4344 /* Return true if STMT is a cast like:
4345 <bb N>:
4347 _123 = (int) _234;
4349 <bb M>:
4350 # _345 = PHI <_123(N), 1(...), 1(...)>
4351 where _234 has bool type, _123 has a single use and
4352 bb N has a single successor M. This is commonly used in
4353 the last block of a range test.
4355 Also return true if STMT is a tcc_comparison like:
4356 <bb N>:
4358 _234 = a_2(D) == 2;
4360 <bb M>:
4361 # _345 = PHI <_234(N), 1(...), 1(...)>
4362 _346 = (int) _345;
4363 where _234 has bool type and a single use, and
4364 bb N has a single successor M. This is commonly used in
4365 the last block of a range test. */
4367 static bool
4368 final_range_test_p (gimple *stmt)
4370 basic_block bb, rhs_bb, lhs_bb;
4371 edge e;
4372 tree lhs, rhs;
4373 use_operand_p use_p;
4374 gimple *use_stmt;
4376 if (!gimple_assign_cast_p (stmt)
4377 && (!is_gimple_assign (stmt)
4378 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4379 != tcc_comparison)))
4380 return false;
4381 bb = gimple_bb (stmt);
4382 if (!single_succ_p (bb))
4383 return false;
4384 e = single_succ_edge (bb);
4385 if (e->flags & EDGE_COMPLEX)
4386 return false;
4388 lhs = gimple_assign_lhs (stmt);
4389 rhs = gimple_assign_rhs1 (stmt);
4390 if (gimple_assign_cast_p (stmt)
4391 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4392 || TREE_CODE (rhs) != SSA_NAME
4393 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4394 return false;
4396 if (!gimple_assign_cast_p (stmt)
4397 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4398 return false;
4400 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4401 if (!single_imm_use (lhs, &use_p, &use_stmt))
4402 return false;
4404 if (gimple_code (use_stmt) != GIMPLE_PHI
4405 || gimple_bb (use_stmt) != e->dest)
4406 return false;
4408 /* And that the rhs is defined in the same loop. */
4409 if (gimple_assign_cast_p (stmt))
4411 if (TREE_CODE (rhs) != SSA_NAME
4412 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4413 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4414 return false;
4416 else
4418 if (TREE_CODE (lhs) != SSA_NAME
4419 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4420 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4421 return false;
4424 return true;
4427 /* Return true if BB is a suitable basic block for inter-bb range test
4428 optimization. If BACKWARD is true, BB should be the only predecessor
4429 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4430 or compared against to find a common basic block to which all conditions
4431 branch if true resp. false. If BACKWARD is false, TEST_BB should
4432 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4433 TEST_BB is a bb ending in a condition where the edge to the non-*OTHER_BB
4434 block points to an empty block that falls through into *OTHER_BB and
4435 the phi args match that path. */
4437 static bool
4438 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4439 bool *test_swapped_p, bool backward)
4441 edge_iterator ei, ei2;
4442 edge e, e2;
4443 gimple *stmt;
4444 gphi_iterator gsi;
4445 bool other_edge_seen = false;
4446 bool is_cond;
4448 if (test_bb == bb)
4449 return false;
4450 /* Check last stmt first. */
4451 stmt = last_nondebug_stmt (bb);
4452 if (stmt == NULL
4453 || (gimple_code (stmt) != GIMPLE_COND
4454 && (backward || !final_range_test_p (stmt)))
4455 || gimple_visited_p (stmt)
4456 || stmt_could_throw_p (cfun, stmt)
4457 || *other_bb == bb)
4458 return false;
4459 is_cond = gimple_code (stmt) == GIMPLE_COND;
4460 if (is_cond)
4462 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4463 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4464 to *OTHER_BB (if not set yet, try to find it out). */
4465 if (EDGE_COUNT (bb->succs) != 2)
4466 return false;
4467 FOR_EACH_EDGE (e, ei, bb->succs)
4469 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4470 return false;
4471 if (e->dest == test_bb)
4473 if (backward)
4474 continue;
4475 else
4476 return false;
4478 if (e->dest == bb)
4479 return false;
4480 if (*other_bb == NULL)
4482 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4483 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4484 return false;
4485 else if (e->dest == e2->dest)
4486 *other_bb = e->dest;
4487 if (*other_bb == NULL)
4488 return false;
4490 if (e->dest == *other_bb)
4491 other_edge_seen = true;
4492 else if (backward)
4493 return false;
4495 if (*other_bb == NULL || !other_edge_seen)
4496 return false;
4498 else if (single_succ (bb) != *other_bb)
4499 return false;
4501 /* Now check all PHIs of *OTHER_BB. */
4502 e = find_edge (bb, *other_bb);
4503 e2 = find_edge (test_bb, *other_bb);
4504 retry:;
4505 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4507 gphi *phi = gsi.phi ();
4508 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4509 corresponding to BB and TEST_BB predecessor must be the same. */
4510 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4511 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4513 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4514 one of the PHIs should have the lhs of the last stmt in
4515 that block as PHI arg and that PHI should have 0 or 1
4516 corresponding to it in all other range test basic blocks
4517 considered. */
4518 if (!is_cond)
4520 if (gimple_phi_arg_def (phi, e->dest_idx)
4521 == gimple_assign_lhs (stmt)
4522 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4523 || integer_onep (gimple_phi_arg_def (phi,
4524 e2->dest_idx))))
4525 continue;
4527 else
4529 gimple *test_last = last_nondebug_stmt (test_bb);
4530 if (gimple_code (test_last) == GIMPLE_COND)
4532 if (backward ? e2->src != test_bb : e->src != bb)
4533 return false;
4535 /* For last_bb, handle also:
4536 if (x_3(D) == 3)
4537 goto <bb 6>; [34.00%]
4538 else
4539 goto <bb 7>; [66.00%]
4541 <bb 6> [local count: 79512730]:
4543 <bb 7> [local count: 1073741824]:
4544 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4545 where bb 7 is *OTHER_BB, but the PHI values from the
4546 earlier bbs match the path through the empty bb
4547 in between. */
4548 edge e3;
4549 if (backward)
4550 e3 = EDGE_SUCC (test_bb,
4551 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4552 else
4553 e3 = EDGE_SUCC (bb,
4554 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4555 if (empty_block_p (e3->dest)
4556 && single_succ_p (e3->dest)
4557 && single_succ (e3->dest) == *other_bb
4558 && single_pred_p (e3->dest)
4559 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4561 if (backward)
4562 e2 = single_succ_edge (e3->dest);
4563 else
4564 e = single_succ_edge (e3->dest);
4565 if (test_swapped_p)
4566 *test_swapped_p = true;
4567 goto retry;
4570 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4571 == gimple_assign_lhs (test_last)
4572 && (integer_zerop (gimple_phi_arg_def (phi,
4573 e->dest_idx))
4574 || integer_onep (gimple_phi_arg_def (phi,
4575 e->dest_idx))))
4576 continue;
4579 return false;
4582 return true;
4585 /* Return true if BB doesn't have side-effects that would disallow
4586 range test optimization, all SSA_NAMEs set in the bb are consumed
4587 in the bb and there are no PHIs. */
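/* Illustrative example (hypothetical GIMPLE): a block such as

     <bb N>:
     _1 = x_4(D) + 1;
     _2 = _1 * 3;
     if (_2 > y_5(D))
       ...

   qualifies, because _1 and _2 are consumed inside bb N and nothing in
   the block can trap or have side-effects; a block containing a PHI, a
   call, a store or an SSA_NAME that is also used in another block does
   not.  */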
4589 bool
4590 no_side_effect_bb (basic_block bb)
4592 gimple_stmt_iterator gsi;
4593 gimple *last;
4595 if (!gimple_seq_empty_p (phi_nodes (bb)))
4596 return false;
4597 last = last_nondebug_stmt (bb);
4598 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4600 gimple *stmt = gsi_stmt (gsi);
4601 tree lhs;
4602 imm_use_iterator imm_iter;
4603 use_operand_p use_p;
4605 if (is_gimple_debug (stmt))
4606 continue;
4607 if (gimple_has_side_effects (stmt))
4608 return false;
4609 if (stmt == last)
4610 return true;
4611 if (!is_gimple_assign (stmt))
4612 return false;
4613 lhs = gimple_assign_lhs (stmt);
4614 if (TREE_CODE (lhs) != SSA_NAME)
4615 return false;
4616 if (gimple_assign_rhs_could_trap_p (stmt))
4617 return false;
4618 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4620 gimple *use_stmt = USE_STMT (use_p);
4621 if (is_gimple_debug (use_stmt))
4622 continue;
4623 if (gimple_bb (use_stmt) != bb)
4624 return false;
4627 return false;
4630 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4631 return true and fill in *OPS recursively. */
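/* A small illustration (hypothetical SSA names): given

     _3 = a_1 < 10;
     _4 = b_2 != 0;
     _5 = _3 | _4;
     _6 = c_7 > 3;
     _8 = _5 | _6;

   get_ops (_8, BIT_IOR_EXPR, ...) recurses through _5, which is itself a
   reassociable |, and pushes the leaves _3, _4 and _6 into *OPS, provided
   each of them has a single use.  */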
4633 static bool
4634 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4635 class loop *loop)
4637 gimple *stmt = SSA_NAME_DEF_STMT (var);
4638 tree rhs[2];
4639 int i;
4641 if (!is_reassociable_op (stmt, code, loop))
4642 return false;
4644 rhs[0] = gimple_assign_rhs1 (stmt);
4645 rhs[1] = gimple_assign_rhs2 (stmt);
4646 gimple_set_visited (stmt, true);
4647 for (i = 0; i < 2; i++)
4648 if (TREE_CODE (rhs[i]) == SSA_NAME
4649 && !get_ops (rhs[i], code, ops, loop)
4650 && has_single_use (rhs[i]))
4652 operand_entry *oe = operand_entry_pool.allocate ();
4654 oe->op = rhs[i];
4655 oe->rank = code;
4656 oe->id = 0;
4657 oe->count = 1;
4658 oe->stmt_to_insert = NULL;
4659 ops->safe_push (oe);
4661 return true;
4664 /* Find the ops that were added by get_ops starting from VAR, see if
4665 they were changed during update_range_test and if yes, create new
4666 stmts. */
4668 static tree
4669 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4670 unsigned int *pidx, class loop *loop)
4672 gimple *stmt = SSA_NAME_DEF_STMT (var);
4673 tree rhs[4];
4674 int i;
4676 if (!is_reassociable_op (stmt, code, loop))
4677 return NULL;
4679 rhs[0] = gimple_assign_rhs1 (stmt);
4680 rhs[1] = gimple_assign_rhs2 (stmt);
4681 rhs[2] = rhs[0];
4682 rhs[3] = rhs[1];
4683 for (i = 0; i < 2; i++)
4684 if (TREE_CODE (rhs[i]) == SSA_NAME)
4686 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4687 if (rhs[2 + i] == NULL_TREE)
4689 if (has_single_use (rhs[i]))
4690 rhs[2 + i] = ops[(*pidx)++]->op;
4691 else
4692 rhs[2 + i] = rhs[i];
4695 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4696 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4698 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4699 var = make_ssa_name (TREE_TYPE (var));
4700 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4701 rhs[2], rhs[3]);
4702 gimple_set_uid (g, gimple_uid (stmt));
4703 gimple_set_visited (g, true);
4704 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4705 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4706 if (fold_stmt_inplace (&gsi2))
4707 update_stmt (g);
4709 return var;
4712 /* Structure to track the initial value passed to get_ops and
4713 the range in the ops vector for each basic block. */
4715 struct inter_bb_range_test_entry
4717 tree op;
4718 unsigned int first_idx, last_idx;
4721 /* Inter-bb range test optimization.
4723 Returns TRUE if a gimple conditional is optimized to a constant true/false,
4724 otherwise returns FALSE.
4726 This indicates to the caller that it should run a CFG cleanup pass
4727 once reassociation is completed. */
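/* Purely illustrative source-level example (not from the testsuite):

     if (a == 3)
       goto found;
     if (a == 5)
       goto found;
     if (a == 7)
       goto found;

   becomes a chain of basic blocks, each ending in a GIMPLE_COND with one
   edge into a common destination.  This routine gathers those conditions
   into a single operand list so that optimize_range_tests can replace
   them with fewer tests, e.g. a single bit-test against the mask
   (1 << 3) | (1 << 5) | (1 << 7).  */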
4729 static bool
4730 maybe_optimize_range_tests (gimple *stmt)
4732 basic_block first_bb = gimple_bb (stmt);
4733 basic_block last_bb = first_bb;
4734 basic_block other_bb = NULL;
4735 basic_block bb;
4736 edge_iterator ei;
4737 edge e;
4738 auto_vec<operand_entry *> ops;
4739 auto_vec<inter_bb_range_test_entry> bbinfo;
4740 bool any_changes = false;
4741 bool cfg_cleanup_needed = false;
4743 /* Consider only basic blocks that end with GIMPLE_COND or
4744 a cast statement satisfying final_range_test_p. All
4745 but the last bb in the first_bb .. last_bb range
4746 should end with GIMPLE_COND. */
4747 if (gimple_code (stmt) == GIMPLE_COND)
4749 if (EDGE_COUNT (first_bb->succs) != 2)
4750 return cfg_cleanup_needed;
4752 else if (final_range_test_p (stmt))
4753 other_bb = single_succ (first_bb);
4754 else
4755 return cfg_cleanup_needed;
4757 if (stmt_could_throw_p (cfun, stmt))
4758 return cfg_cleanup_needed;
4760 /* As relative ordering of post-dominator sons isn't fixed,
4761 maybe_optimize_range_tests can be called first on any
4762 bb in the range we want to optimize. So, start by searching
4763 backwards to see if first_bb can be set to a predecessor.
4764 while (single_pred_p (first_bb))
4766 basic_block pred_bb = single_pred (first_bb);
4767 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4768 break;
4769 if (!no_side_effect_bb (first_bb))
4770 break;
4771 first_bb = pred_bb;
4773 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4774 Before starting forward search in last_bb successors, find
4775 out the other_bb. */
4776 if (first_bb == last_bb)
4778 other_bb = NULL;
4779 /* As non-GIMPLE_COND last stmt always terminates the range,
4780 if forward search didn't discover anything, just give up. */
4781 if (gimple_code (stmt) != GIMPLE_COND)
4782 return cfg_cleanup_needed;
4783 /* Look at both successors. Either it ends with a GIMPLE_COND
4784 and satisfies suitable_cond_bb, or ends with a cast and
4785 other_bb is that cast's successor. */
4786 FOR_EACH_EDGE (e, ei, first_bb->succs)
4787 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4788 || e->dest == first_bb)
4789 return cfg_cleanup_needed;
4790 else if (single_pred_p (e->dest))
4792 stmt = last_nondebug_stmt (e->dest);
4793 if (stmt
4794 && gimple_code (stmt) == GIMPLE_COND
4795 && EDGE_COUNT (e->dest->succs) == 2)
4797 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4798 NULL, true))
4799 break;
4800 else
4801 other_bb = NULL;
4803 else if (stmt
4804 && final_range_test_p (stmt)
4805 && find_edge (first_bb, single_succ (e->dest)))
4807 other_bb = single_succ (e->dest);
4808 if (other_bb == first_bb)
4809 other_bb = NULL;
4812 if (other_bb == NULL)
4813 return cfg_cleanup_needed;
4815 /* Now do the forward search, moving last_bb to successor bbs
4816 that aren't other_bb. */
4817 while (EDGE_COUNT (last_bb->succs) == 2)
4819 FOR_EACH_EDGE (e, ei, last_bb->succs)
4820 if (e->dest != other_bb)
4821 break;
4822 if (e == NULL)
4823 break;
4824 if (!single_pred_p (e->dest))
4825 break;
4826 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4827 break;
4828 if (!no_side_effect_bb (e->dest))
4829 break;
4830 last_bb = e->dest;
4832 if (first_bb == last_bb)
4833 return cfg_cleanup_needed;
4834 /* Here basic blocks first_bb through last_bb's predecessor
4835 end with GIMPLE_COND, all of them have one of the edges to
4836 other_bb and another to another block in the range,
4837 no block except first_bb has side-effects, and
4838 last_bb ends with either a GIMPLE_COND or a cast satisfying
4839 final_range_test_p. */
4840 for (bb = last_bb; ; bb = single_pred (bb))
4842 enum tree_code code;
4843 tree lhs, rhs;
4844 inter_bb_range_test_entry bb_ent;
4846 bb_ent.op = NULL_TREE;
4847 bb_ent.first_idx = ops.length ();
4848 bb_ent.last_idx = bb_ent.first_idx;
4849 e = find_edge (bb, other_bb);
4850 stmt = last_nondebug_stmt (bb);
4851 gimple_set_visited (stmt, true);
4852 if (gimple_code (stmt) != GIMPLE_COND)
4854 use_operand_p use_p;
4855 gimple *phi;
4856 edge e2;
4857 unsigned int d;
4859 lhs = gimple_assign_lhs (stmt);
4860 rhs = gimple_assign_rhs1 (stmt);
4861 gcc_assert (bb == last_bb);
4863 /* stmt is
4864 _123 = (int) _234;
4866 _234 = a_2(D) == 2;
4868 followed by:
4869 <bb M>:
4870 # _345 = PHI <_123(N), 1(...), 1(...)>
4872 or 0 instead of 1. If it is 0, the _234
4873 range test is anded together with all the
4874 other range tests, if it is 1, it is ored with
4875 them. */
4876 single_imm_use (lhs, &use_p, &phi);
4877 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4878 e2 = find_edge (first_bb, other_bb);
4879 d = e2->dest_idx;
4880 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4881 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4882 code = BIT_AND_EXPR;
4883 else
4885 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4886 code = BIT_IOR_EXPR;
4889 /* If _234 SSA_NAME_DEF_STMT is
4890 _234 = _567 | _789;
4891 (or &, corresponding to 1/0 in the phi arguments),
4892 push into ops the individual range test arguments
4893 of the bitwise or resp. and, recursively. */
4894 if (TREE_CODE (rhs) == SSA_NAME
4895 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4896 != tcc_comparison)
4897 && !get_ops (rhs, code, &ops,
4898 loop_containing_stmt (stmt))
4899 && has_single_use (rhs))
4901 /* Otherwise, push the _234 range test itself. */
4902 operand_entry *oe = operand_entry_pool.allocate ();
4904 oe->op = rhs;
4905 oe->rank = code;
4906 oe->id = 0;
4907 oe->count = 1;
4908 oe->stmt_to_insert = NULL;
4909 ops.safe_push (oe);
4910 bb_ent.last_idx++;
4911 bb_ent.op = rhs;
4913 else if (is_gimple_assign (stmt)
4914 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4915 == tcc_comparison)
4916 && !get_ops (lhs, code, &ops,
4917 loop_containing_stmt (stmt))
4918 && has_single_use (lhs))
4920 operand_entry *oe = operand_entry_pool.allocate ();
4921 oe->op = lhs;
4922 oe->rank = code;
4923 oe->id = 0;
4924 oe->count = 1;
4925 ops.safe_push (oe);
4926 bb_ent.last_idx++;
4927 bb_ent.op = lhs;
4929 else
4931 bb_ent.last_idx = ops.length ();
4932 bb_ent.op = rhs;
4934 bbinfo.safe_push (bb_ent);
4935 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4936 ops[i]->id = bb->index;
4937 continue;
4939 else if (bb == last_bb)
4941 /* For last_bb, handle also:
4942 if (x_3(D) == 3)
4943 goto <bb 6>; [34.00%]
4944 else
4945 goto <bb 7>; [66.00%]
4947 <bb 6> [local count: 79512730]:
4949 <bb 7> [local count: 1073741824]:
4950 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4951 where bb 7 is OTHER_BB, but the PHI values from the
4952 earlier bbs match the path through the empty bb
4953 in between. */
4954 bool test_swapped_p = false;
4955 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4956 &other_bb, &test_swapped_p, true);
4957 gcc_assert (ok);
4958 if (test_swapped_p)
4959 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4961 /* Otherwise stmt is GIMPLE_COND. */
4962 code = gimple_cond_code (stmt);
4963 lhs = gimple_cond_lhs (stmt);
4964 rhs = gimple_cond_rhs (stmt);
4965 if (TREE_CODE (lhs) == SSA_NAME
4966 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4967 && ((code != EQ_EXPR && code != NE_EXPR)
4968 || rhs != boolean_false_node
4969 /* Either push into ops the individual bitwise
4970 or resp. and operands, depending on which
4971 edge is other_bb. */
4972 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4973 ^ (code == EQ_EXPR))
4974 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4975 loop_containing_stmt (stmt))))
4977 /* Or push the GIMPLE_COND stmt itself. */
4978 operand_entry *oe = operand_entry_pool.allocate ();
4980 oe->op = NULL;
4981 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4982 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4983 /* oe->op = NULL signals that there is no SSA_NAME
4984 for the range test, and oe->id instead is the
4985 number of the basic block at whose end the GIMPLE_COND
4986 is. */
4987 oe->id = bb->index;
4988 oe->count = 1;
4989 oe->stmt_to_insert = NULL;
4990 ops.safe_push (oe);
4991 bb_ent.op = NULL;
4992 bb_ent.last_idx++;
4994 else if (ops.length () > bb_ent.first_idx)
4996 bb_ent.op = lhs;
4997 bb_ent.last_idx = ops.length ();
4999 bbinfo.safe_push (bb_ent);
5000 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
5001 ops[i]->id = bb->index;
5002 if (bb == first_bb)
5003 break;
5005 if (ops.length () > 1)
5006 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5007 if (any_changes)
5009 unsigned int idx, max_idx = 0;
5010 /* update_ops relies on has_single_use predicates returning the
5011 same values as they did during get_ops earlier. Additionally it
5012 never removes statements, only adds new ones and it should walk
5013 from the single imm use and check the predicate already before
5014 making those changes.
5015 On the other hand, the handling of GIMPLE_COND directly can turn
5016 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5017 it needs to be done in a separate loop afterwards. */
5018 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5020 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5021 && bbinfo[idx].op != NULL_TREE)
5023 tree new_op;
5025 max_idx = idx;
5026 stmt = last_nondebug_stmt (bb);
5027 new_op = update_ops (bbinfo[idx].op,
5028 (enum tree_code)
5029 ops[bbinfo[idx].first_idx]->rank,
5030 ops, &bbinfo[idx].first_idx,
5031 loop_containing_stmt (stmt));
5032 if (new_op == NULL_TREE)
5034 gcc_assert (bb == last_bb);
5035 new_op = ops[bbinfo[idx].first_idx++]->op;
5037 if (bbinfo[idx].op != new_op)
5039 imm_use_iterator iter;
5040 use_operand_p use_p;
5041 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5043 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5044 if (is_gimple_debug (use_stmt))
5045 continue;
5046 else if (gimple_code (use_stmt) == GIMPLE_COND
5047 || gimple_code (use_stmt) == GIMPLE_PHI)
5048 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5049 SET_USE (use_p, new_op);
5050 else if ((is_gimple_assign (use_stmt)
5051 && (TREE_CODE_CLASS
5052 (gimple_assign_rhs_code (use_stmt))
5053 == tcc_comparison)))
5054 cast_or_tcc_cmp_stmt = use_stmt;
5055 else if (gimple_assign_cast_p (use_stmt))
5056 cast_or_tcc_cmp_stmt = use_stmt;
5057 else
5058 gcc_unreachable ();
5060 if (cast_or_tcc_cmp_stmt)
5062 gcc_assert (bb == last_bb);
5063 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5064 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5065 enum tree_code rhs_code
5066 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5067 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5068 : CONVERT_EXPR;
5069 gassign *g;
5070 if (is_gimple_min_invariant (new_op))
5072 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5073 g = gimple_build_assign (new_lhs, new_op);
5075 else
5076 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5077 gimple_stmt_iterator gsi
5078 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5079 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5080 gimple_set_visited (g, true);
5081 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5082 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5083 if (is_gimple_debug (use_stmt))
5084 continue;
5085 else if (gimple_code (use_stmt) == GIMPLE_COND
5086 || gimple_code (use_stmt) == GIMPLE_PHI)
5087 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5088 SET_USE (use_p, new_lhs);
5089 else
5090 gcc_unreachable ();
5094 if (bb == first_bb)
5095 break;
5097 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5099 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5100 && bbinfo[idx].op == NULL_TREE
5101 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5103 gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5105 if (idx > max_idx)
5106 max_idx = idx;
5108 /* If we collapse the conditional to a true/false
5109 condition, then bubble that knowledge up to our caller. */
5110 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5112 gimple_cond_make_false (cond_stmt);
5113 cfg_cleanup_needed = true;
5115 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5117 gimple_cond_make_true (cond_stmt);
5118 cfg_cleanup_needed = true;
5120 else
5122 gimple_cond_set_code (cond_stmt, NE_EXPR);
5123 gimple_cond_set_lhs (cond_stmt,
5124 ops[bbinfo[idx].first_idx]->op);
5125 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5127 update_stmt (cond_stmt);
5129 if (bb == first_bb)
5130 break;
5133 /* The above changes could result in basic blocks after the first
5134 modified one, up to and including last_bb, being executed even if
5135 they would not be in the original program. If the value ranges of
5136 assignment lhs' in those bbs were dependent on the conditions
5137 guarding those basic blocks which now can change, the VRs might
5138 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5139 are only used within the same bb, it should not be a big deal if
5140 we just reset all the VRs in those bbs. See PR68671. */
5141 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5142 reset_flow_sensitive_info_in_bb (bb);
5144 return cfg_cleanup_needed;
5147 /* Remove the def stmt of VAR if VAR has zero uses, and if so recurse
5148 on its rhs1 operand. */
5150 static void
5151 remove_visited_stmt_chain (tree var)
5153 gimple *stmt;
5154 gimple_stmt_iterator gsi;
5156 while (1)
5158 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5159 return;
5160 stmt = SSA_NAME_DEF_STMT (var);
5161 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5163 var = gimple_assign_rhs1 (stmt);
5164 gsi = gsi_for_stmt (stmt);
5165 reassoc_remove_stmt (&gsi);
5166 release_defs (stmt);
5168 else
5169 return;
5173 /* This function checks three consecutive operands in
5174 the passed operands vector OPS starting from OPINDEX and
5175 swaps two operands if it is profitable for the binary operation
5176 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5178 We pair ops with the same rank if possible. */
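/* For example (hypothetical ranks): with ranks {3, 3, 5} at OPINDEX the
   first and third entries are swapped, giving {5, 3, 3}, so the two
   rank-3 operands feed the same binary statement; with ranks {3, 5, 3}
   the first and second entries are swapped instead, with the same
   effect.  */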
5180 static void
5181 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5182 unsigned int opindex)
5184 operand_entry *oe1, *oe2, *oe3;
5186 oe1 = ops[opindex];
5187 oe2 = ops[opindex + 1];
5188 oe3 = ops[opindex + 2];
5190 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5191 std::swap (*oe1, *oe3);
5192 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5193 std::swap (*oe1, *oe2);
5196 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5197 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5198 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5199 after the returned stmt. */
5201 static inline gimple *
5202 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5204 insert_before = true;
5205 if (TREE_CODE (rhs1) == SSA_NAME
5206 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5208 stmt = SSA_NAME_DEF_STMT (rhs1);
5209 insert_before = false;
5211 if (TREE_CODE (rhs2) == SSA_NAME
5212 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5214 stmt = SSA_NAME_DEF_STMT (rhs2);
5215 insert_before = false;
5217 return stmt;
5220 /* If the stmt that defines operand has to be inserted, insert it
5221 before the use. */
5222 static void
5223 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5225 gcc_assert (is_gimple_assign (stmt_to_insert));
5226 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5227 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5228 bool insert_before;
5229 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5230 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5231 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5233 /* If the insert point is not stmt, then insert_point would be
5234 the point where operand rhs1 or rhs2 is defined. In this case,
5235 stmt_to_insert has to be inserted afterwards. This would
5236 only happen when the stmt insertion point is flexible. */
5237 if (insert_before)
5238 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5239 else
5240 insert_stmt_after (stmt_to_insert, insert_point);
5244 /* Recursively rewrite our linearized statements so that the operators
5245 match those in OPS[OPINDEX], putting the computation in rank
5246 order. Return new lhs.
5247 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5248 the current stmt and during recursive invocations.
5249 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5250 recursive invocations. */
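/* A small illustration (hypothetical SSA names): for the linearized chain

     _1 = a + b;  _2 = _1 + c;  _3 = _2 + d;

   and an already sorted operand list {w, x, y, z}, the recursion walks
   down the rhs1 chain and rewrites, deepest statement first,

     _1 = y + z;  _2 = _1 + x;  _3 = _2 + w;

   modulo the fresh SSA_NAMEs that are created whenever CHANGED forces new
   statements.  */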
5252 static tree
5253 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5254 const vec<operand_entry *> &ops, bool changed,
5255 bool next_changed)
5257 tree rhs1 = gimple_assign_rhs1 (stmt);
5258 tree rhs2 = gimple_assign_rhs2 (stmt);
5259 tree lhs = gimple_assign_lhs (stmt);
5260 operand_entry *oe;
5262 /* The final recursion case for this function is that you have
5263 exactly two operations left.
5264 If we had exactly one op in the entire list to start with, we
5265 would have never called this function, and the tail recursion
5266 rewrites them one at a time. */
5267 if (opindex + 2 == ops.length ())
5269 operand_entry *oe1, *oe2;
5271 oe1 = ops[opindex];
5272 oe2 = ops[opindex + 1];
5274 if (rhs1 != oe1->op || rhs2 != oe2->op)
5276 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5277 unsigned int uid = gimple_uid (stmt);
5279 if (dump_file && (dump_flags & TDF_DETAILS))
5281 fprintf (dump_file, "Transforming ");
5282 print_gimple_stmt (dump_file, stmt, 0);
5285 /* If the stmt that defines operand has to be inserted, insert it
5286 before the use. */
5287 if (oe1->stmt_to_insert)
5288 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5289 if (oe2->stmt_to_insert)
5290 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5291 /* Even when changed is false, reassociation could have e.g. removed
5292 some redundant operations, so unless we are just swapping the
5293 arguments or unless there is no change at all (then we just
5294 return lhs), force creation of a new SSA_NAME. */
5295 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5297 bool insert_before;
5298 gimple *insert_point
5299 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5300 lhs = make_ssa_name (TREE_TYPE (lhs));
5301 stmt
5302 = gimple_build_assign (lhs, rhs_code,
5303 oe1->op, oe2->op);
5304 gimple_set_uid (stmt, uid);
5305 gimple_set_visited (stmt, true);
5306 if (insert_before)
5307 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5308 else
5309 insert_stmt_after (stmt, insert_point);
5311 else
5313 bool insert_before;
5314 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5315 insert_before)
5316 == stmt);
5317 gimple_assign_set_rhs1 (stmt, oe1->op);
5318 gimple_assign_set_rhs2 (stmt, oe2->op);
5319 update_stmt (stmt);
5322 if (rhs1 != oe1->op && rhs1 != oe2->op)
5323 remove_visited_stmt_chain (rhs1);
5325 if (dump_file && (dump_flags & TDF_DETAILS))
5327 fprintf (dump_file, " into ");
5328 print_gimple_stmt (dump_file, stmt, 0);
5331 return lhs;
5334 /* If we hit here, we should have 3 or more ops left. */
5335 gcc_assert (opindex + 2 < ops.length ());
5337 /* Rewrite the next operator. */
5338 oe = ops[opindex];
5340 /* If the stmt that defines operand has to be inserted, insert it
5341 before the use. */
5342 if (oe->stmt_to_insert)
5343 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5345 /* Recurse on the LHS of the binary operator, which is guaranteed to
5346 be the non-leaf side. */
5347 tree new_rhs1
5348 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5349 changed || oe->op != rhs2 || next_changed,
5350 false);
5352 if (oe->op != rhs2 || new_rhs1 != rhs1)
5354 if (dump_file && (dump_flags & TDF_DETAILS))
5356 fprintf (dump_file, "Transforming ");
5357 print_gimple_stmt (dump_file, stmt, 0);
5360 /* If changed is false, this is either opindex == 0
5361 or all outer rhs2's were equal to corresponding oe->op,
5362 and powi_result is NULL.
5363 That means lhs is equivalent before and after reassociation.
5364 Otherwise ensure the old lhs SSA_NAME is not reused and
5365 create a new stmt as well, so that any debug stmts will be
5366 properly adjusted. */
5367 if (changed)
5369 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5370 unsigned int uid = gimple_uid (stmt);
5371 bool insert_before;
5372 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5373 insert_before);
5375 lhs = make_ssa_name (TREE_TYPE (lhs));
5376 stmt = gimple_build_assign (lhs, rhs_code,
5377 new_rhs1, oe->op);
5378 gimple_set_uid (stmt, uid);
5379 gimple_set_visited (stmt, true);
5380 if (insert_before)
5381 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5382 else
5383 insert_stmt_after (stmt, insert_point);
5385 else
5387 bool insert_before;
5388 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5389 insert_before)
5390 == stmt);
5391 gimple_assign_set_rhs1 (stmt, new_rhs1);
5392 gimple_assign_set_rhs2 (stmt, oe->op);
5393 update_stmt (stmt);
5396 if (dump_file && (dump_flags & TDF_DETAILS))
5398 fprintf (dump_file, " into ");
5399 print_gimple_stmt (dump_file, stmt, 0);
5402 return lhs;
5405 /* Find out how many cycles we need to compute the statement chain.
5406 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5407 maximum number of independent statements we may execute per cycle. */
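/* Worked example (illustrative numbers): for ops_num = 10 and
   cpu_width = 2, res starts as 10 / 4 = 2 cycles, leaving
   rest = 10 - 2 * 2 = 6 operands; 6 is not a power of two, so
   floor_log2 (6) + 1 = 3 more cycles are added, giving 5 cycles in
   total.  */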
5409 static int
5410 get_required_cycles (int ops_num, int cpu_width)
5412 int res;
5413 int elog;
5414 unsigned int rest;
5416 /* While we have more than 2 * cpu_width operands
5417 we may reduce the number of operands by cpu_width
5418 per cycle. */
5419 res = ops_num / (2 * cpu_width);
5421 /* The remaining operand count may be halved each cycle
5422 until we have only one operand. */
5423 rest = (unsigned)(ops_num - res * cpu_width);
5424 elog = exact_log2 (rest);
5425 if (elog >= 0)
5426 res += elog;
5427 else
5428 res += floor_log2 (rest) + 1;
5430 return res;
5433 /* Returns an optimal number of registers to use for computation of the
5434 given statements.
5436 LHS is the result SSA name of OPS. */
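/* Worked example (illustrative numbers): with 6 operands and a target
   width of 4, get_required_cycles (6, 4) is 3; the binary search below
   finds that get_required_cycles (6, 2) is also 3 while width 1 would
   need 5 cycles, so a width of 2 is returned, using fewer registers for
   the same latency.  */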
5438 static int
5439 get_reassociation_width (vec<operand_entry *> *ops, tree lhs,
5440 enum tree_code opc, machine_mode mode)
5442 int param_width = param_tree_reassoc_width;
5443 int width;
5444 int width_min;
5445 int cycles_best;
5446 int ops_num = ops->length ();
5448 if (param_width > 0)
5449 width = param_width;
5450 else
5451 width = targetm.sched.reassociation_width (opc, mode);
5453 if (width == 1)
5454 return width;
5456 /* Get the minimal time required for sequence computation. */
5457 cycles_best = get_required_cycles (ops_num, width);
5459 /* Check if we may use less width and still compute the sequence in
5460 the same time. This will allow us to reduce register usage.
5461 get_required_cycles is monotonically increasing with lower width
5462 so we can perform a binary search for the minimal width that still
5463 results in the optimal cycle count. */
5464 width_min = 1;
5465 while (width > width_min)
5467 int width_mid = (width + width_min) / 2;
5469 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5470 width = width_mid;
5471 else if (width_min < width_mid)
5472 width_min = width_mid;
5473 else
5474 break;
5477 /* If there's a loop-dependent FMA result, return width=2 to avoid it. This is
5478 better than skipping these FMA candidates in widening_mul. */
5479 if (width == 1
5480 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
5481 param_avoid_fma_max_bits))
5483 /* Look for cross backedge dependency:
5484 1. LHS is a phi argument in the same basic block where it is defined.
5485 2. And the result of the phi node is used in OPS. */
5486 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));
5488 use_operand_p use_p;
5489 imm_use_iterator iter;
5490 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5491 if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p)))
5493 if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
5494 != bb)
5495 continue;
5496 tree phi_result = gimple_phi_result (phi);
5497 operand_entry *oe;
5498 unsigned int j;
5499 FOR_EACH_VEC_ELT (*ops, j, oe)
5501 if (TREE_CODE (oe->op) != SSA_NAME)
5502 continue;
5504 /* Result of phi is operand of PLUS_EXPR. */
5505 if (oe->op == phi_result)
5506 return 2;
5508 /* Check if the result of the phi is an operand of MULT_EXPR. */
5509 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5510 if (is_gimple_assign (def_stmt)
5511 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
5513 tree rhs = gimple_assign_rhs1 (def_stmt);
5514 if (TREE_CODE (rhs) == SSA_NAME)
5516 if (rhs == phi_result)
5517 return 2;
5518 def_stmt = SSA_NAME_DEF_STMT (rhs);
5521 if (is_gimple_assign (def_stmt)
5522 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
5524 if (gimple_assign_rhs1 (def_stmt) == phi_result
5525 || gimple_assign_rhs2 (def_stmt) == phi_result)
5526 return 2;
5532 return width;
5535 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5536 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5537 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5539 /* Rewrite statements with a dependency chain with regard to the chance to generate
5540 FMA.
5541 For the chain with FMA: Try to keep fma opportunity as much as possible.
5542 For the chain without FMA: Put the computation in rank order and try
5543 to allow operations to be executed in parallel.
5544 E.g.
5545 e + f + a * b + c * d;
5547 ssa1 = e + a * b;
5548 ssa2 = f + c * d;
5549 ssa3 = ssa1 + ssa2;
5551 This reassociation approach preserves the chance of fma generation as much
5552 as possible.
5554 Another thing is to avoid adding loop-carried ops to long chains, otherwise
5555 the whole chain will have dependencies across the loop iteration. Just keep
5556 loop-carried ops in a separate chain.
5557 E.g.
5558 x_1 = phi (x_0, x_2)
5559 y_1 = phi (y_0, y_2)
5561 a + b + c + d + e + x1 + y1
5563 SSA1 = a + b;
5564 SSA2 = c + d;
5565 SSA3 = SSA1 + e;
5566 SSA4 = SSA3 + SSA2;
5567 SSA5 = x1 + y1;
5568 SSA6 = SSA4 + SSA5;
5570 static void
5571 rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5572 const vec<operand_entry *> &ops)
5574 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5575 int op_num = ops.length ();
5576 int op_normal_num = op_num;
5577 gcc_assert (op_num > 0);
5578 int stmt_num = op_num - 1;
5579 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5580 int i = 0, j = 0;
5581 tree tmp_op[2], op1;
5582 operand_entry *oe;
5583 gimple *stmt1 = NULL;
5584 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5585 int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5586 int width_active = 0, width_count = 0;
5587 bool has_biased = false, ops_changed = false;
5588 auto_vec<operand_entry *> ops_normal;
5589 auto_vec<operand_entry *> ops_biased;
5590 vec<operand_entry *> *ops1;
5592 /* We start expression rewriting from the top statements.
5593 So, in this loop we create a full list of statements
5594 we will work with. */
5595 stmts[stmt_num - 1] = stmt;
5596 for (i = stmt_num - 2; i >= 0; i--)
5597 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5599 /* Avoid adding loop-carried ops to long chains; first filter out the
5600 loop-carried ops. But we need to make sure that the length of the remainder
5601 is not less than 4, which is the smallest ops length for which we can break
5602 the dependency. */
5603 FOR_EACH_VEC_ELT (ops, i, oe)
5605 if (TREE_CODE (oe->op) == SSA_NAME
5606 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5607 && op_normal_num > 4)
5609 ops_biased.safe_push (oe);
5610 has_biased = true;
5611 op_normal_num --;
5613 else
5614 ops_normal.safe_push (oe);
5617 /* Width should not be larger than ops length / 2, since we cannot create
5618 more parallel dependency chains than that. */
5619 int width_normal = op_normal_num / 2;
5620 int width_biased = (op_num - op_normal_num) / 2;
5621 width_normal = width <= width_normal ? width : width_normal;
5622 width_biased = width <= width_biased ? width : width_biased;
5624 ops1 = &ops_normal;
5625 width_count = width_active = width_normal;
5627 /* Build parallel dependency chain according to width. */
5628 for (i = 0; i < stmt_num; i++)
5630 if (dump_file && (dump_flags & TDF_DETAILS))
5632 fprintf (dump_file, "Transforming ");
5633 print_gimple_stmt (dump_file, stmts[i], 0);
5636 /* When the normal ops are finished but the loop is not over,
5637 continue with the biased ops. */
5638 if (width_count == 0 && ops1 == &ops_normal)
5640 ops1 = &ops_biased;
5641 width_count = width_active = width_biased;
5642 ops_changed = true;
5645 /* Swap the operands if no FMA in the chain. */
5646 if (ops1->length () > 2 && !has_fma)
5647 swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5649 if (i < width_active
5650 || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5652 for (j = 0; j < 2; j++)
5654 oe = ops1->pop ();
5655 tmp_op[j] = oe->op;
5656 /* If the stmt that defines operand has to be inserted, insert it
5657 before the use. */
5658 stmt1 = oe->stmt_to_insert;
5659 if (stmt1)
5660 insert_stmt_before_use (stmts[i], stmt1);
5661 stmt1 = NULL;
5663 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5664 tmp_op[1],
5665 tmp_op[0],
5666 opcode);
5667 gimple_set_visited (stmts[i], true);
5670 else
5672 /* We keep original statement only for the last one. All others are
5673 recreated. */
5674 if (!ops1->length ())
5676 /* For a biased ops length equal to 2. */
5677 if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5678 last_rhs2_stmt_index = i - 1;
5680 /* When width_count == 2 and there are no biased ops, just finish. */
5681 if (width_count == NORMAL_END_STMT && !has_biased)
5683 last_rhs1_stmt_index = i - 1;
5684 last_rhs2_stmt_index = i - 2;
5686 if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5688 /* We keep original statement only for the last one. All
5689 others are recreated. */
5690 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5691 (stmts[last_rhs1_stmt_index]));
5692 gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5693 (stmts[last_rhs2_stmt_index]));
5694 update_stmt (stmts[i]);
5696 else
5698 stmts[i] =
5699 build_and_add_sum (TREE_TYPE (last_rhs1),
5700 gimple_assign_lhs (stmts[i-width_count]),
5701 gimple_assign_lhs
5702 (stmts[i-width_count+1]),
5703 opcode);
5704 gimple_set_visited (stmts[i], true);
5705 width_count--;
5707 /* It is the end of the normal or biased ops.
5708 last_rhs1_stmt_index is used to record the last stmt index
5709 for normal ops. last_rhs2_stmt_index is used to record the
5710 last stmt index for biased ops. */
5711 if (width_count == BIASED_END_STMT)
5713 gcc_assert (has_biased);
5714 if (ops_biased.length ())
5715 last_rhs1_stmt_index = i;
5716 else
5717 last_rhs2_stmt_index = i;
5718 width_count--;
5722 else
5724 /* Attach the remaining ops to the parallel dependency chain. */
5725 oe = ops1->pop ();
5726 op1 = oe->op;
5727 stmt1 = oe->stmt_to_insert;
5728 if (stmt1)
5729 insert_stmt_before_use (stmts[i], stmt1);
5730 stmt1 = NULL;
5732 /* For the case of a single biased op. */
5733 if (width_count == SPECIAL_BIASED_END_STMT)
5735 /* We keep original statement only for the last one. All
5736 others are recreated. */
5737 gcc_assert (has_biased);
5738 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5739 (stmts[last_rhs1_stmt_index]));
5740 gimple_assign_set_rhs2 (stmts[i], op1);
5741 update_stmt (stmts[i]);
5743 else
5745 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5746 gimple_assign_lhs
5747 (stmts[i-width_active]),
5748 op1,
5749 opcode);
5750 gimple_set_visited (stmts[i], true);
5755 if (dump_file && (dump_flags & TDF_DETAILS))
5757 fprintf (dump_file, " into ");
5758 print_gimple_stmt (dump_file, stmts[i], 0);
5762 remove_visited_stmt_chain (last_rhs1);
5765 /* Transform STMT, which is really (A + B) + (C + D), into the left
5766 linear form ((A+B)+C)+D.
5767 Recurse on D if necessary. */
5769 static void
5770 linearize_expr (gimple *stmt)
5772 gimple_stmt_iterator gsi;
5773 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5774 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5775 gimple *oldbinrhs = binrhs;
5776 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5777 gimple *newbinrhs = NULL;
5778 class loop *loop = loop_containing_stmt (stmt);
5779 tree lhs = gimple_assign_lhs (stmt);
5781 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5782 && is_reassociable_op (binrhs, rhscode, loop));
5784 gsi = gsi_for_stmt (stmt);
5786 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5787 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5788 gimple_assign_rhs_code (binrhs),
5789 gimple_assign_lhs (binlhs),
5790 gimple_assign_rhs2 (binrhs));
5791 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5792 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5793 gimple_set_uid (binrhs, gimple_uid (stmt));
5795 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5796 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5798 if (dump_file && (dump_flags & TDF_DETAILS))
5800 fprintf (dump_file, "Linearized: ");
5801 print_gimple_stmt (dump_file, stmt, 0);
5804 reassociate_stats.linearized++;
5805 update_stmt (stmt);
5807 gsi = gsi_for_stmt (oldbinrhs);
5808 reassoc_remove_stmt (&gsi);
5809 release_defs (oldbinrhs);
5811 gimple_set_visited (stmt, true);
5812 gimple_set_visited (binlhs, true);
5813 gimple_set_visited (binrhs, true);
5815 /* Tail recurse on the new rhs if it still needs reassociation. */
5816 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5817 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5818 want to change the algorithm while converting to tuples. */
5819 linearize_expr (stmt);
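/* A sketch of the rewrite performed above, with invented SSA names:
   given

     t1_1 = a_2 + b_3;
     t2_4 = c_5 + d_6;
     x_7  = t1_1 + t2_4;

   the new add is inserted and the last statement rewritten as

     t3_8 = t1_1 + d_6;
     x_7  = t3_8 + c_5;

   so the right operand of x_7's definition is no longer another add
   of the chain.  If c_5 happens to be defined by a reassociable add
   of the same code, the tail recursion above repeats the rewrite.  */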
5822 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5823 it. Otherwise, return NULL. */
5825 static gimple *
5826 get_single_immediate_use (tree lhs)
5828 use_operand_p immuse;
5829 gimple *immusestmt;
5831 if (TREE_CODE (lhs) == SSA_NAME
5832 && single_imm_use (lhs, &immuse, &immusestmt)
5833 && is_gimple_assign (immusestmt))
5834 return immusestmt;
5836 return NULL;
5839 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5840 representing the negated value. Insertions of any necessary
5841 instructions go before GSI.
5842 This function is recursive in that, if you hand it "a_5" as the
5843 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5844 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5846 static tree
5847 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5849 gimple *negatedefstmt = NULL;
5850 tree resultofnegate;
5851 gimple_stmt_iterator gsi;
5852 unsigned int uid;
5854 /* If we are trying to negate a name, defined by an add, negate the
5855 add operands instead. */
5856 if (TREE_CODE (tonegate) == SSA_NAME)
5857 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5858 if (TREE_CODE (tonegate) == SSA_NAME
5859 && is_gimple_assign (negatedefstmt)
5860 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5861 && has_single_use (gimple_assign_lhs (negatedefstmt))
5862 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5864 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5865 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5866 tree lhs = gimple_assign_lhs (negatedefstmt);
5867 gimple *g;
5869 gsi = gsi_for_stmt (negatedefstmt);
5870 rhs1 = negate_value (rhs1, &gsi);
5872 gsi = gsi_for_stmt (negatedefstmt);
5873 rhs2 = negate_value (rhs2, &gsi);
5875 gsi = gsi_for_stmt (negatedefstmt);
5876 lhs = make_ssa_name (TREE_TYPE (lhs));
5877 gimple_set_visited (negatedefstmt, true);
5878 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5879 gimple_set_uid (g, gimple_uid (negatedefstmt));
5880 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5881 return lhs;
5884 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5885 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5886 NULL_TREE, true, GSI_SAME_STMT);
5887 gsi = *gsip;
5888 uid = gimple_uid (gsi_stmt (gsi));
5889 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5891 gimple *stmt = gsi_stmt (gsi);
5892 if (gimple_uid (stmt) != 0)
5893 break;
5894 gimple_set_uid (stmt, uid);
5896 return resultofnegate;
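/* A minimal sketch of negate_value, with invented SSA names: asked
   to negate a_5 where

     a_5 = b_3 + b_4;    <-- single use

   it recursively negates both addends at the definition point and
   rebuilds the add, roughly

     n1_6 = -b_3;
     n2_7 = -b_4;
     a_8  = n1_6 + n2_7;

   returning the fresh name a_8.  A value that is not defined by a
   single-use PLUS_EXPR is simply wrapped in a NEGATE_EXPR instead.  */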
5899 /* Return true if we should break up the subtract in STMT into an add
5900 with negate. This is true when the subtract operands are really
5901 adds, or the subtract itself is used in an add expression. In
5902 either case, breaking up the subtract into an add with negate
5903 exposes the adds to reassociation. */
5905 static bool
5906 should_break_up_subtract (gimple *stmt)
5908 tree lhs = gimple_assign_lhs (stmt);
5909 tree binlhs = gimple_assign_rhs1 (stmt);
5910 tree binrhs = gimple_assign_rhs2 (stmt);
5911 gimple *immusestmt;
5912 class loop *loop = loop_containing_stmt (stmt);
5914 if (TREE_CODE (binlhs) == SSA_NAME
5915 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5916 return true;
5918 if (TREE_CODE (binrhs) == SSA_NAME
5919 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5920 return true;
5922 if (TREE_CODE (lhs) == SSA_NAME
5923 && (immusestmt = get_single_immediate_use (lhs))
5924 && is_gimple_assign (immusestmt)
5925 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5926 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5927 && gimple_assign_rhs1 (immusestmt) == lhs)
5928 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5929 return true;
5930 return false;
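/* For illustration (variable names are arbitrary): in

     c_4 = a_1 + b_2;
     d_5 = c_4 - e_3;

   the predicate returns true for the second statement because its
   first operand is defined by a reassociable PLUS_EXPR.  It likewise
   returns true when the subtract's single use is an addition, a
   multiplication, or the first operand of another subtract, since
   rewriting A - B as A + -B then exposes the adds to reassociation.  */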
5933 /* Transform STMT from A - B into A + -B. */
5935 static void
5936 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5938 tree rhs1 = gimple_assign_rhs1 (stmt);
5939 tree rhs2 = gimple_assign_rhs2 (stmt);
5941 if (dump_file && (dump_flags & TDF_DETAILS))
5943 fprintf (dump_file, "Breaking up subtract ");
5944 print_gimple_stmt (dump_file, stmt, 0);
5947 rhs2 = negate_value (rhs2, gsip);
5948 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5949 update_stmt (stmt);
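/* Sketch of the rewrite, with invented SSA names:

     x_3 = a_1 - b_2;

   becomes

     t_4 = -b_2;
     x_3 = a_1 + t_4;

   where the negate comes from negate_value above and may itself have
   been pushed into the definition of b_2 when that was a single-use
   add.  */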
5952 /* Determine whether STMT is a builtin call that raises an SSA name
5953 to an integer power and has only one use. If so, and this is early
5954 reassociation and unsafe math optimizations are permitted, place
5955 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5956 If any of these conditions does not hold, return FALSE. */
5958 static bool
5959 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5961 tree arg1;
5962 REAL_VALUE_TYPE c, cint;
5964 switch (gimple_call_combined_fn (stmt))
5966 CASE_CFN_POW:
5967 if (flag_errno_math)
5968 return false;
5970 *base = gimple_call_arg (stmt, 0);
5971 arg1 = gimple_call_arg (stmt, 1);
5973 if (TREE_CODE (arg1) != REAL_CST)
5974 return false;
5976 c = TREE_REAL_CST (arg1);
5978 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5979 return false;
5981 *exponent = real_to_integer (&c);
5982 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5983 if (!real_identical (&c, &cint))
5984 return false;
5986 break;
5988 CASE_CFN_POWI:
5989 *base = gimple_call_arg (stmt, 0);
5990 arg1 = gimple_call_arg (stmt, 1);
5992 if (!tree_fits_shwi_p (arg1))
5993 return false;
5995 *exponent = tree_to_shwi (arg1);
5996 break;
5998 default:
5999 return false;
6002 /* Expanding negative exponents is generally unproductive, so we don't
6003 complicate matters with those. Exponents of zero and one should
6004 have been handled by expression folding. */
6005 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
6006 return false;
6008 return true;
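/* Examples of what the check above accepts and rejects (assuming the
   caller permits it and, for pow, that -fmath-errno is not in
   effect):

     __builtin_pow  (x, 4.0)   -> *base = x, *exponent = 4
     __builtin_powi (x, 3)     -> *base = x, *exponent = 3
     __builtin_pow  (x, 2.5)   -> rejected, exponent not integral
     __builtin_powi (x, -2)    -> rejected, negative exponent
     __builtin_powi (x, 1)     -> rejected, exponent below 2  */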
6011 /* Try to derive and add operand entry for OP to *OPS. Return false if
6012 unsuccessful. */
6014 static bool
6015 try_special_add_to_ops (vec<operand_entry *> *ops,
6016 enum tree_code code,
6017 tree op, gimple* def_stmt)
6019 tree base = NULL_TREE;
6020 HOST_WIDE_INT exponent = 0;
6022 if (TREE_CODE (op) != SSA_NAME
6023 || ! has_single_use (op))
6024 return false;
6026 if (code == MULT_EXPR
6027 && reassoc_insert_powi_p
6028 && flag_unsafe_math_optimizations
6029 && is_gimple_call (def_stmt)
6030 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
6032 add_repeat_to_ops_vec (ops, base, exponent);
6033 gimple_set_visited (def_stmt, true);
6034 return true;
6036 else if (code == MULT_EXPR
6037 && is_gimple_assign (def_stmt)
6038 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6039 && !HONOR_SNANS (TREE_TYPE (op))
6040 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
6041 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
6042 && (!FLOAT_TYPE_P (TREE_TYPE (op))
6043 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
6045 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6046 tree cst = build_minus_one_cst (TREE_TYPE (op));
6047 add_to_ops_vec (ops, rhs1);
6048 add_to_ops_vec (ops, cst);
6049 gimple_set_visited (def_stmt, true);
6050 return true;
6053 return false;
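/* Two sketches of the special cases above for a MULT_EXPR chain
   (names invented):

     t_3 = __builtin_powi (x_1, 3);   ... * t_3 * ...
       -> the pair (x_1, 3) is recorded through add_repeat_to_ops_vec
          instead of the plain operand t_3;

     n_4 = -y_2;                      ... * n_4 * ...
       -> the operands y_2 and -1 are pushed instead, so the sign can
          be folded with other constants in the chain (subject to the
          floating-point safety checks above).  */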
6056 /* Recursively linearize a binary expression that is the RHS of STMT.
6057 Place the operands of the expression tree in the vector named OPS. */
6059 static void
6060 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
6061 bool is_associative, bool set_visited)
6063 tree binlhs = gimple_assign_rhs1 (stmt);
6064 tree binrhs = gimple_assign_rhs2 (stmt);
6065 gimple *binlhsdef = NULL, *binrhsdef = NULL;
6066 bool binlhsisreassoc = false;
6067 bool binrhsisreassoc = false;
6068 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
6069 class loop *loop = loop_containing_stmt (stmt);
6071 if (set_visited)
6072 gimple_set_visited (stmt, true);
6074 if (TREE_CODE (binlhs) == SSA_NAME)
6076 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
6077 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
6078 && !stmt_could_throw_p (cfun, binlhsdef));
6081 if (TREE_CODE (binrhs) == SSA_NAME)
6083 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
6084 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
6085 && !stmt_could_throw_p (cfun, binrhsdef));
6088 /* If the LHS is not reassociable, but the RHS is, we need to swap
6089 them. If neither is reassociable, there is nothing we can do, so
6090 just put them in the ops vector. If the LHS is reassociable,
6091 linearize it. If both are reassociable, then linearize the RHS
6092 and the LHS. */
6094 if (!binlhsisreassoc)
6096 /* If this is not an associative operation like division, give up. */
6097 if (!is_associative)
6099 add_to_ops_vec (ops, binrhs);
6100 return;
6103 if (!binrhsisreassoc)
6105 bool swap = false;
6106 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6107 /* If we add ops for the rhs we expect to be able to recurse
6108 to it via the lhs during expression rewrite, so swap the
6109 operands. */
6110 swap = true;
6111 else
6112 add_to_ops_vec (ops, binrhs);
6114 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
6115 add_to_ops_vec (ops, binlhs);
6117 if (!swap)
6118 return;
6121 if (dump_file && (dump_flags & TDF_DETAILS))
6123 fprintf (dump_file, "swapping operands of ");
6124 print_gimple_stmt (dump_file, stmt, 0);
6127 swap_ssa_operands (stmt,
6128 gimple_assign_rhs1_ptr (stmt),
6129 gimple_assign_rhs2_ptr (stmt));
6130 update_stmt (stmt);
6132 if (dump_file && (dump_flags & TDF_DETAILS))
6134 fprintf (dump_file, " is now ");
6135 print_gimple_stmt (dump_file, stmt, 0);
6137 if (!binrhsisreassoc)
6138 return;
6140 /* We want to make it so the lhs is always the reassociative op,
6141 so swap. */
6142 std::swap (binlhs, binrhs);
6144 else if (binrhsisreassoc)
6146 linearize_expr (stmt);
6147 binlhs = gimple_assign_rhs1 (stmt);
6148 binrhs = gimple_assign_rhs2 (stmt);
6151 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6152 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6153 rhscode, loop));
6154 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6155 is_associative, set_visited);
6157 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6158 add_to_ops_vec (ops, binrhs);
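/* Rough picture of the effect on x = (a + b) + (c + d), leaving SSA
   versions out: after the recursion and swaps above, OPS simply
   collects the leaves { a, b, c, d } (or their powi/negate
   replacements from try_special_add_to_ops), and all intermediate
   adds are marked visited so they are not processed again.  */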
6161 /* Repropagate the negates back into subtracts, since no other pass
6162 currently does it. */
6164 static void
6165 repropagate_negates (void)
6167 unsigned int i = 0;
6168 tree negate;
6170 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6172 gimple *user = get_single_immediate_use (negate);
6173 if (!user || !is_gimple_assign (user))
6174 continue;
6176 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6177 if (TREE_CODE (negateop) == SSA_NAME
6178 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6179 continue;
6181 /* The negate operand can be either operand of a PLUS_EXPR
6182 (it can be the LHS if the RHS is a constant for example).
6184 Force the negate operand to the RHS of the PLUS_EXPR, then
6185 transform the PLUS_EXPR into a MINUS_EXPR. */
6186 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6188 /* If the negated operand appears on the LHS of the
6189 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6190 to force the negated operand to the RHS of the PLUS_EXPR. */
6191 if (gimple_assign_rhs1 (user) == negate)
6193 swap_ssa_operands (user,
6194 gimple_assign_rhs1_ptr (user),
6195 gimple_assign_rhs2_ptr (user));
6198 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6199 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6200 if (gimple_assign_rhs2 (user) == negate)
6202 tree rhs1 = gimple_assign_rhs1 (user);
6203 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6204 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6205 negateop);
6206 update_stmt (user);
6209 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6211 if (gimple_assign_rhs1 (user) == negate)
6213 /* We have
6214 x = -negateop
6215 y = x - b
6216 which we transform into
6217 x = negateop + b
6218 y = -x .
6219 This pushes down the negate which we possibly can merge
6220 into some other operation, hence insert it into the
6221 plus_negates vector. */
6222 gimple *feed = SSA_NAME_DEF_STMT (negate);
6223 tree b = gimple_assign_rhs2 (user);
6224 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6225 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6226 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6227 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6228 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6229 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6230 user = gsi_stmt (gsi2);
6231 update_stmt (user);
6232 reassoc_remove_stmt (&gsi);
6233 release_defs (feed);
6234 plus_negates.safe_push (gimple_assign_lhs (user));
6236 else
6238 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6239 getting rid of one operation. */
6240 tree rhs1 = gimple_assign_rhs1 (user);
6241 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6242 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6243 update_stmt (gsi_stmt (gsi));
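/* The two MINUS_EXPR shapes handled above, spelled out with invented
   names:

     x_2 = -a_1;                      x_2 = -a_1;
     y_4 = x_2 - b_3;                 y_4 = b_3 - x_2;

   become, respectively,

     t_5 = a_1 + b_3;                 y_4 = b_3 + a_1;
     y_4 = -t_5;

   and in the first case y_4 is queued again in plus_negates so the
   new negate can be merged further.  The PLUS_EXPR case simply turns
   b + -a into b - a.  */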
6249 /* Break up subtract operations in block BB.
6251 We do this top down because we don't know whether the subtract is
6252 part of a possible chain of reassociation except at the top.
6254 IE given
6255 d = f + g
6256 c = a + e
6257 b = c - d
6258 q = b - r
6259 k = t - q
6261 we want to break up k = t - q, but we won't until we've transformed q
6262 = b - r, which won't be broken up until we transform b = c - d.
6264 En passant, clear the GIMPLE visited flag on every statement
6265 and set UIDs within each basic block. */
6267 static void
6268 break_up_subtract_bb (basic_block bb)
6270 gimple_stmt_iterator gsi;
6271 basic_block son;
6272 unsigned int uid = 1;
6274 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6276 gimple *stmt = gsi_stmt (gsi);
6277 gimple_set_visited (stmt, false);
6278 gimple_set_uid (stmt, uid++);
6280 if (!is_gimple_assign (stmt)
6281 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6282 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6283 continue;
6285 /* Look for simple gimple subtract operations. */
6286 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6288 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6289 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6290 continue;
6292 /* Check for a subtract used only in an addition. If this
6293 is the case, transform it into add of a negate for better
6294 reassociation. IE transform C = A-B into C = A + -B if C
6295 is only used in an addition. */
6296 if (should_break_up_subtract (stmt))
6297 break_up_subtract (stmt, &gsi);
6299 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6300 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6301 plus_negates.safe_push (gimple_assign_lhs (stmt));
6303 for (son = first_dom_son (CDI_DOMINATORS, bb);
6304 son;
6305 son = next_dom_son (CDI_DOMINATORS, son))
6306 break_up_subtract_bb (son);
6309 /* Used for repeated factor analysis. */
6310 struct repeat_factor
6312 /* An SSA name that occurs in a multiply chain. */
6313 tree factor;
6315 /* Cached rank of the factor. */
6316 unsigned rank;
6318 /* Number of occurrences of the factor in the chain. */
6319 HOST_WIDE_INT count;
6321 /* An SSA name representing the product of this factor and
6322 all factors appearing later in the repeated factor vector. */
6323 tree repr;
6327 static vec<repeat_factor> repeat_factor_vec;
6329 /* Used for sorting the repeat factor vector. Sort primarily by
6330 ascending occurrence count, secondarily by descending rank. */
6332 static int
6333 compare_repeat_factors (const void *x1, const void *x2)
6335 const repeat_factor *rf1 = (const repeat_factor *) x1;
6336 const repeat_factor *rf2 = (const repeat_factor *) x2;
6338 if (rf1->count != rf2->count)
6339 return rf1->count - rf2->count;
6341 return rf2->rank - rf1->rank;
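/* For example, factors with (count, rank) pairs
   (1, 7), (3, 2), (3, 9), (2, 4) sort under this comparison as
   (1, 7), (2, 4), (3, 9), (3, 2): lowest occurrence count first and,
   for equal counts, the higher-ranked factor first.  */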
6344 /* Look for repeated operands in OPS in the multiply tree rooted at
6345 STMT. Replace them with an optimal sequence of multiplies and powi
6346 builtin calls, and remove the used operands from OPS. Return an
6347 SSA name representing the value of the replacement sequence. */
6349 static tree
6350 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6352 unsigned i, j, vec_len;
6353 int ii;
6354 operand_entry *oe;
6355 repeat_factor *rf1, *rf2;
6356 repeat_factor rfnew;
6357 tree result = NULL_TREE;
6358 tree target_ssa, iter_result;
6359 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6360 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6361 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6362 gimple *mul_stmt, *pow_stmt;
6364 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6365 target, unless type is integral. */
6366 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6367 return NULL_TREE;
6369 /* Allocate the repeated factor vector. */
6370 repeat_factor_vec.create (10);
6372 /* Scan the OPS vector for all SSA names in the product and build
6373 up a vector of occurrence counts for each factor. */
6374 FOR_EACH_VEC_ELT (*ops, i, oe)
6376 if (TREE_CODE (oe->op) == SSA_NAME)
6378 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6380 if (rf1->factor == oe->op)
6382 rf1->count += oe->count;
6383 break;
6387 if (j >= repeat_factor_vec.length ())
6389 rfnew.factor = oe->op;
6390 rfnew.rank = oe->rank;
6391 rfnew.count = oe->count;
6392 rfnew.repr = NULL_TREE;
6393 repeat_factor_vec.safe_push (rfnew);
6398 /* Sort the repeated factor vector by (a) increasing occurrence count,
6399 and (b) decreasing rank. */
6400 repeat_factor_vec.qsort (compare_repeat_factors);
6402 /* It is generally best to combine as many base factors as possible
6403 into a product before applying __builtin_powi to the result.
6404 However, the sort order chosen for the repeated factor vector
6405 allows us to cache partial results for the product of the base
6406 factors for subsequent use. When we already have a cached partial
6407 result from a previous iteration, it is best to make use of it
6408 before looking for another __builtin_pow opportunity.
6410 As an example, consider x * x * y * y * y * z * z * z * z.
6411 We want to first compose the product x * y * z, raise it to the
6412 second power, then multiply this by y * z, and finally multiply
6413 by z. This can be done in 5 multiplies provided we cache y * z
6414 for use in both expressions:
6416 t1 = y * z
6417 t2 = t1 * x
6418 t3 = t2 * t2
6419 t4 = t1 * t3
6420 result = t4 * z
6422 If we instead ignored the cached y * z and first multiplied by
6423 the __builtin_pow opportunity z * z, we would get the inferior:
6425 t1 = y * z
6426 t2 = t1 * x
6427 t3 = t2 * t2
6428 t4 = z * z
6429 t5 = t3 * t4
6430 result = t5 * y */
6432 vec_len = repeat_factor_vec.length ();
6434 /* Repeatedly look for opportunities to create a builtin_powi call. */
6435 while (true)
6437 HOST_WIDE_INT power;
6439 /* First look for the largest cached product of factors from
6440 preceding iterations. If found, create a builtin_powi for
6441 it if the minimum occurrence count for its factors is at
6442 least 2, or just use this cached product as our next
6443 multiplicand if the minimum occurrence count is 1. */
6444 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6446 if (rf1->repr && rf1->count > 0)
6447 break;
6450 if (j < vec_len)
6452 power = rf1->count;
6454 if (power == 1)
6456 iter_result = rf1->repr;
6458 if (dump_file && (dump_flags & TDF_DETAILS))
6460 unsigned elt;
6461 repeat_factor *rf;
6462 fputs ("Multiplying by cached product ", dump_file);
6463 for (elt = j; elt < vec_len; elt++)
6465 rf = &repeat_factor_vec[elt];
6466 print_generic_expr (dump_file, rf->factor);
6467 if (elt < vec_len - 1)
6468 fputs (" * ", dump_file);
6470 fputs ("\n", dump_file);
6473 else
6475 if (INTEGRAL_TYPE_P (type))
6477 gcc_assert (power > 1);
6478 gimple_stmt_iterator gsip = gsi;
6479 gsi_prev (&gsip);
6480 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6481 rf1->repr, power);
6482 gimple_stmt_iterator gsic = gsi;
6483 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6485 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6486 gimple_set_visited (gsi_stmt (gsic), true);
6487 gsi_prev (&gsic);
6490 else
6492 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6493 pow_stmt
6494 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6495 build_int_cst (integer_type_node,
6496 power));
6497 gimple_call_set_lhs (pow_stmt, iter_result);
6498 gimple_set_location (pow_stmt, gimple_location (stmt));
6499 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6500 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6503 if (dump_file && (dump_flags & TDF_DETAILS))
6505 unsigned elt;
6506 repeat_factor *rf;
6507 fputs ("Building __builtin_pow call for cached product (",
6508 dump_file);
6509 for (elt = j; elt < vec_len; elt++)
6511 rf = &repeat_factor_vec[elt];
6512 print_generic_expr (dump_file, rf->factor);
6513 if (elt < vec_len - 1)
6514 fputs (" * ", dump_file);
6516 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6517 power);
6521 else
6523 /* Otherwise, find the first factor in the repeated factor
6524 vector whose occurrence count is at least 2. If no such
6525 factor exists, there are no builtin_powi opportunities
6526 remaining. */
6527 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6529 if (rf1->count >= 2)
6530 break;
6533 if (j >= vec_len)
6534 break;
6536 power = rf1->count;
6538 if (dump_file && (dump_flags & TDF_DETAILS))
6540 unsigned elt;
6541 repeat_factor *rf;
6542 fputs ("Building __builtin_pow call for (", dump_file);
6543 for (elt = j; elt < vec_len; elt++)
6545 rf = &repeat_factor_vec[elt];
6546 print_generic_expr (dump_file, rf->factor);
6547 if (elt < vec_len - 1)
6548 fputs (" * ", dump_file);
6550 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6553 reassociate_stats.pows_created++;
6555 /* Visit each element of the vector in reverse order (so that
6556 high-occurrence elements are visited first, and within the
6557 same occurrence count, lower-ranked elements are visited
6558 first). Form a linear product of all elements in this order
6559 whose occurrence count is at least that of element J.
6560 Record the SSA name representing the product of each element
6561 with all subsequent elements in the vector. */
6562 if (j == vec_len - 1)
6563 rf1->repr = rf1->factor;
6564 else
6566 for (ii = vec_len - 2; ii >= (int)j; ii--)
6568 tree op1, op2;
6570 rf1 = &repeat_factor_vec[ii];
6571 rf2 = &repeat_factor_vec[ii + 1];
6573 /* Init the last factor's representative to be itself. */
6574 if (!rf2->repr)
6575 rf2->repr = rf2->factor;
6577 op1 = rf1->factor;
6578 op2 = rf2->repr;
6580 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6581 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6582 op1, op2);
6583 gimple_set_location (mul_stmt, gimple_location (stmt));
6584 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6585 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6586 rf1->repr = target_ssa;
6588 /* Don't reprocess the multiply we just introduced. */
6589 gimple_set_visited (mul_stmt, true);
6593 /* Form a call to __builtin_powi for the maximum product
6594 just formed, raised to the power obtained earlier. */
6595 rf1 = &repeat_factor_vec[j];
6596 if (INTEGRAL_TYPE_P (type))
6598 gcc_assert (power > 1);
6599 gimple_stmt_iterator gsip = gsi;
6600 gsi_prev (&gsip);
6601 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6602 rf1->repr, power);
6603 gimple_stmt_iterator gsic = gsi;
6604 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6606 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6607 gimple_set_visited (gsi_stmt (gsic), true);
6608 gsi_prev (&gsic);
6611 else
6613 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6614 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6615 build_int_cst (integer_type_node,
6616 power));
6617 gimple_call_set_lhs (pow_stmt, iter_result);
6618 gimple_set_location (pow_stmt, gimple_location (stmt));
6619 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6620 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6624 /* If we previously formed at least one other builtin_powi call,
6625 form the product of this one and those others. */
6626 if (result)
6628 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6629 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6630 result, iter_result);
6631 gimple_set_location (mul_stmt, gimple_location (stmt));
6632 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6633 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6634 gimple_set_visited (mul_stmt, true);
6635 result = new_result;
6637 else
6638 result = iter_result;
6640 /* Decrement the occurrence count of each element in the product
6641 by the count found above, and remove this many copies of each
6642 factor from OPS. */
6643 for (i = j; i < vec_len; i++)
6645 unsigned k = power;
6646 unsigned n;
6648 rf1 = &repeat_factor_vec[i];
6649 rf1->count -= power;
6651 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6653 if (oe->op == rf1->factor)
6655 if (oe->count <= k)
6657 ops->ordered_remove (n);
6658 k -= oe->count;
6660 if (k == 0)
6661 break;
6663 else
6665 oe->count -= k;
6666 break;
6673 /* At this point all elements in the repeated factor vector have a
6674 remaining occurrence count of 0 or 1, and those with a count of 1
6675 don't have cached representatives. Re-sort the ops vector and
6676 clean up. */
6677 ops->qsort (sort_by_operand_rank);
6678 repeat_factor_vec.release ();
6680 /* Return the final product computed herein. Note that there may
6681 still be some elements with single occurrence count left in OPS;
6682 those will be handled by the normal reassociation logic. */
6683 return result;
6686 /* Attempt to optimize
6687 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6688 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6690 static void
6691 attempt_builtin_copysign (vec<operand_entry *> *ops)
6693 operand_entry *oe;
6694 unsigned int i;
6695 unsigned int length = ops->length ();
6696 tree cst = ops->last ()->op;
6698 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6699 return;
6701 FOR_EACH_VEC_ELT (*ops, i, oe)
6703 if (TREE_CODE (oe->op) == SSA_NAME
6704 && has_single_use (oe->op))
6706 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6707 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6709 tree arg0, arg1;
6710 switch (gimple_call_combined_fn (old_call))
6712 CASE_CFN_COPYSIGN:
6713 CASE_CFN_COPYSIGN_FN:
6714 arg0 = gimple_call_arg (old_call, 0);
6715 arg1 = gimple_call_arg (old_call, 1);
6716 /* The first argument of copysign must be a constant,
6717 otherwise there's nothing to do. */
6718 if (TREE_CODE (arg0) == REAL_CST)
6720 tree type = TREE_TYPE (arg0);
6721 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6722 /* If we couldn't fold to a single constant, skip it.
6723 That happens e.g. for inexact multiplication when
6724 -frounding-math. */
6725 if (mul == NULL_TREE)
6726 break;
6727 /* Instead of adjusting OLD_CALL, let's build a new
6728 call to not leak the LHS and prevent keeping bogus
6729 debug statements. DCE will clean up the old call. */
6730 gcall *new_call;
6731 if (gimple_call_internal_p (old_call))
6732 new_call = gimple_build_call_internal
6733 (IFN_COPYSIGN, 2, mul, arg1);
6734 else
6735 new_call = gimple_build_call
6736 (gimple_call_fndecl (old_call), 2, mul, arg1);
6737 tree lhs = make_ssa_name (type);
6738 gimple_call_set_lhs (new_call, lhs);
6739 gimple_set_location (new_call,
6740 gimple_location (old_call));
6741 insert_stmt_after (new_call, old_call);
6742 /* We've used the constant, get rid of it. */
6743 ops->pop ();
6744 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6745 /* Handle the CST1 < 0 case by negating the result. */
6746 if (cst1_neg)
6748 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6749 gimple *negate_stmt
6750 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6751 insert_stmt_after (negate_stmt, new_call);
6752 oe->op = negrhs;
6754 else
6755 oe->op = lhs;
6756 if (dump_file && (dump_flags & TDF_DETAILS))
6758 fprintf (dump_file, "Optimizing copysign: ");
6759 print_generic_expr (dump_file, cst);
6760 fprintf (dump_file, " * COPYSIGN (");
6761 print_generic_expr (dump_file, arg0);
6762 fprintf (dump_file, ", ");
6763 print_generic_expr (dump_file, arg1);
6764 fprintf (dump_file, ") into %sCOPYSIGN (",
6765 cst1_neg ? "-" : "");
6766 print_generic_expr (dump_file, mul);
6767 fprintf (dump_file, ", ");
6768 print_generic_expr (dump_file, arg1);
6769 fprintf (dump_file, ")\n");
6771 return;
6773 break;
6774 default:
6775 break;
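/* A concrete instance of the transform above, with a positive
   constant so no negate is needed (names invented):

     t_2 = __builtin_copysign (3.0, y_1);   with 2.0 * t_2 * ... in OPS

   becomes

     t_3 = __builtin_copysign (6.0, y_1);

   t_3 replaces t_2 in the operand list and the 2.0 constant is
   popped; a negative constant would additionally negate the result
   of the new call.  */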
6782 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6784 static void
6785 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6787 tree rhs1;
6789 if (dump_file && (dump_flags & TDF_DETAILS))
6791 fprintf (dump_file, "Transforming ");
6792 print_gimple_stmt (dump_file, stmt, 0);
6795 rhs1 = gimple_assign_rhs1 (stmt);
6796 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6797 update_stmt (stmt);
6798 remove_visited_stmt_chain (rhs1);
6800 if (dump_file && (dump_flags & TDF_DETAILS))
6802 fprintf (dump_file, " into ");
6803 print_gimple_stmt (dump_file, stmt, 0);
6807 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6809 static void
6810 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6811 tree rhs1, tree rhs2)
6813 if (dump_file && (dump_flags & TDF_DETAILS))
6815 fprintf (dump_file, "Transforming ");
6816 print_gimple_stmt (dump_file, stmt, 0);
6819 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6820 update_stmt (gsi_stmt (*gsi));
6821 remove_visited_stmt_chain (rhs1);
6823 if (dump_file && (dump_flags & TDF_DETAILS))
6825 fprintf (dump_file, " into ");
6826 print_gimple_stmt (dump_file, stmt, 0);
6830 /* Rearranging the ops can expose more FMAs when the chain contains more
6831 than two FMA candidates. Put no-mult ops and mult ops alternately at
6832 the end of the queue, which is conducive to generating more FMAs and
6833 reducing the number of FMAs lost when breaking the chain.
6834 E.g.
6835 a * b + c * d + e generates:
6837 _4 = c_9(D) * d_10(D);
6838 _12 = .FMA (a_7(D), b_8(D), _4);
6839 _11 = e_6(D) + _12;
6841 Rearranging the ops to e + a * b + c * d generates:
6843 _4 = .FMA (c_7(D), d_8(D), _3);
6844 _11 = .FMA (a_5(D), b_6(D), _4); */
6845 static bool
6846 rank_ops_for_fma (vec<operand_entry *> *ops)
6848 operand_entry *oe;
6849 unsigned int i;
6850 unsigned int ops_length = ops->length ();
6851 auto_vec<operand_entry *> ops_mult;
6852 auto_vec<operand_entry *> ops_others;
6854 FOR_EACH_VEC_ELT (*ops, i, oe)
6856 if (TREE_CODE (oe->op) == SSA_NAME)
6858 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6859 if (is_gimple_assign (def_stmt)
6860 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6861 ops_mult.safe_push (oe);
6862 else
6863 ops_others.safe_push (oe);
6865 else
6866 ops_others.safe_push (oe);
6868 /* 1. When at least two of the ops are defined by a MULT_EXPR, as in
6870 a * b + c * d + e,
6872 we need to rearrange the ops.
6874 Putting the ops that are not defined by a mult in front can generate more FMAs.
6876 2. If all ops are defined by a mult, we don't need to rearrange them. */
6877 if (ops_mult.length () >= 2 && ops_mult.length () != ops_length)
6879 /* Put no-mult ops and mult ops alternately at the end of the
6880 queue, which is conducive to generating more FMAs and reducing the
6881 number of FMAs lost when breaking the chain. */
6882 ops->truncate (0);
6883 ops->splice (ops_mult);
6884 int j, opindex = ops->length ();
6885 int others_length = ops_others.length ();
6886 for (j = 0; j < others_length; j++)
6888 oe = ops_others.pop ();
6889 ops->quick_insert (opindex, oe);
6890 if (opindex > 0)
6891 opindex--;
6893 return true;
6895 return false;
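/* Worked example of the interleaving loop above, where m* are ops
   defined by a MULT_EXPR and o* are the others: starting from
   ops_mult = { m1, m2 } and ops_others = { o1, o2, o3 }, the
   insertions yield { o1, m1, o2, m2, o3 }.  Consumed from the end,
   mult and no-mult operands then alternate, so each mult can still
   be fused into an FMA when the chain is broken up.  */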
6897 /* Reassociate expressions in basic block BB and its children in the
6898 post-dominator tree.
6900 Bubble up the return status from maybe_optimize_range_tests. */
6902 static bool
6903 reassociate_bb (basic_block bb)
6905 gimple_stmt_iterator gsi;
6906 basic_block son;
6907 gimple *stmt = last_nondebug_stmt (bb);
6908 bool cfg_cleanup_needed = false;
6910 if (stmt && !gimple_visited_p (stmt))
6911 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6913 bool do_prev = false;
6914 for (gsi = gsi_last_bb (bb);
6915 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6917 do_prev = true;
6918 stmt = gsi_stmt (gsi);
6920 if (is_gimple_assign (stmt)
6921 && !stmt_could_throw_p (cfun, stmt))
6923 tree lhs, rhs1, rhs2;
6924 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6926 /* If this was part of an already processed statement,
6927 we don't need to touch it again. */
6928 if (gimple_visited_p (stmt))
6930 /* This statement might have become dead because of previous
6931 reassociations. */
6932 if (has_zero_uses (gimple_get_lhs (stmt)))
6934 reassoc_remove_stmt (&gsi);
6935 release_defs (stmt);
6936 /* We might end up removing the last stmt above which
6937 places the iterator to the end of the sequence.
6938 Reset it to the last stmt in this case and make sure
6939 we don't do gsi_prev in that case. */
6940 if (gsi_end_p (gsi))
6942 gsi = gsi_last_bb (bb);
6943 do_prev = false;
6946 continue;
6949 /* If this is not a gimple binary expression, there is
6950 nothing for us to do with it. */
6951 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6952 continue;
6954 lhs = gimple_assign_lhs (stmt);
6955 rhs1 = gimple_assign_rhs1 (stmt);
6956 rhs2 = gimple_assign_rhs2 (stmt);
6958 /* For non-bit or min/max operations we can't associate
6959 all types. Verify that here. */
6960 if ((rhs_code != BIT_IOR_EXPR
6961 && rhs_code != BIT_AND_EXPR
6962 && rhs_code != BIT_XOR_EXPR
6963 && rhs_code != MIN_EXPR
6964 && rhs_code != MAX_EXPR
6965 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6966 || !can_reassociate_op_p (rhs1)
6967 || !can_reassociate_op_p (rhs2))
6968 continue;
6970 if (associative_tree_code (rhs_code))
6972 auto_vec<operand_entry *> ops;
6973 tree powi_result = NULL_TREE;
6974 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6976 /* There may be no immediate uses left by the time we
6977 get here because we may have eliminated them all. */
6978 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6979 continue;
6981 gimple_set_visited (stmt, true);
6982 linearize_expr_tree (&ops, stmt, true, true);
6983 ops.qsort (sort_by_operand_rank);
6984 int orig_len = ops.length ();
6985 optimize_ops_list (rhs_code, &ops);
6986 if (undistribute_ops_list (rhs_code, &ops,
6987 loop_containing_stmt (stmt)))
6989 ops.qsort (sort_by_operand_rank);
6990 optimize_ops_list (rhs_code, &ops);
6992 if (undistribute_bitref_for_vector (rhs_code, &ops,
6993 loop_containing_stmt (stmt)))
6995 ops.qsort (sort_by_operand_rank);
6996 optimize_ops_list (rhs_code, &ops);
6998 if (rhs_code == PLUS_EXPR
6999 && transform_add_to_multiply (&ops))
7000 ops.qsort (sort_by_operand_rank);
7002 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
7004 if (is_vector)
7005 optimize_vec_cond_expr (rhs_code, &ops);
7006 else
7007 optimize_range_tests (rhs_code, &ops, NULL);
7010 if (rhs_code == MULT_EXPR && !is_vector)
7012 attempt_builtin_copysign (&ops);
7014 if (reassoc_insert_powi_p
7015 && (flag_unsafe_math_optimizations
7016 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
7017 powi_result = attempt_builtin_powi (stmt, &ops);
7020 operand_entry *last;
7021 bool negate_result = false;
7022 if (ops.length () > 1
7023 && rhs_code == MULT_EXPR)
7025 last = ops.last ();
7026 if ((integer_minus_onep (last->op)
7027 || real_minus_onep (last->op))
7028 && !HONOR_SNANS (TREE_TYPE (lhs))
7029 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
7030 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
7032 ops.pop ();
7033 negate_result = true;
7037 tree new_lhs = lhs;
7038 /* If the operand vector is now empty, all operands were
7039 consumed by the __builtin_powi optimization. */
7040 if (ops.length () == 0)
7041 transform_stmt_to_copy (&gsi, stmt, powi_result);
7042 else if (ops.length () == 1)
7044 tree last_op = ops.last ()->op;
7046 /* If the stmt that defines the operand has to be inserted, insert it
7047 before the use. */
7048 if (ops.last ()->stmt_to_insert)
7049 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
7050 if (powi_result)
7051 transform_stmt_to_multiply (&gsi, stmt, last_op,
7052 powi_result);
7053 else
7054 transform_stmt_to_copy (&gsi, stmt, last_op);
7056 else
7058 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7059 int ops_num = ops.length ();
7060 int width;
7061 bool has_fma = false;
7063 /* For binary bit operations, if there are at least 3
7064 operands and the last operand in OPS is a constant,
7065 move it to the front. This helps ensure that we generate
7066 (X & Y) & C rather than (X & C) & Y. The former will
7067 often match a canonical bit test when we get to RTL. */
7068 if (ops.length () > 2
7069 && (rhs_code == BIT_AND_EXPR
7070 || rhs_code == BIT_IOR_EXPR
7071 || rhs_code == BIT_XOR_EXPR)
7072 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7073 std::swap (*ops[0], *ops[ops_num - 1]);
7075 optimization_type opt_type = bb_optimization_type (bb);
7077 /* If the target supports FMA, rank_ops_for_fma will detect whether
7078 the chain has FMA candidates and rearrange the ops if so. */
7079 if (direct_internal_fn_supported_p (IFN_FMA,
7080 TREE_TYPE (lhs),
7081 opt_type)
7082 && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7084 has_fma = rank_ops_for_fma (&ops);
7087 /* Only rewrite the expression tree into parallel form in the
7088 last reassoc pass, to avoid useless back-and-forth work
7089 with the initial linearization. */
7090 if (!reassoc_insert_powi_p
7091 && ops.length () > 3
7092 && (width
7093 = get_reassociation_width (&ops, lhs, rhs_code, mode))
7094 > 1)
7096 if (dump_file && (dump_flags & TDF_DETAILS))
7097 fprintf (dump_file,
7098 "Width = %d was chosen for reassociation\n",
7099 width);
7100 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7101 width,
7102 has_fma,
7103 ops);
7105 else
7107 /* When there are three operands left, we want
7108 to make sure the ones that get the double
7109 binary op are chosen wisely. */
7110 int len = ops.length ();
7111 if (len >= 3
7112 && (!has_fma
7113 /* width > 1 means ranking ops results in better
7114 parallelism. */
7115 || get_reassociation_width (&ops, lhs, rhs_code,
7116 mode)
7117 > 1))
7118 swap_ops_for_binary_stmt (ops, len - 3);
7120 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7121 powi_result != NULL
7122 || negate_result,
7123 len != orig_len);
7126 /* If we combined some repeated factors into a
7127 __builtin_powi call, multiply that result by the
7128 reassociated operands. */
7129 if (powi_result)
7131 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7132 tree type = TREE_TYPE (lhs);
7133 tree target_ssa = make_temp_ssa_name (type, NULL,
7134 "reassocpow");
7135 gimple_set_lhs (lhs_stmt, target_ssa);
7136 update_stmt (lhs_stmt);
7137 if (lhs != new_lhs)
7139 target_ssa = new_lhs;
7140 new_lhs = lhs;
7142 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7143 powi_result, target_ssa);
7144 gimple_set_location (mul_stmt, gimple_location (stmt));
7145 gimple_set_uid (mul_stmt, gimple_uid (stmt));
7146 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7150 if (negate_result)
7152 stmt = SSA_NAME_DEF_STMT (lhs);
7153 tree tmp = make_ssa_name (TREE_TYPE (lhs));
7154 gimple_set_lhs (stmt, tmp);
7155 if (lhs != new_lhs)
7156 tmp = new_lhs;
7157 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7158 tmp);
7159 gimple_set_uid (neg_stmt, gimple_uid (stmt));
7160 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7161 update_stmt (stmt);
7166 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
7167 son;
7168 son = next_dom_son (CDI_POST_DOMINATORS, son))
7169 cfg_cleanup_needed |= reassociate_bb (son);
7171 return cfg_cleanup_needed;
7174 /* Add jumps around shifts for range tests turned into bit tests.
7175 For each SSA_NAME VAR we have code like:
7176 VAR = ...; // final stmt of range comparison
7177 // bit test here...;
7178 OTHERVAR = ...; // final stmt of the bit test sequence
7179 RES = VAR | OTHERVAR;
7180 Turn the above into:
7181 VAR = ...;
7182 if (VAR != 0)
7183 goto <l3>;
7184 else
7185 goto <l2>;
7186 <l2>:
7187 // bit test here...;
7188 OTHERVAR = ...;
7189 <l3>:
7190 # RES = PHI<1(l1), OTHERVAR(l2)>; */
7192 static void
7193 branch_fixup (void)
7195 tree var;
7196 unsigned int i;
7198 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7200 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7201 gimple *use_stmt;
7202 use_operand_p use;
7203 bool ok = single_imm_use (var, &use, &use_stmt);
7204 gcc_assert (ok
7205 && is_gimple_assign (use_stmt)
7206 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7207 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7209 basic_block cond_bb = gimple_bb (def_stmt);
7210 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7211 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7213 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7214 gimple *g = gimple_build_cond (NE_EXPR, var,
7215 build_zero_cst (TREE_TYPE (var)),
7216 NULL_TREE, NULL_TREE);
7217 location_t loc = gimple_location (use_stmt);
7218 gimple_set_location (g, loc);
7219 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7221 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7222 etrue->probability = profile_probability::even ();
7223 edge efalse = find_edge (cond_bb, then_bb);
7224 efalse->flags = EDGE_FALSE_VALUE;
7225 efalse->probability -= etrue->probability;
7226 then_bb->count -= etrue->count ();
7228 tree othervar = NULL_TREE;
7229 if (gimple_assign_rhs1 (use_stmt) == var)
7230 othervar = gimple_assign_rhs2 (use_stmt);
7231 else if (gimple_assign_rhs2 (use_stmt) == var)
7232 othervar = gimple_assign_rhs1 (use_stmt);
7233 else
7234 gcc_unreachable ();
7235 tree lhs = gimple_assign_lhs (use_stmt);
7236 gphi *phi = create_phi_node (lhs, merge_bb);
7237 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7238 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7239 gsi = gsi_for_stmt (use_stmt);
7240 gsi_remove (&gsi, true);
7242 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7243 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7245 reassoc_branch_fixups.release ();
7248 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7249 void debug_ops_vector (vec<operand_entry *> ops);
7251 /* Dump the operand entry vector OPS to FILE. */
7253 void
7254 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7256 operand_entry *oe;
7257 unsigned int i;
7259 FOR_EACH_VEC_ELT (ops, i, oe)
7261 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7262 print_generic_expr (file, oe->op);
7263 fprintf (file, "\n");
7267 /* Dump the operand entry vector OPS to STDERR. */
7269 DEBUG_FUNCTION void
7270 debug_ops_vector (vec<operand_entry *> ops)
7272 dump_ops_vector (stderr, ops);
7275 /* Bubble up return status from reassociate_bb. */
7277 static bool
7278 do_reassoc (void)
7280 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7281 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7284 /* Initialize the reassociation pass. */
7286 static void
7287 init_reassoc (void)
7289 int i;
7290 int64_t rank = 2;
7291 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7293 /* Find the loops, so that we can prevent moving calculations in
7294 them. */
7295 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7297 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7299 next_operand_entry_id = 0;
7301 /* Reverse RPO (Reverse Post Order) will give us something where
7302 deeper loops come later. */
7303 pre_and_rev_post_order_compute (NULL, bbs, false);
7304 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7305 operand_rank = new hash_map<tree, int64_t>;
7307 /* Give each default definition a distinct rank. This includes
7308 parameters and the static chain. Walk backwards over all
7309 SSA names so that we get proper rank ordering according
7310 to tree_swap_operands_p. */
7311 for (i = num_ssa_names - 1; i > 0; --i)
7313 tree name = ssa_name (i);
7314 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7315 insert_operand_rank (name, ++rank);
7318 /* Set up rank for each BB */
7319 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7320 bb_rank[bbs[i]] = ++rank << 16;
7322 free (bbs);
7323 calculate_dominance_info (CDI_POST_DOMINATORS);
7324 plus_negates = vNULL;
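/* Sketch of the rank numbering established above: default
   definitions receive small consecutive ranks (3, 4, ...), while
   each basic block, visited in reverse post order, gets a rank of
   the form N << 16, so values computed in later blocks rank above
   those from earlier ones; the low 16 bits are presumably left for
   finer ordering of statements within a block.  */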
7327 /* Cleanup after the reassociation pass, and print stats if
7328 requested. */
7330 static void
7331 fini_reassoc (void)
7333 statistics_counter_event (cfun, "Linearized",
7334 reassociate_stats.linearized);
7335 statistics_counter_event (cfun, "Constants eliminated",
7336 reassociate_stats.constants_eliminated);
7337 statistics_counter_event (cfun, "Ops eliminated",
7338 reassociate_stats.ops_eliminated);
7339 statistics_counter_event (cfun, "Statements rewritten",
7340 reassociate_stats.rewritten);
7341 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7342 reassociate_stats.pows_encountered);
7343 statistics_counter_event (cfun, "Built-in powi calls created",
7344 reassociate_stats.pows_created);
7346 delete operand_rank;
7347 bitmap_clear (biased_names);
7348 operand_entry_pool.release ();
7349 free (bb_rank);
7350 plus_negates.release ();
7351 free_dominance_info (CDI_POST_DOMINATORS);
7352 loop_optimizer_finalize ();
7355 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7356 insertion of __builtin_powi calls.
7358 Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
7359 optimization of a gimple conditional. Otherwise returns zero. */
7361 static unsigned int
7362 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7364 reassoc_insert_powi_p = insert_powi_p;
7365 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7367 init_reassoc ();
7369 bool cfg_cleanup_needed;
7370 cfg_cleanup_needed = do_reassoc ();
7371 repropagate_negates ();
7372 branch_fixup ();
7374 fini_reassoc ();
7375 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7378 namespace {
7380 const pass_data pass_data_reassoc =
7382 GIMPLE_PASS, /* type */
7383 "reassoc", /* name */
7384 OPTGROUP_NONE, /* optinfo_flags */
7385 TV_TREE_REASSOC, /* tv_id */
7386 ( PROP_cfg | PROP_ssa ), /* properties_required */
7387 0, /* properties_provided */
7388 0, /* properties_destroyed */
7389 0, /* todo_flags_start */
7390 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7393 class pass_reassoc : public gimple_opt_pass
7395 public:
7396 pass_reassoc (gcc::context *ctxt)
7397 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7400 /* opt_pass methods: */
7401 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7402 void set_pass_param (unsigned int n, bool param) final override
7404 gcc_assert (n == 0);
7405 insert_powi_p = param;
7406 bias_loop_carried_phi_ranks_p = !param;
7408 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7409 unsigned int execute (function *) final override
7411 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7414 private:
7415 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7416 point 3a in the pass header comment. */
7417 bool insert_powi_p;
7418 bool bias_loop_carried_phi_ranks_p;
7419 }; // class pass_reassoc
7421 } // anon namespace
7423 gimple_opt_pass *
7424 make_pass_reassoc (gcc::context *ctxt)
7426 return new pass_reassoc (ctxt);