1 /* Reassociation for trees.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
62 It consists of five steps:
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
72 3. Optimization of the operand lists, eliminating things like a +
73 -a, a & a, etc.
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
82 5. Repropagate negates, as nothing else will clean it up ATM.
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
103 a (1), b (1), c (1), d (2), e (2)
106 We start with our merge worklist empty, and the ops list with all of
107 those on it.
109 You want to first merge all leaves of the same rank, as much as
110 possible.
112 So first build a binary op of
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
124 Then build a binary op of d + e
125 mergetmp2 = d + e
127 and put mergetmp2 on the merge worklist.
129 so merge worklist = {mergetmp, c, mergetmp2}
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
134 So we have
136 build binary op
137 mergetmp3 = mergetmp + c
139 worklist = {mergetmp2, mergetmp3}
141 mergetmp4 = mergetmp2 + mergetmp3
143 worklist = {mergetmp4}
145 because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
154 So why don't we do this?
156 Because it's expensive, and rarely will help. Most trees we are
157 reassociating have 3 or fewer ops. If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
162 mergetmp = op1 + op2
163 newstmt = mergetmp + op3
165 instead of
166 mergetmp = op2 + op3
167 newstmt = mergetmp + op1
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
171 can do.
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
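/* A hypothetical end-to-end example (illustrative only, not taken from
   the sources): for

     e = (a - b) + (c + d);

   step 1 rewrites the subtraction as the addition of a negate, step 2
   linearizes the tree into the operand chain a + -b + c + d, step 3
   would drop cancelling pairs such as x + -x if any appeared, step 4
   re-emits the chain as binary statements in rank order, e.g.

     t1 = a + -b;
     t2 = t1 + c;
     e  = t2 + d;

   and step 5 turns t1 = a + -b back into t1 = a - b.  */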
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p;
183 /* Enable biasing ranks of loop accumulators. We don't want this before
184 vectorization, since it interferes with reduction chains. */
185 static bool reassoc_bias_loop_carried_phi_ranks_p;
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
199 static object_allocator<operand_entry> operand_entry_pool
200 ("operand entry pool");
202 /* This is used to assign a unique ID to each struct operand_entry
203 so that qsort results are identical on different hosts. */
204 static unsigned int next_operand_entry_id;
206 /* Starting rank number for a given basic block, so that we can rank
207 operations using unmovable instructions in that BB based on the bb
208 depth. */
209 static int64_t *bb_rank;
211 /* Operand->rank hashtable. */
212 static hash_map<tree, int64_t> *operand_rank;
214 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
215 biased. */
216 static auto_bitmap biased_names;
218 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
219 all basic blocks, the CFG should be adjusted: the basic block is
220 split right after that SSA_NAME's definition statement and before
221 its only use, which must be a bit ior. */
222 static vec<tree> reassoc_branch_fixups;
224 /* Forward decls. */
225 static int64_t get_rank (tree);
226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
229 possibly added by gsi_remove. */
231 static bool
232 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 gimple *stmt = gsi_stmt (*gsi);
236 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
237 return gsi_remove (gsi, true);
239 gimple_stmt_iterator prev = *gsi;
240 gsi_prev (&prev);
241 unsigned uid = gimple_uid (stmt);
242 basic_block bb = gimple_bb (stmt);
243 bool ret = gsi_remove (gsi, true);
244 if (!gsi_end_p (prev))
245 gsi_next (&prev);
246 else
247 prev = gsi_start_bb (bb);
248 gimple *end_stmt = gsi_stmt (*gsi);
249 while ((stmt = gsi_stmt (prev)) != end_stmt)
251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
252 gimple_set_uid (stmt, uid);
253 gsi_next (&prev);
255 return ret;
258 /* Bias amount for loop-carried phis. We want this to be larger than
259 the depth of any reassociation tree we can see, but not larger than
260 the rank difference between two blocks. */
261 #define PHI_LOOP_BIAS (1 << 15)
263 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
264 operands to the STMT's left-hand side. The goal is to preserve bias in code
265 like this:
267 x_1 = phi(x_0, x_2)
268 a = x_1 | 1
269 b = a ^ 2
270 .MEM = b
271 c = b + d
272 x_2 = c + e
274 That is, we need to preserve bias along single-use chains originating from
275 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
276 uses, because only they participate in rank propagation. */
277 static bool
278 propagate_bias_p (gimple *stmt)
280 use_operand_p use;
281 imm_use_iterator use_iter;
282 gimple *single_use_stmt = NULL;
284 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
285 return false;
287 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 gimple *current_use_stmt = USE_STMT (use);
291 if (is_gimple_assign (current_use_stmt)
292 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
295 return false;
296 single_use_stmt = current_use_stmt;
300 if (single_use_stmt == NULL)
301 return false;
303 if (gimple_bb (stmt)->loop_father
304 != gimple_bb (single_use_stmt)->loop_father)
305 return false;
307 return true;
310 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
311 an innermost loop, and the phi has only a single use which is inside
312 the loop, then the rank is the block rank of the loop latch plus an
313 extra bias for the loop-carried dependence. This causes expressions
314 calculated into an accumulator variable to be independent for each
315 iteration of the loop. If STMT is some other phi, the rank is the
316 block rank of its containing block. */
317 static int64_t
318 phi_rank (gimple *stmt)
320 basic_block bb = gimple_bb (stmt);
321 class loop *father = bb->loop_father;
322 tree res;
323 unsigned i;
324 use_operand_p use;
325 gimple *use_stmt;
327 if (!reassoc_bias_loop_carried_phi_ranks_p)
328 return bb_rank[bb->index];
330 /* We only care about real loops (those with a latch). */
331 if (!father->latch)
332 return bb_rank[bb->index];
334 /* Interesting phis must be in headers of innermost loops. */
335 if (bb != father->header
336 || father->inner)
337 return bb_rank[bb->index];
339 /* Ignore virtual SSA_NAMEs. */
340 res = gimple_phi_result (stmt);
341 if (virtual_operand_p (res))
342 return bb_rank[bb->index];
344 /* The phi definition must have a single use, and that use must be
345 within the loop. Otherwise this isn't an accumulator pattern. */
346 if (!single_imm_use (res, &use, &use_stmt)
347 || gimple_bb (use_stmt)->loop_father != father)
348 return bb_rank[bb->index];
350 /* Look for phi arguments from within the loop. If found, bias this phi. */
351 for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 tree arg = gimple_phi_arg_def (stmt, i);
354 if (TREE_CODE (arg) == SSA_NAME
355 && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
358 if (gimple_bb (def_stmt)->loop_father == father)
359 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
363 /* Must be an uninteresting phi. */
364 return bb_rank[bb->index];
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static int64_t
372 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 int64_t op_rank;
376 op_rank = get_rank (op);
378 /* Check whether op is biased after the get_rank () call, since it might have
379 updated biased_names. */
380 if (TREE_CODE (op) == SSA_NAME
381 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 if (maybe_biased_p == NULL)
384 return rank;
385 *maybe_biased_p = true;
388 return MAX (rank, op_rank);
391 /* Look up the operand rank structure for expression E. */
393 static inline int64_t
394 find_operand_rank (tree e)
396 int64_t *slot = operand_rank->get (e);
397 return slot ? *slot : -1;
400 /* Insert {E,RANK} into the operand rank hashtable. */
402 static inline void
403 insert_operand_rank (tree e, int64_t rank)
405 gcc_assert (rank > 0);
406 gcc_assert (!operand_rank->put (e, rank));
409 /* Given an expression E, return the rank of the expression. */
411 static int64_t
412 get_rank (tree e)
414 /* SSA_NAME's have the rank of the expression they are the result
415 of.
416 For globals and uninitialized values, the rank is 0.
417 For function arguments, use the pre-setup rank.
418 For PHI nodes, stores, asm statements, etc, we use the rank of
419 the BB.
420 For simple operations, the rank is the maximum rank of any of
421 its operands, or the bb_rank, whichever is less.
422 I make no claims that this is optimal, however, it gives good
423 results. */
425 /* We make an exception to the normal ranking system to break
426 dependences of accumulator variables in loops. Suppose we
427 have a simple one-block loop containing:
429 x_1 = phi(x_0, x_2)
430 b = a + x_1
431 c = b + d
432 x_2 = c + e
434 As shown, each iteration of the calculation into x is fully
435 dependent upon the iteration before it. We would prefer to
436 see this in the form:
438 x_1 = phi(x_0, x_2)
439 b = a + d
440 c = b + e
441 x_2 = c + x_1
443 If the loop is unrolled, the calculations of b and c from
444 different iterations can be interleaved.
446 To obtain this result during reassociation, we bias the rank
447 of the phi definition x_1 upward, when it is recognized as an
448 accumulator pattern. The artificial rank causes it to be
449 added last, providing the desired independence. */
451 if (TREE_CODE (e) == SSA_NAME)
453 ssa_op_iter iter;
454 gimple *stmt;
455 int64_t rank;
456 tree op;
458 /* If we already have a rank for this expression, use that. */
459 rank = find_operand_rank (e);
460 if (rank != -1)
461 return rank;
463 stmt = SSA_NAME_DEF_STMT (e);
464 if (gimple_code (stmt) == GIMPLE_PHI)
466 rank = phi_rank (stmt);
467 if (rank != bb_rank[gimple_bb (stmt)->index])
468 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
471 else if (!is_gimple_assign (stmt))
472 rank = bb_rank[gimple_bb (stmt)->index];
474 else
476 bool biased_p = false;
477 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
479 /* Otherwise, find the maximum rank for the operands. As an
480 exception, remove the bias from loop-carried phis when propagating
481 the rank so that dependent operations are not also biased. */
482 /* Simply walk over all SSA uses - this takes advantage of the
483 fact that non-SSA operands are is_gimple_min_invariant and
484 thus have rank 0. */
485 rank = 0;
486 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
487 rank = propagate_rank (rank, op, maybe_biased_p);
489 rank += 1;
490 if (biased_p)
491 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
494 if (dump_file && (dump_flags & TDF_DETAILS))
496 fprintf (dump_file, "Rank for ");
497 print_generic_expr (dump_file, e);
498 fprintf (dump_file, " is %" PRId64 "\n", rank);
501 /* Note the rank in the hashtable so we don't recompute it. */
502 insert_operand_rank (e, rank);
503 return rank;
506 /* Constants, globals, etc., are rank 0 */
507 return 0;
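/* A small worked example of the ranking (illustrative only): assuming,
   purely for illustration, that rank (a) == rank (b) == rank (c) == 1,

     t1 = a + b;     rank (t1) = MAX (rank (a), rank (b)) + 1 = 2
     t2 = t1 + c;    rank (t2) = MAX (rank (t1), rank (c)) + 1 = 3

   so names defined later in a dependence chain receive larger ranks,
   while constants and globals keep rank 0.  */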
511 /* We want integer ones to end up last no matter what, since they are
512 the ones we can do the most with. */
513 #define INTEGER_CONST_TYPE 1 << 4
514 #define FLOAT_ONE_CONST_TYPE 1 << 3
515 #define FLOAT_CONST_TYPE 1 << 2
516 #define OTHER_CONST_TYPE 1 << 1
518 /* Classify an invariant tree into integer, float, or other, so that
519 we can sort them to be near other constants of the same type. */
520 static inline int
521 constant_type (tree t)
523 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
524 return INTEGER_CONST_TYPE;
525 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
527 /* Sort -1.0 and 1.0 constants last: while in some cases
528 const_binop can't optimize some inexact operations, multiplication
529 by -1.0 or 1.0 can always be merged with others. */
530 if (real_onep (t) || real_minus_onep (t))
531 return FLOAT_ONE_CONST_TYPE;
532 return FLOAT_CONST_TYPE;
534 else
535 return OTHER_CONST_TYPE;
538 /* qsort comparison function to sort operand entries PA and PB by rank
539 so that the sorted array is ordered by rank in decreasing order. */
540 static int
541 sort_by_operand_rank (const void *pa, const void *pb)
543 const operand_entry *oea = *(const operand_entry *const *)pa;
544 const operand_entry *oeb = *(const operand_entry *const *)pb;
546 if (oeb->rank != oea->rank)
547 return oeb->rank > oea->rank ? 1 : -1;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oea->rank == 0)
554 if (constant_type (oeb->op) != constant_type (oea->op))
555 return constant_type (oea->op) - constant_type (oeb->op);
556 else
557 /* To make the sort stable, we use unique IDs to determine the
558 order. */
559 return oeb->id > oea->id ? 1 : -1;
562 if (TREE_CODE (oea->op) != SSA_NAME)
564 if (TREE_CODE (oeb->op) != SSA_NAME)
565 return oeb->id > oea->id ? 1 : -1;
566 else
567 return 1;
569 else if (TREE_CODE (oeb->op) != SSA_NAME)
570 return -1;
572 /* Lastly, make sure the versions that are the same go next to each
573 other. */
574 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
576 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
577 versions of removed SSA_NAMEs, so if possible, prefer to sort
578 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
579 See PR60418. */
580 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
581 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
582 basic_block bba = gimple_bb (stmta);
583 basic_block bbb = gimple_bb (stmtb);
584 if (bbb != bba)
586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
587 but the other might not be. */
588 if (!bba)
589 return 1;
590 if (!bbb)
591 return -1;
592 /* If neither is, compare bb_rank. */
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
597 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
598 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
599 if (da != db)
600 return da ? 1 : -1;
602 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
605 return oeb->id > oea->id ? 1 : -1;
608 /* Add an operand entry to *OPS for the tree operand OP. */
610 static void
611 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
613 operand_entry *oe = operand_entry_pool.allocate ();
615 oe->op = op;
616 oe->rank = get_rank (op);
617 oe->id = next_operand_entry_id++;
618 oe->count = 1;
619 oe->stmt_to_insert = stmt_to_insert;
620 ops->safe_push (oe);
623 /* Add an operand entry to *OPS for the tree operand OP with repeat
624 count REPEAT. */
626 static void
627 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
628 HOST_WIDE_INT repeat)
630 operand_entry *oe = operand_entry_pool.allocate ();
632 oe->op = op;
633 oe->rank = get_rank (op);
634 oe->id = next_operand_entry_id++;
635 oe->count = repeat;
636 oe->stmt_to_insert = NULL;
637 ops->safe_push (oe);
639 reassociate_stats.pows_encountered++;
642 /* Returns true if we can associate the SSA def OP. */
644 static bool
645 can_reassociate_op_p (tree op)
647 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
648 return false;
649 /* Make sure asm goto outputs do not participate in reassociation since
650 we have no way to find an insertion place after asm goto. */
651 if (TREE_CODE (op) == SSA_NAME
652 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
653 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
654 return false;
655 return true;
658 /* Returns true if we can reassociate operations of TYPE.
659 That is for integral or non-saturating fixed-point types, and for
660 floating point types when associative-math is enabled. */
662 static bool
663 can_reassociate_type_p (tree type)
665 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
666 || NON_SAT_FIXED_POINT_TYPE_P (type)
667 || (flag_associative_math && FLOAT_TYPE_P (type)))
668 return true;
669 return false;
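/* For instance, an unsigned int (whose overflow wraps) or a
   non-saturating fixed-point type always qualifies, while a floating
   point type only qualifies when -fassociative-math is in effect.  */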
672 /* Return true if STMT is a reassociable operation containing a binary
673 operation with tree code CODE, and is inside LOOP. */
675 static bool
676 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
678 basic_block bb = gimple_bb (stmt);
680 if (gimple_bb (stmt) == NULL)
681 return false;
683 if (!flow_bb_inside_loop_p (loop, bb))
684 return false;
686 if (is_gimple_assign (stmt)
687 && gimple_assign_rhs_code (stmt) == code
688 && has_single_use (gimple_assign_lhs (stmt)))
690 tree rhs1 = gimple_assign_rhs1 (stmt);
691 tree rhs2 = gimple_assign_rhs2 (stmt);
692 if (!can_reassociate_op_p (rhs1)
693 || (rhs2 && !can_reassociate_op_p (rhs2)))
694 return false;
695 return true;
698 return false;
702 /* Return true if STMT is a nop-conversion. */
704 static bool
705 gimple_nop_conversion_p (gimple *stmt)
707 if (gassign *ass = dyn_cast <gassign *> (stmt))
709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
711 TREE_TYPE (gimple_assign_rhs1 (ass))))
712 return true;
714 return false;
717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
718 operand of that operation. Otherwise, return NULL_TREE. */
720 static tree
721 get_unary_op (tree name, enum tree_code opcode)
723 gimple *stmt = SSA_NAME_DEF_STMT (name);
725 /* Look through nop conversions (sign changes). */
726 if (gimple_nop_conversion_p (stmt)
727 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
728 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
730 if (!is_gimple_assign (stmt))
731 return NULL_TREE;
733 if (gimple_assign_rhs_code (stmt) == opcode)
734 return gimple_assign_rhs1 (stmt);
735 return NULL_TREE;
738 /* Return true if OP1 and OP2 have the same value if cast to either type. */
740 static bool
741 ops_equal_values_p (tree op1, tree op2)
743 if (op1 == op2)
744 return true;
746 tree orig_op1 = op1;
747 if (TREE_CODE (op1) == SSA_NAME)
749 gimple *stmt = SSA_NAME_DEF_STMT (op1);
750 if (gimple_nop_conversion_p (stmt))
752 op1 = gimple_assign_rhs1 (stmt);
753 if (op1 == op2)
754 return true;
758 if (TREE_CODE (op2) == SSA_NAME)
760 gimple *stmt = SSA_NAME_DEF_STMT (op2);
761 if (gimple_nop_conversion_p (stmt))
763 op2 = gimple_assign_rhs1 (stmt);
764 if (op1 == op2
765 || orig_op1 == op2)
766 return true;
770 return false;
774 /* If CURR and LAST are a pair of ops that OPCODE allows us to
775 eliminate through equivalences, do so, remove them from OPS, and
776 return true. Otherwise, return false. */
778 static bool
779 eliminate_duplicate_pair (enum tree_code opcode,
780 vec<operand_entry *> *ops,
781 bool *all_done,
782 unsigned int i,
783 operand_entry *curr,
784 operand_entry *last)
787 /* If we have two of the same op, and the opcode is &, |, min, or max,
788 we can eliminate one of them.
789 If we have two of the same op, and the opcode is ^, we can
790 eliminate both of them. */
792 if (last && last->op == curr->op)
794 switch (opcode)
796 case MAX_EXPR:
797 case MIN_EXPR:
798 case BIT_IOR_EXPR:
799 case BIT_AND_EXPR:
800 if (dump_file && (dump_flags & TDF_DETAILS))
802 fprintf (dump_file, "Equivalence: ");
803 print_generic_expr (dump_file, curr->op);
804 fprintf (dump_file, " [&|minmax] ");
805 print_generic_expr (dump_file, last->op);
806 fprintf (dump_file, " -> ");
807 print_generic_stmt (dump_file, last->op);
810 ops->ordered_remove (i);
811 reassociate_stats.ops_eliminated ++;
813 return true;
815 case BIT_XOR_EXPR:
816 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "Equivalence: ");
819 print_generic_expr (dump_file, curr->op);
820 fprintf (dump_file, " ^ ");
821 print_generic_expr (dump_file, last->op);
822 fprintf (dump_file, " -> nothing\n");
825 reassociate_stats.ops_eliminated += 2;
827 if (ops->length () == 2)
829 ops->truncate (0);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
831 *all_done = true;
833 else
835 ops->ordered_remove (i-1);
836 ops->ordered_remove (i-1);
839 return true;
841 default:
842 break;
845 return false;
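/* For example (illustrative): with OPS = {x, x, y} and opcode
   BIT_AND_EXPR one copy of x is removed, leaving {x, y}; with opcode
   BIT_XOR_EXPR both copies are removed, and if they were the only two
   entries a zero constant of the appropriate type is pushed instead.  */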
848 static vec<tree> plus_negates;
850 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
851 expression, look in OPS for a corresponding positive operation to cancel
852 it out. If we find one, remove the other from OPS, replace
853 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
854 return false. */
856 static bool
857 eliminate_plus_minus_pair (enum tree_code opcode,
858 vec<operand_entry *> *ops,
859 unsigned int currindex,
860 operand_entry *curr)
862 tree negateop;
863 tree notop;
864 unsigned int i;
865 operand_entry *oe;
867 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
868 return false;
870 negateop = get_unary_op (curr->op, NEGATE_EXPR);
871 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
872 if (negateop == NULL_TREE && notop == NULL_TREE)
873 return false;
875 /* Any non-negated version will have a rank that is one less than
876 the current rank. So once we hit those ranks, if we don't find
877 one, we can stop. */
879 for (i = currindex + 1;
880 ops->iterate (i, &oe)
881 && oe->rank >= curr->rank - 1 ;
882 i++)
884 if (negateop
885 && ops_equal_values_p (oe->op, negateop))
887 if (dump_file && (dump_flags & TDF_DETAILS))
889 fprintf (dump_file, "Equivalence: ");
890 print_generic_expr (dump_file, negateop);
891 fprintf (dump_file, " + -");
892 print_generic_expr (dump_file, oe->op);
893 fprintf (dump_file, " -> 0\n");
896 ops->ordered_remove (i);
897 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
898 ops->ordered_remove (currindex);
899 reassociate_stats.ops_eliminated ++;
901 return true;
903 else if (notop
904 && ops_equal_values_p (oe->op, notop))
906 tree op_type = TREE_TYPE (oe->op);
908 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Equivalence: ");
911 print_generic_expr (dump_file, notop);
912 fprintf (dump_file, " + ~");
913 print_generic_expr (dump_file, oe->op);
914 fprintf (dump_file, " -> -1\n");
917 ops->ordered_remove (i);
918 add_to_ops_vec (ops, build_all_ones_cst (op_type));
919 ops->ordered_remove (currindex);
920 reassociate_stats.ops_eliminated ++;
922 return true;
926 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
927 save it for later inspection in repropagate_negates(). */
928 if (negateop != NULL_TREE
929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
930 plus_negates.safe_push (curr->op);
932 return false;
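/* For example (illustrative): in the chain x + y + -x the pair x and -x
   is removed and a 0 of the appropriate type is pushed onto OPS, while
   in x + y + ~x the pair x and ~x collapses into a -1 (all-ones)
   constant.  */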
935 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
936 bitwise not expression, look in OPS for a corresponding operand to
937 cancel it out. If we find one, the whole operand list collapses to
938 a single 0 (for &) or -1 (for |) constant and we return true.
939 Otherwise, return false. */
941 static bool
942 eliminate_not_pairs (enum tree_code opcode,
943 vec<operand_entry *> *ops,
944 unsigned int currindex,
945 operand_entry *curr)
947 tree notop;
948 unsigned int i;
949 operand_entry *oe;
951 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
952 || TREE_CODE (curr->op) != SSA_NAME)
953 return false;
955 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
956 if (notop == NULL_TREE)
957 return false;
959 /* Any non-not version will have a rank that is one less than
960 the current rank. So once we hit those ranks, if we don't find
961 one, we can stop. */
963 for (i = currindex + 1;
964 ops->iterate (i, &oe)
965 && oe->rank >= curr->rank - 1;
966 i++)
968 if (oe->op == notop)
970 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Equivalence: ");
973 print_generic_expr (dump_file, notop);
974 if (opcode == BIT_AND_EXPR)
975 fprintf (dump_file, " & ~");
976 else if (opcode == BIT_IOR_EXPR)
977 fprintf (dump_file, " | ~");
978 print_generic_expr (dump_file, oe->op);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " -> 0\n");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " -> -1\n");
985 if (opcode == BIT_AND_EXPR)
986 oe->op = build_zero_cst (TREE_TYPE (oe->op));
987 else if (opcode == BIT_IOR_EXPR)
988 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
990 reassociate_stats.ops_eliminated += ops->length () - 1;
991 ops->truncate (0);
992 ops->quick_push (oe);
993 return true;
997 return false;
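/* For example (illustrative): for x & y & ~x the whole operand list
   collapses to a single 0, and for x | y | ~x it collapses to a single
   -1 (all-ones) constant, since the result no longer depends on the
   remaining operands.  */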
1000 /* Use constant value that may be present in OPS to try to eliminate
1001 operands. Note that this function is only really used when we've
1002 eliminated ops for other reasons, or merged constants. Across
1003 single statements, fold already does all of this, plus more. There
1004 is little point in duplicating logic, so I've only included the
1005 identities that I could ever construct testcases to trigger. */
1007 static void
1008 eliminate_using_constants (enum tree_code opcode,
1009 vec<operand_entry *> *ops)
1011 operand_entry *oelast = ops->last ();
1012 tree type = TREE_TYPE (oelast->op);
1014 if (oelast->rank == 0
1015 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1017 switch (opcode)
1019 case BIT_AND_EXPR:
1020 if (integer_zerop (oelast->op))
1022 if (ops->length () != 1)
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Found & 0, removing all other ops\n");
1027 reassociate_stats.ops_eliminated += ops->length () - 1;
1029 ops->truncate (0);
1030 ops->quick_push (oelast);
1031 return;
1034 else if (integer_all_onesp (oelast->op))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found & -1, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1044 break;
1045 case BIT_IOR_EXPR:
1046 if (integer_all_onesp (oelast->op))
1048 if (ops->length () != 1)
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 fprintf (dump_file, "Found | -1, removing all other ops\n");
1053 reassociate_stats.ops_eliminated += ops->length () - 1;
1055 ops->truncate (0);
1056 ops->quick_push (oelast);
1057 return;
1060 else if (integer_zerop (oelast->op))
1062 if (ops->length () != 1)
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Found | 0, removing\n");
1066 ops->pop ();
1067 reassociate_stats.ops_eliminated++;
1070 break;
1071 case MULT_EXPR:
1072 if (integer_zerop (oelast->op)
1073 || (FLOAT_TYPE_P (type)
1074 && !HONOR_NANS (type)
1075 && !HONOR_SIGNED_ZEROS (type)
1076 && real_zerop (oelast->op)))
1078 if (ops->length () != 1)
1080 if (dump_file && (dump_flags & TDF_DETAILS))
1081 fprintf (dump_file, "Found * 0, removing all other ops\n");
1083 reassociate_stats.ops_eliminated += ops->length () - 1;
1084 ops->truncate (0);
1085 ops->quick_push (oelast);
1086 return;
1089 else if (integer_onep (oelast->op)
1090 || (FLOAT_TYPE_P (type)
1091 && !HONOR_SNANS (type)
1092 && real_onep (oelast->op)))
1094 if (ops->length () != 1)
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, "Found * 1, removing\n");
1098 ops->pop ();
1099 reassociate_stats.ops_eliminated++;
1100 return;
1103 break;
1104 case BIT_XOR_EXPR:
1105 case PLUS_EXPR:
1106 case MINUS_EXPR:
1107 if (integer_zerop (oelast->op)
1108 || (FLOAT_TYPE_P (type)
1109 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1110 && fold_real_zero_addition_p (type, 0, oelast->op,
1111 opcode == MINUS_EXPR)))
1113 if (ops->length () != 1)
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 fprintf (dump_file, "Found [|^+] 0, removing\n");
1117 ops->pop ();
1118 reassociate_stats.ops_eliminated++;
1119 return;
1122 break;
1123 default:
1124 break;
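/* For example (illustrative): a & 0 & b collapses the operand list to
   {0}, a | -1 | b collapses it to {-1}, a * 0 * b collapses it to {0},
   while trailing identities such as & -1, | 0, * 1 and + 0 are simply
   dropped from the list.  */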
1130 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1131 bool, bool);
1133 /* Structure for tracking and counting operands. */
1134 struct oecount {
1135 unsigned int cnt;
1136 unsigned int id;
1137 enum tree_code oecode;
1138 tree op;
1142 /* The heap for the oecount hashtable and the sorted list of operands. */
1143 static vec<oecount> cvec;
1146 /* Oecount hashtable helpers. */
1148 struct oecount_hasher : int_hash <int, 0, 1>
1150 static inline hashval_t hash (int);
1151 static inline bool equal (int, int);
1154 /* Hash function for oecount. */
1156 inline hashval_t
1157 oecount_hasher::hash (int p)
1159 const oecount *c = &cvec[p - 42];
1160 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1163 /* Comparison function for oecount. */
1165 inline bool
1166 oecount_hasher::equal (int p1, int p2)
1168 const oecount *c1 = &cvec[p1 - 42];
1169 const oecount *c2 = &cvec[p2 - 42];
1170 return c1->oecode == c2->oecode && c1->op == c2->op;
1173 /* Comparison function for qsort sorting oecount elements by count. */
1175 static int
1176 oecount_cmp (const void *p1, const void *p2)
1178 const oecount *c1 = (const oecount *)p1;
1179 const oecount *c2 = (const oecount *)p2;
1180 if (c1->cnt != c2->cnt)
1181 return c1->cnt > c2->cnt ? 1 : -1;
1182 else
1183 /* If counts are identical, use unique IDs to stabilize qsort. */
1184 return c1->id > c2->id ? 1 : -1;
1187 /* Return TRUE iff STMT represents a builtin call that raises OP
1188 to some exponent. */
1190 static bool
1191 stmt_is_power_of_op (gimple *stmt, tree op)
1193 if (!is_gimple_call (stmt))
1194 return false;
1196 switch (gimple_call_combined_fn (stmt))
1198 CASE_CFN_POW:
1199 CASE_CFN_POWI:
1200 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1202 default:
1203 return false;
1207 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1208 in place and return the result. Assumes that stmt_is_power_of_op
1209 was previously called for STMT and returned TRUE. */
1211 static HOST_WIDE_INT
1212 decrement_power (gimple *stmt)
1214 REAL_VALUE_TYPE c, cint;
1215 HOST_WIDE_INT power;
1216 tree arg1;
1218 switch (gimple_call_combined_fn (stmt))
1220 CASE_CFN_POW:
1221 arg1 = gimple_call_arg (stmt, 1);
1222 c = TREE_REAL_CST (arg1);
1223 power = real_to_integer (&c) - 1;
1224 real_from_integer (&cint, VOIDmode, power, SIGNED);
1225 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1226 return power;
1228 CASE_CFN_POWI:
1229 arg1 = gimple_call_arg (stmt, 1);
1230 power = TREE_INT_CST_LOW (arg1) - 1;
1231 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1232 return power;
1234 default:
1235 gcc_unreachable ();
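/* For example (illustrative): for a call __builtin_powi (x, 5) the
   second argument is rewritten to 4 in place and 4 is returned; for
   __builtin_pow (x, 3.0) the exponent becomes 2.0 and 2 is returned.  */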
1239 /* Give the SSA name defined by STMT a new SSA name and replace all
1240 uses of the old name with the new one. Also return the new SSA name. */
1242 static tree
1243 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1245 gimple *use_stmt;
1246 use_operand_p use;
1247 imm_use_iterator iter;
1248 tree new_lhs, new_debug_lhs = NULL_TREE;
1249 tree lhs = gimple_get_lhs (stmt);
1251 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1252 gimple_set_lhs (stmt, new_lhs);
1254 /* Also need to update GIMPLE_DEBUGs. */
1255 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1257 tree repl = new_lhs;
1258 if (is_gimple_debug (use_stmt))
1260 if (new_debug_lhs == NULL_TREE)
1262 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1263 gdebug *def_temp
1264 = gimple_build_debug_bind (new_debug_lhs,
1265 build2 (opcode, TREE_TYPE (lhs),
1266 new_lhs, op),
1267 stmt);
1268 gimple_set_uid (def_temp, gimple_uid (stmt));
1269 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1270 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1272 repl = new_debug_lhs;
1274 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1275 SET_USE (use, repl);
1276 update_stmt (use_stmt);
1278 return new_lhs;
1281 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1282 uses with new SSA names. Also do this for the stmt that defines DEF
1283 if *DEF is not OP. */
1285 static void
1286 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1287 vec<gimple *> &stmts_to_fix)
1289 unsigned i;
1290 gimple *stmt;
1292 if (*def != op
1293 && TREE_CODE (*def) == SSA_NAME
1294 && (stmt = SSA_NAME_DEF_STMT (*def))
1295 && gimple_code (stmt) != GIMPLE_NOP)
1296 *def = make_new_ssa_for_def (stmt, opcode, op);
1298 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1299 make_new_ssa_for_def (stmt, opcode, op);
1302 /* Find the single immediate use of STMT's LHS, and replace it
1303 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1304 replace *DEF with OP as well. */
1306 static void
1307 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1309 tree lhs;
1310 gimple *use_stmt;
1311 use_operand_p use;
1312 gimple_stmt_iterator gsi;
1314 if (is_gimple_call (stmt))
1315 lhs = gimple_call_lhs (stmt);
1316 else
1317 lhs = gimple_assign_lhs (stmt);
1319 gcc_assert (has_single_use (lhs));
1320 single_imm_use (lhs, &use, &use_stmt);
1321 if (lhs == *def)
1322 *def = op;
1323 SET_USE (use, op);
1324 if (TREE_CODE (op) != SSA_NAME)
1325 update_stmt (use_stmt);
1326 gsi = gsi_for_stmt (stmt);
1327 unlink_stmt_vdef (stmt);
1328 reassoc_remove_stmt (&gsi);
1329 release_defs (stmt);
1332 /* Walks the linear chain with result *DEF searching for an operation
1333 with operand OP and code OPCODE, removing that from the chain. *DEF
1334 is updated if there is only one operand but no operation left. */
1336 static void
1337 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1339 tree orig_def = *def;
1340 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1341 /* PR72835 - Record the stmt chain that has to be updated such that
1342 we don't use the same LHS when the values computed are different. */
1343 auto_vec<gimple *, 64> stmts_to_fix;
1347 tree name;
1349 if (opcode == MULT_EXPR)
1351 if (stmt_is_power_of_op (stmt, op))
1353 if (decrement_power (stmt) == 1)
1355 if (stmts_to_fix.length () > 0)
1356 stmts_to_fix.pop ();
1357 propagate_op_to_single_use (op, stmt, def);
1359 break;
1361 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1363 if (gimple_assign_rhs1 (stmt) == op)
1365 tree cst = build_minus_one_cst (TREE_TYPE (op));
1366 if (stmts_to_fix.length () > 0)
1367 stmts_to_fix.pop ();
1368 propagate_op_to_single_use (cst, stmt, def);
1369 break;
1371 else if (integer_minus_onep (op)
1372 || real_minus_onep (op))
1374 gimple_assign_set_rhs_code
1375 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1376 break;
1381 name = gimple_assign_rhs1 (stmt);
1383 /* If this is the operation we look for and one of the operands
1384 is ours, simply propagate the other operand into the stmt's
1385 single use. */
1386 if (gimple_assign_rhs_code (stmt) == opcode
1387 && (name == op
1388 || gimple_assign_rhs2 (stmt) == op))
1390 if (name == op)
1391 name = gimple_assign_rhs2 (stmt);
1392 if (stmts_to_fix.length () > 0)
1393 stmts_to_fix.pop ();
1394 propagate_op_to_single_use (name, stmt, def);
1395 break;
1398 /* We might have a multiply of two __builtin_pow* calls, and
1399 the operand might be hiding in the rightmost one. Likewise
1400 this can happen for a negate. */
1401 if (opcode == MULT_EXPR
1402 && gimple_assign_rhs_code (stmt) == opcode
1403 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1404 && has_single_use (gimple_assign_rhs2 (stmt)))
1406 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1407 if (stmt_is_power_of_op (stmt2, op))
1409 if (decrement_power (stmt2) == 1)
1410 propagate_op_to_single_use (op, stmt2, def);
1411 else
1412 stmts_to_fix.safe_push (stmt2);
1413 break;
1415 else if (is_gimple_assign (stmt2)
1416 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1418 if (gimple_assign_rhs1 (stmt2) == op)
1420 tree cst = build_minus_one_cst (TREE_TYPE (op));
1421 propagate_op_to_single_use (cst, stmt2, def);
1422 break;
1424 else if (integer_minus_onep (op)
1425 || real_minus_onep (op))
1427 stmts_to_fix.safe_push (stmt2);
1428 gimple_assign_set_rhs_code
1429 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1430 break;
1435 /* Continue walking the chain. */
1436 gcc_assert (name != op
1437 && TREE_CODE (name) == SSA_NAME);
1438 stmt = SSA_NAME_DEF_STMT (name);
1439 stmts_to_fix.safe_push (stmt);
1441 while (1);
1443 if (stmts_to_fix.length () > 0 || *def == orig_def)
1444 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
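/* A hypothetical example: for the multiplication chain

     t1 = x * a;
     t2 = t1 * b;      (*DEF is t2)

   zero_one_operation (&def, MULT_EXPR, x) strips the factor x so the
   chain now computes a * b (giving the affected statements fresh SSA
   names), and the caller re-applies x once around the combined sum, as
   undistribute_ops_list does below.  */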
1447 /* Returns true if statement S1 dominates statement S2. Like
1448 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1450 static bool
1451 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1453 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1455 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1456 SSA_NAME. Assume it lives at the beginning of function and
1457 thus dominates everything. */
1458 if (!bb1 || s1 == s2)
1459 return true;
1461 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1462 if (!bb2)
1463 return false;
1465 if (bb1 == bb2)
1467 /* PHIs in the same basic block are assumed to be
1468 executed all in parallel, if only one stmt is a PHI,
1469 it dominates the other stmt in the same basic block. */
1470 if (gimple_code (s1) == GIMPLE_PHI)
1471 return true;
1473 if (gimple_code (s2) == GIMPLE_PHI)
1474 return false;
1476 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1478 if (gimple_uid (s1) < gimple_uid (s2))
1479 return true;
1481 if (gimple_uid (s1) > gimple_uid (s2))
1482 return false;
1484 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1485 unsigned int uid = gimple_uid (s1);
1486 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1488 gimple *s = gsi_stmt (gsi);
1489 if (gimple_uid (s) != uid)
1490 break;
1491 if (s == s2)
1492 return true;
1495 return false;
1498 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1501 /* Insert STMT after INSERT_POINT. */
1503 static void
1504 insert_stmt_after (gimple *stmt, gimple *insert_point)
1506 gimple_stmt_iterator gsi;
1507 basic_block bb;
1509 if (gimple_code (insert_point) == GIMPLE_PHI)
1510 bb = gimple_bb (insert_point);
1511 else if (!stmt_ends_bb_p (insert_point))
1513 gsi = gsi_for_stmt (insert_point);
1514 gimple_set_uid (stmt, gimple_uid (insert_point));
1515 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1516 return;
1518 else if (gimple_code (insert_point) == GIMPLE_ASM
1519 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1520 /* We have no idea where to insert - it depends on where the
1521 uses will be placed. */
1522 gcc_unreachable ();
1523 else
1524 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1525 thus if it must end a basic block, it should be a call that can
1526 throw, or some assignment that can throw. If it throws, the LHS
1527 of it will not be initialized though, so only valid places using
1528 the SSA_NAME should be dominated by the fallthru edge. */
1529 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1530 gsi = gsi_after_labels (bb);
1531 if (gsi_end_p (gsi))
1533 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1534 gimple_set_uid (stmt,
1535 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1537 else
1538 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1539 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1542 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1543 the result. Places the statement after the definition of either
1544 OP1 or OP2. Returns the new statement. */
1546 static gimple *
1547 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1549 gimple *op1def = NULL, *op2def = NULL;
1550 gimple_stmt_iterator gsi;
1551 tree op;
1552 gassign *sum;
1554 /* Create the addition statement. */
1555 op = make_ssa_name (type);
1556 sum = gimple_build_assign (op, opcode, op1, op2);
1558 /* Find an insertion place and insert. */
1559 if (TREE_CODE (op1) == SSA_NAME)
1560 op1def = SSA_NAME_DEF_STMT (op1);
1561 if (TREE_CODE (op2) == SSA_NAME)
1562 op2def = SSA_NAME_DEF_STMT (op2);
1563 if ((!op1def || gimple_nop_p (op1def))
1564 && (!op2def || gimple_nop_p (op2def)))
1566 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1567 if (gsi_end_p (gsi))
1569 gimple_stmt_iterator gsi2
1570 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1571 gimple_set_uid (sum,
1572 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1574 else
1575 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1576 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1578 else
1580 gimple *insert_point;
1581 if ((!op1def || gimple_nop_p (op1def))
1582 || (op2def && !gimple_nop_p (op2def)
1583 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1584 insert_point = op2def;
1585 else
1586 insert_point = op1def;
1587 insert_stmt_after (sum, insert_point);
1589 update_stmt (sum);
1591 return sum;
1594 /* Perform un-distribution of divisions and multiplications.
1595 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1596 to (A + B) / X for real X.
1598 The algorithm is organized as follows.
1600 - First we walk the addition chain *OPS looking for summands that
1601 are defined by a multiplication or a real division. This results
1602 in the candidates bitmap with relevant indices into *OPS.
1604 - Second we build the chains of multiplications or divisions for
1605 these candidates, counting the number of occurrences of (operand, code)
1606 pairs in all of the candidates chains.
1608 - Third we sort the (operand, code) pairs by number of occurrence and
1609 process them starting with the pair with the most uses.
1611 * For each such pair we walk the candidates again to build a
1612 second candidate bitmap noting all multiplication/division chains
1613 that have at least one occurrence of (operand, code).
1615 * We build an alternate addition chain only covering these
1616 candidates with one (operand, code) operation removed from their
1617 multiplication/division chain.
1619 * The first candidate gets replaced by the alternate addition chain
1620 multiplied/divided by the operand.
1622 * All candidate chains get disabled for further processing and
1623 processing of (operand, code) pairs continues.
1625 The alternate addition chains built are re-processed by the main
1626 reassociation algorithm which allows optimizing a * x * y + b * y * x
1627 to (a + b ) * x * y in one invocation of the reassociation pass. */
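/* A hypothetical walk-through: for the chain a*x + b*x + c the two
   multiplications are the candidates, the pair (x, MULT_EXPR) occurs
   twice, so zero_one_operation strips x from both chains, a new sum
   a + b is built, that sum is multiplied by x, and the operand list
   becomes { (a + b) * x, 0, c }, which the main reassociation algorithm
   then processes normally.  */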
1629 static bool
1630 undistribute_ops_list (enum tree_code opcode,
1631 vec<operand_entry *> *ops, class loop *loop)
1633 unsigned int length = ops->length ();
1634 operand_entry *oe1;
1635 unsigned i, j;
1636 unsigned nr_candidates, nr_candidates2;
1637 sbitmap_iterator sbi0;
1638 vec<operand_entry *> *subops;
1639 bool changed = false;
1640 unsigned int next_oecount_id = 0;
1642 if (length <= 1
1643 || opcode != PLUS_EXPR)
1644 return false;
1646 /* Build a list of candidates to process. */
1647 auto_sbitmap candidates (length);
1648 bitmap_clear (candidates);
1649 nr_candidates = 0;
1650 FOR_EACH_VEC_ELT (*ops, i, oe1)
1652 enum tree_code dcode;
1653 gimple *oe1def;
1655 if (TREE_CODE (oe1->op) != SSA_NAME)
1656 continue;
1657 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1658 if (!is_gimple_assign (oe1def))
1659 continue;
1660 dcode = gimple_assign_rhs_code (oe1def);
1661 if ((dcode != MULT_EXPR
1662 && dcode != RDIV_EXPR)
1663 || !is_reassociable_op (oe1def, dcode, loop))
1664 continue;
1666 bitmap_set_bit (candidates, i);
1667 nr_candidates++;
1670 if (nr_candidates < 2)
1671 return false;
1673 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, "searching for un-distribute opportunities ");
1676 print_generic_expr (dump_file,
1677 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1678 fprintf (dump_file, " %d\n", nr_candidates);
1681 /* Build linearized sub-operand lists and the counting table. */
1682 cvec.create (0);
1684 hash_table<oecount_hasher> ctable (15);
1686 /* ??? Macro arguments cannot have multi-argument template types in
1687 them. This typedef is needed to work around that limitation. */
1688 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1689 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1690 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1692 gimple *oedef;
1693 enum tree_code oecode;
1694 unsigned j;
1696 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1697 oecode = gimple_assign_rhs_code (oedef);
1698 linearize_expr_tree (&subops[i], oedef,
1699 associative_tree_code (oecode), false);
1701 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1703 oecount c;
1704 int *slot;
1705 int idx;
1706 c.oecode = oecode;
1707 c.cnt = 1;
1708 c.id = next_oecount_id++;
1709 c.op = oe1->op;
1710 cvec.safe_push (c);
1711 idx = cvec.length () + 41;
1712 slot = ctable.find_slot (idx, INSERT);
1713 if (!*slot)
1715 *slot = idx;
1717 else
1719 cvec.pop ();
1720 cvec[*slot - 42].cnt++;
1725 /* Sort the counting table. */
1726 cvec.qsort (oecount_cmp);
1728 if (dump_file && (dump_flags & TDF_DETAILS))
1730 oecount *c;
1731 fprintf (dump_file, "Candidates:\n");
1732 FOR_EACH_VEC_ELT (cvec, j, c)
1734 fprintf (dump_file, " %u %s: ", c->cnt,
1735 c->oecode == MULT_EXPR
1736 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1737 print_generic_expr (dump_file, c->op);
1738 fprintf (dump_file, "\n");
1742 /* Process the (operand, code) pairs in order of most occurrence. */
1743 auto_sbitmap candidates2 (length);
1744 while (!cvec.is_empty ())
1746 oecount *c = &cvec.last ();
1747 if (c->cnt < 2)
1748 break;
1750 /* Now collect the operands in the outer chain that contain
1751 the common operand in their inner chain. */
1752 bitmap_clear (candidates2);
1753 nr_candidates2 = 0;
1754 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1756 gimple *oedef;
1757 enum tree_code oecode;
1758 unsigned j;
1759 tree op = (*ops)[i]->op;
1761 /* If we undistributed in this chain already, this may be
1762 a constant. */
1763 if (TREE_CODE (op) != SSA_NAME)
1764 continue;
1766 oedef = SSA_NAME_DEF_STMT (op);
1767 oecode = gimple_assign_rhs_code (oedef);
1768 if (oecode != c->oecode)
1769 continue;
1771 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1773 if (oe1->op == c->op)
1775 bitmap_set_bit (candidates2, i);
1776 ++nr_candidates2;
1777 break;
1782 if (nr_candidates2 >= 2)
1784 operand_entry *oe1, *oe2;
1785 gimple *prod;
1786 int first = bitmap_first_set_bit (candidates2);
1788 /* Build the new addition chain. */
1789 oe1 = (*ops)[first];
1790 if (dump_file && (dump_flags & TDF_DETAILS))
1792 fprintf (dump_file, "Building (");
1793 print_generic_expr (dump_file, oe1->op);
1795 zero_one_operation (&oe1->op, c->oecode, c->op);
1796 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1798 gimple *sum;
1799 oe2 = (*ops)[i];
1800 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, " + ");
1803 print_generic_expr (dump_file, oe2->op);
1805 zero_one_operation (&oe2->op, c->oecode, c->op);
1806 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1807 oe1->op, oe2->op, opcode);
1808 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1809 oe2->rank = 0;
1810 oe1->op = gimple_get_lhs (sum);
1813 /* Apply the multiplication/division. */
1814 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1815 oe1->op, c->op, c->oecode);
1816 if (dump_file && (dump_flags & TDF_DETAILS))
1818 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1819 print_generic_expr (dump_file, c->op);
1820 fprintf (dump_file, "\n");
1823 /* Record it in the addition chain and disable further
1824 undistribution with this op. */
1825 oe1->op = gimple_assign_lhs (prod);
1826 oe1->rank = get_rank (oe1->op);
1827 subops[first].release ();
1829 changed = true;
1832 cvec.pop ();
1835 for (i = 0; i < ops->length (); ++i)
1836 subops[i].release ();
1837 free (subops);
1838 cvec.release ();
1840 return changed;
1843 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1844 first: element index for each relevant BIT_FIELD_REF.
1845 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1846 typedef std::pair<unsigned, unsigned> v_info_elem;
1847 struct v_info {
1848 tree vec_type;
1849 auto_vec<v_info_elem, 32> vec;
1851 typedef v_info *v_info_ptr;
1853 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1854 static int
1855 sort_by_mach_mode (const void *p_i, const void *p_j)
1857 const tree tr1 = *((const tree *) p_i);
1858 const tree tr2 = *((const tree *) p_j);
1859 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1860 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1861 if (mode1 > mode2)
1862 return 1;
1863 else if (mode1 < mode2)
1864 return -1;
1865 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1866 return -1;
1867 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1868 return 1;
1869 return 0;
1872 /* Cleanup hash map for VECTOR information. */
1873 static void
1874 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1876 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1877 it != info_map.end (); ++it)
1879 v_info_ptr info = (*it).second;
1880 delete info;
1881 (*it).second = NULL;
1885 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1886 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1887 is transformed to
1888 Vs = (V1 + V2 + ... + Vn)
1889 Vs[0] + Vs[1] + ... + Vs[k]
1891 The basic steps are listed below:
1893 1) Scan the addition chain *OPS looking for summands that come from a
1894 BIT_FIELD_REF on a VECTOR type. Record the information in
1895 v_info_map for each such summand, using the VECTOR SSA_NAME as key.
1897 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1898 contiguous and that they cover the whole VECTOR without any holes.
1899 Collect the VECTORs that pass this check as candidates to be transformed.
1901 3) Sort the candidate VECTORs by the machine mode of the VECTOR type; for
1902 each group of candidates with the same mode, build the addition
1903 statements for them and generate BIT_FIELD_REFs accordingly.
1905 TODO:
1906 The current implementation requires that each VECTOR be fully
1907 covered. It could be extended to support partial coverage (adjacent
1908 elements that do not fill the whole vector), but that would need some
1909 cost model to decide when the transformation is worthwhile. */
1911 static bool
1912 undistribute_bitref_for_vector (enum tree_code opcode,
1913 vec<operand_entry *> *ops, struct loop *loop)
1915 if (ops->length () <= 1)
1916 return false;
1918 if (opcode != PLUS_EXPR
1919 && opcode != MULT_EXPR
1920 && opcode != BIT_XOR_EXPR
1921 && opcode != BIT_IOR_EXPR
1922 && opcode != BIT_AND_EXPR)
1923 return false;
1925 hash_map<tree, v_info_ptr> v_info_map;
1926 operand_entry *oe1;
1927 unsigned i;
1929 /* Find the summands in the addition chain that come from a VECTOR
1930 BIT_FIELD_REF and record their information in the map. */
1931 FOR_EACH_VEC_ELT (*ops, i, oe1)
1933 enum tree_code dcode;
1934 gimple *oe1def;
1936 if (TREE_CODE (oe1->op) != SSA_NAME)
1937 continue;
1938 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1939 if (!is_gimple_assign (oe1def))
1940 continue;
1941 dcode = gimple_assign_rhs_code (oe1def);
1942 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1943 continue;
1945 tree rhs = gimple_assign_rhs1 (oe1def);
1946 tree vec = TREE_OPERAND (rhs, 0);
1947 tree vec_type = TREE_TYPE (vec);
1949 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1950 continue;
1952 /* Ignore it if target machine can't support this VECTOR type. */
1953 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1954 continue;
1956 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1957 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1958 continue;
1960 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1961 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1962 continue;
1964 /* The type of BIT_FIELD_REF might not be equal to the element type of
1965 the vector. We want to use a vector type with element type the
1966 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1967 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1969 machine_mode simd_mode;
1970 unsigned HOST_WIDE_INT size, nunits;
1971 unsigned HOST_WIDE_INT elem_size
1972 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1973 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1974 continue;
1975 if (size <= elem_size || (size % elem_size) != 0)
1976 continue;
1977 nunits = size / elem_size;
1978 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1979 nunits).exists (&simd_mode))
1980 continue;
1981 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1983 /* Ignore it if target machine can't support this VECTOR type. */
1984 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1985 continue;
1987 /* Check const vector type, constrain BIT_FIELD_REF offset and
1988 size. */
1989 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1990 continue;
1992 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1993 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1994 continue;
1997 tree elem_type = TREE_TYPE (vec_type);
1998 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1999 if (maybe_ne (bit_field_size (rhs), elem_size))
2000 continue;
2002 unsigned idx;
2003 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2004 continue;
2006 /* Ignore it if target machine can't support this type of VECTOR
2007 operation. */
2008 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2009 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2010 continue;
2012 bool existed;
2013 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2014 if (!existed)
2016 info = new v_info;
2017 info->vec_type = vec_type;
2019 else if (!types_compatible_p (vec_type, info->vec_type))
2020 continue;
2021 info->vec.safe_push (std::make_pair (idx, i));
2024 /* We need at least two VECTORs to combine. */
2025 if (v_info_map.elements () <= 1)
2027 cleanup_vinfo_map (v_info_map);
2028 return false;
2031 /* Verify all VECTOR candidates by checking two conditions:
2032 1) sorted offsets are adjacent, no holes.
2033 2) can fill the whole VECTOR perfectly.
2034 And add the valid candidates to a vector for further handling. */
2035 auto_vec<tree> valid_vecs (v_info_map.elements ());
2036 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2037 it != v_info_map.end (); ++it)
2039 tree cand_vec = (*it).first;
2040 v_info_ptr cand_info = (*it).second;
2041 unsigned int num_elems
2042 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2043 if (cand_info->vec.length () != num_elems)
2044 continue;
2045 sbitmap holes = sbitmap_alloc (num_elems);
2046 bitmap_ones (holes);
2047 bool valid = true;
2048 v_info_elem *curr;
2049 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2051 if (!bitmap_bit_p (holes, curr->first))
2053 valid = false;
2054 break;
2056 else
2057 bitmap_clear_bit (holes, curr->first);
2059 if (valid && bitmap_empty_p (holes))
2060 valid_vecs.quick_push (cand_vec);
2061 sbitmap_free (holes);
2064 /* We need at least two VECTORs to combine. */
2065 if (valid_vecs.length () <= 1)
2067 cleanup_vinfo_map (v_info_map);
2068 return false;
2071 valid_vecs.qsort (sort_by_mach_mode);
2072 /* Go through all candidates in machine mode order and skip any mode
2073 that has only a single candidate. */
2074 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2076 tree tvec = valid_vecs[i];
2077 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2079 /* Skip modes with only a single candidate. */
2080 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2081 continue;
2083 unsigned int idx, j;
2084 gimple *sum = NULL;
2085 tree sum_vec = tvec;
2086 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2087 v_info_elem *elem;
2088 tree vec_type = info_ptr->vec_type;
2090 /* Build the sum for all candidates with the same mode. */
2093 sum = build_and_add_sum (vec_type, sum_vec,
2094 valid_vecs[i + 1], opcode);
2095 if (!useless_type_conversion_p (vec_type,
2096 TREE_TYPE (valid_vecs[i + 1])))
2098 /* Update the operands only after build_and_add_sum,
2099 so that we don't have to repeat the placement algorithm
2100 of build_and_add_sum. */
2101 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2102 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2103 valid_vecs[i + 1]);
2104 tree lhs = make_ssa_name (vec_type);
2105 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2106 gimple_set_uid (g, gimple_uid (sum));
2107 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2108 gimple_assign_set_rhs2 (sum, lhs);
2109 if (sum_vec == tvec)
2111 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2112 lhs = make_ssa_name (vec_type);
2113 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2114 gimple_set_uid (g, gimple_uid (sum));
2115 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2116 gimple_assign_set_rhs1 (sum, lhs);
2118 update_stmt (sum);
2120 sum_vec = gimple_get_lhs (sum);
2121 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2122 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2123 /* Update the related ops of the current candidate VECTOR. */
2124 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2126 idx = elem->second;
2127 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2128 /* Mark this so that the op definition will get DCEd later. */
2129 gimple_set_visited (def, true);
2130 if (opcode == PLUS_EXPR
2131 || opcode == BIT_XOR_EXPR
2132 || opcode == BIT_IOR_EXPR)
2133 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2134 else if (opcode == MULT_EXPR)
2135 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2136 else
2138 gcc_assert (opcode == BIT_AND_EXPR);
2139 (*ops)[idx]->op
2140 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2142 (*ops)[idx]->rank = 0;
2144 if (dump_file && (dump_flags & TDF_DETAILS))
2146 fprintf (dump_file, "Generating addition -> ");
2147 print_gimple_stmt (dump_file, sum, 0);
2149 i++;
2151 while ((i < valid_vecs.length () - 1)
2152 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2154 /* Referring to the first valid VECTOR with this mode, generate the
2155 BIT_FIELD_REF statements accordingly. */
2156 info_ptr = *(v_info_map.get (tvec));
2157 gcc_assert (sum);
2158 tree elem_type = TREE_TYPE (vec_type);
2159 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2161 idx = elem->second;
2162 tree dst = make_ssa_name (elem_type);
2163 tree pos = bitsize_int (elem->first
2164 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2165 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2166 TYPE_SIZE (elem_type), pos);
2167 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2168 insert_stmt_after (gs, sum);
2169 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2170 /* Mark this so that the op definition will get DCEd later. */
2171 gimple_set_visited (def, true);
2172 (*ops)[idx]->op = gimple_assign_lhs (gs);
2173 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2174 if (dump_file && (dump_flags & TDF_DETAILS))
2176 fprintf (dump_file, "Generating bit_field_ref -> ");
2177 print_gimple_stmt (dump_file, gs, 0);
2182 if (dump_file && (dump_flags & TDF_DETAILS))
2183 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2185 cleanup_vinfo_map (v_info_map);
2187 return true;
2190 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2191 expression, examine the other OPS to see if any of them are comparisons
2192 of the same values, which we may be able to combine or eliminate.
2193 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
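/* The BIT_AND_EXPR case is analogous; for instance (a <= b) & (a >= b)
   can be folded to (a == b).  The actual folding is delegated to
   maybe_fold_or_comparisons / maybe_fold_and_comparisons below.  */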
2195 static bool
2196 eliminate_redundant_comparison (enum tree_code opcode,
2197 vec<operand_entry *> *ops,
2198 unsigned int currindex,
2199 operand_entry *curr)
2201 tree op1, op2;
2202 enum tree_code lcode, rcode;
2203 gimple *def1, *def2;
2204 int i;
2205 operand_entry *oe;
2207 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2208 return false;
2210 /* Check that CURR is a comparison. */
2211 if (TREE_CODE (curr->op) != SSA_NAME)
2212 return false;
2213 def1 = SSA_NAME_DEF_STMT (curr->op);
2214 if (!is_gimple_assign (def1))
2215 return false;
2216 lcode = gimple_assign_rhs_code (def1);
2217 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2218 return false;
2219 op1 = gimple_assign_rhs1 (def1);
2220 op2 = gimple_assign_rhs2 (def1);
2222 /* Now look for a similar comparison in the remaining OPS. */
2223 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2225 tree t;
2227 if (TREE_CODE (oe->op) != SSA_NAME)
2228 continue;
2229 def2 = SSA_NAME_DEF_STMT (oe->op);
2230 if (!is_gimple_assign (def2))
2231 continue;
2232 rcode = gimple_assign_rhs_code (def2);
2233 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2234 continue;
2236 /* If we got here, we have a match. See if we can combine the
2237 two comparisons. */
2238 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2239 if (opcode == BIT_IOR_EXPR)
2240 t = maybe_fold_or_comparisons (type,
2241 lcode, op1, op2,
2242 rcode, gimple_assign_rhs1 (def2),
2243 gimple_assign_rhs2 (def2));
2244 else
2245 t = maybe_fold_and_comparisons (type,
2246 lcode, op1, op2,
2247 rcode, gimple_assign_rhs1 (def2),
2248 gimple_assign_rhs2 (def2));
2249 if (!t)
2250 continue;
2252 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2253 always give us a boolean_type_node value back. If the original
2254 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2255 we need to convert. */
2256 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2258 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2259 continue;
2260 t = fold_convert (TREE_TYPE (curr->op), t);
2263 if (TREE_CODE (t) != INTEGER_CST
2264 && !operand_equal_p (t, curr->op, 0))
2266 enum tree_code subcode;
2267 tree newop1, newop2;
2268 if (!COMPARISON_CLASS_P (t))
2269 continue;
2270 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2271 STRIP_USELESS_TYPE_CONVERSION (newop1);
2272 STRIP_USELESS_TYPE_CONVERSION (newop2);
2273 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2274 continue;
2275 if (lcode == TREE_CODE (t)
2276 && operand_equal_p (op1, newop1, 0)
2277 && operand_equal_p (op2, newop2, 0))
2278 t = curr->op;
2279 else if ((TREE_CODE (newop1) == SSA_NAME
2280 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2281 || (TREE_CODE (newop2) == SSA_NAME
2282 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2283 continue;
2286 if (dump_file && (dump_flags & TDF_DETAILS))
2288 fprintf (dump_file, "Equivalence: ");
2289 print_generic_expr (dump_file, curr->op);
2290 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2291 print_generic_expr (dump_file, oe->op);
2292 fprintf (dump_file, " -> ");
2293 print_generic_expr (dump_file, t);
2294 fprintf (dump_file, "\n");
2297 /* Now we can delete oe, as it has been subsumed by the new combined
2298 expression t. */
2299 ops->ordered_remove (i);
2300 reassociate_stats.ops_eliminated ++;
2302 /* If t is the same as curr->op, we're done. Otherwise we must
2303 replace curr->op with t. A special case is if we got a constant
2304 back, in which case we add it to the end instead of in place of
2305 the current entry. */
2306 if (TREE_CODE (t) == INTEGER_CST)
2308 ops->ordered_remove (currindex);
2309 add_to_ops_vec (ops, t);
2311 else if (!operand_equal_p (t, curr->op, 0))
2313 gimple *sum;
2314 enum tree_code subcode;
2315 tree newop1;
2316 tree newop2;
2317 gcc_assert (COMPARISON_CLASS_P (t));
2318 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2319 STRIP_USELESS_TYPE_CONVERSION (newop1);
2320 STRIP_USELESS_TYPE_CONVERSION (newop2);
2321 gcc_checking_assert (is_gimple_val (newop1)
2322 && is_gimple_val (newop2));
2323 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2324 curr->op = gimple_get_lhs (sum);
2326 return true;
2329 return false;
2333 /* Transform repeated addition of same values into multiply with
2334 constant. */
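/* E.g. an operand list containing x four times is rewritten to use a
   single new entry x * 4; for floating-point types this is only done
   under -funsafe-math-optimizations.  */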
2335 static bool
2336 transform_add_to_multiply (vec<operand_entry *> *ops)
2338 operand_entry *oe;
2339 tree op = NULL_TREE;
2340 int j;
2341 int i, start = -1, end = 0, count = 0;
2342 auto_vec<std::pair <int, int> > indxs;
2343 bool changed = false;
2345 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2346 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2347 || !flag_unsafe_math_optimizations))
2348 return false;
2350 /* Look for repeated operands. */
2351 FOR_EACH_VEC_ELT (*ops, i, oe)
2353 if (start == -1)
2355 count = 1;
2356 op = oe->op;
2357 start = i;
2359 else if (operand_equal_p (oe->op, op, 0))
2361 count++;
2362 end = i;
2364 else
2366 if (count > 1)
2367 indxs.safe_push (std::make_pair (start, end));
2368 count = 1;
2369 op = oe->op;
2370 start = i;
2374 if (count > 1)
2375 indxs.safe_push (std::make_pair (start, end));
2377 for (j = indxs.length () - 1; j >= 0; --j)
2379 /* Convert repeated operand addition to multiplication. */
2380 start = indxs[j].first;
2381 end = indxs[j].second;
2382 op = (*ops)[start]->op;
2383 count = end - start + 1;
2384 for (i = end; i >= start; --i)
2385 ops->unordered_remove (i);
2386 tree tmp = make_ssa_name (TREE_TYPE (op));
2387 tree cst = build_int_cst (integer_type_node, count);
2388 gassign *mul_stmt
2389 = gimple_build_assign (tmp, MULT_EXPR,
2390 op, fold_convert (TREE_TYPE (op), cst));
2391 gimple_set_visited (mul_stmt, true);
2392 add_to_ops_vec (ops, tmp, mul_stmt);
2393 changed = true;
2396 return changed;
2400 /* Perform various identities and other optimizations on the list of
2401 operand entries, stored in OPS. The tree code for the binary
2402 operation between all the operands is OPCODE. */
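/* A rough sketch: for the operand list {a, b, 3, 5} under PLUS_EXPR the
   two trailing constants are folded into 8, and the eliminate_* helpers
   called below then look for cancelling or redundant pairs such as
   a + -a or a & a.  */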
2404 static void
2405 optimize_ops_list (enum tree_code opcode,
2406 vec<operand_entry *> *ops)
2408 unsigned int length = ops->length ();
2409 unsigned int i;
2410 operand_entry *oe;
2411 operand_entry *oelast = NULL;
2412 bool iterate = false;
2414 if (length == 1)
2415 return;
2417 oelast = ops->last ();
2419 /* If the last two are constants, pop the constants off, merge them
2420 and try the next two. */
2421 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2423 operand_entry *oelm1 = (*ops)[length - 2];
2425 if (oelm1->rank == 0
2426 && is_gimple_min_invariant (oelm1->op)
2427 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2428 TREE_TYPE (oelast->op)))
2430 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2431 oelm1->op, oelast->op);
2433 if (folded && is_gimple_min_invariant (folded))
2435 if (dump_file && (dump_flags & TDF_DETAILS))
2436 fprintf (dump_file, "Merging constants\n");
2438 ops->pop ();
2439 ops->pop ();
2441 add_to_ops_vec (ops, folded);
2442 reassociate_stats.constants_eliminated++;
2444 optimize_ops_list (opcode, ops);
2445 return;
2450 eliminate_using_constants (opcode, ops);
2451 oelast = NULL;
2453 for (i = 0; ops->iterate (i, &oe);)
2455 bool done = false;
2457 if (eliminate_not_pairs (opcode, ops, i, oe))
2458 return;
2459 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2460 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2461 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2463 if (done)
2464 return;
2465 iterate = true;
2466 oelast = NULL;
2467 continue;
2469 oelast = oe;
2470 i++;
2473 if (iterate)
2474 optimize_ops_list (opcode, ops);
2477 /* The following functions are subroutines to optimize_range_tests and allow
2478 it to try to change a logical combination of comparisons into a range
2479 test.
2481 For example, both
2482 X == 2 || X == 5 || X == 3 || X == 4
2484 X >= 2 && X <= 5
2485 are converted to
2486 (unsigned) (X - 2) <= 3
2488 For more information see comments above fold_range_test in fold-const.cc,
2489 this implementation is for GIMPLE. */
2493 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2495 void
2496 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2498 if (!skip_exp)
2499 print_generic_expr (file, r->exp);
2500 fprintf (file, " %c[", r->in_p ? '+' : '-');
2501 print_generic_expr (file, r->low);
2502 fputs (", ", file);
2503 print_generic_expr (file, r->high);
2504 fputc (']', file);
2507 /* Dump the range entry R to STDERR. */
2509 DEBUG_FUNCTION void
2510 debug_range_entry (struct range_entry *r)
2512 dump_range_entry (stderr, r, false);
2513 fputc ('\n', stderr);
2516 /* This is similar to make_range in fold-const.cc, but on top of
2517 GIMPLE instead of trees. If EXP is non-NULL, it should be
2518 an SSA_NAME and STMT argument is ignored, otherwise STMT
2519 argument should be a GIMPLE_COND. */
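/* A rough example: for an SSA_NAME defined as _1 = x_2 > 5 (integer x_2),
   the initial "_1 != 0" range is refined via make_range_step into an
   entry on x_2 roughly of the form -[-INF, 5], i.e. x_2 lies outside
   [-INF, 5], which is just x_2 > 5 expressed as a range.  */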
2521 void
2522 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2524 int in_p;
2525 tree low, high;
2526 bool is_bool, strict_overflow_p;
2528 r->exp = NULL_TREE;
2529 r->in_p = false;
2530 r->strict_overflow_p = false;
2531 r->low = NULL_TREE;
2532 r->high = NULL_TREE;
2533 if (exp != NULL_TREE
2534 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2535 return;
2537 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2538 and see if we can refine the range. Some of the cases below may not
2539 happen, but it doesn't seem worth worrying about this. We "continue"
2540 the outer loop when we've changed something; otherwise we "break"
2541 the switch, which will "break" the while. */
2542 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2543 high = low;
2544 in_p = 0;
2545 strict_overflow_p = false;
2546 is_bool = false;
2547 if (exp == NULL_TREE)
2548 is_bool = true;
2549 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2551 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2552 is_bool = true;
2553 else
2554 return;
2556 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2557 is_bool = true;
2559 while (1)
2561 enum tree_code code;
2562 tree arg0, arg1, exp_type;
2563 tree nexp;
2564 location_t loc;
2566 if (exp != NULL_TREE)
2568 if (TREE_CODE (exp) != SSA_NAME
2569 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2570 break;
2572 stmt = SSA_NAME_DEF_STMT (exp);
2573 if (!is_gimple_assign (stmt))
2574 break;
2576 code = gimple_assign_rhs_code (stmt);
2577 arg0 = gimple_assign_rhs1 (stmt);
2578 arg1 = gimple_assign_rhs2 (stmt);
2579 exp_type = TREE_TYPE (exp);
2581 else
2583 code = gimple_cond_code (stmt);
2584 arg0 = gimple_cond_lhs (stmt);
2585 arg1 = gimple_cond_rhs (stmt);
2586 exp_type = boolean_type_node;
2589 if (TREE_CODE (arg0) != SSA_NAME
2590 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2591 break;
2592 loc = gimple_location (stmt);
2593 switch (code)
2595 case BIT_NOT_EXPR:
2596 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2597 /* Ensure the range is either +[-,0], +[0,0],
2598 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2599 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2600 or similar expression of unconditional true or
2601 false, it should not be negated. */
2602 && ((high && integer_zerop (high))
2603 || (low && integer_onep (low))))
2605 in_p = !in_p;
2606 exp = arg0;
2607 continue;
2609 break;
2610 case SSA_NAME:
2611 exp = arg0;
2612 continue;
2613 CASE_CONVERT:
2614 if (is_bool)
2616 if ((TYPE_PRECISION (exp_type) == 1
2617 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2618 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2619 return;
2621 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2623 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2624 is_bool = true;
2625 else
2626 return;
2628 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2629 is_bool = true;
2630 goto do_default;
2631 case EQ_EXPR:
2632 case NE_EXPR:
2633 case LT_EXPR:
2634 case LE_EXPR:
2635 case GE_EXPR:
2636 case GT_EXPR:
2637 is_bool = true;
2638 /* FALLTHRU */
2639 default:
2640 if (!is_bool)
2641 return;
2642 do_default:
2643 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2644 &low, &high, &in_p,
2645 &strict_overflow_p);
2646 if (nexp != NULL_TREE)
2648 exp = nexp;
2649 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2650 continue;
2652 break;
2654 break;
2656 if (is_bool)
2658 r->exp = exp;
2659 r->in_p = in_p;
2660 r->low = low;
2661 r->high = high;
2662 r->strict_overflow_p = strict_overflow_p;
2666 /* Comparison function for qsort. Sort entries
2667 without SSA_NAME exp first, then with SSA_NAMEs sorted
2668 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2669 by increasing ->low and if ->low is the same, by increasing
2670 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2671 maximum. */
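/* E.g. two entries for the same SSA_NAME x_1, +[2, 2] and +[5, 5], end up
   adjacent after sorting (grouped by SSA_NAME_VERSION, then by ascending
   low), which lets callers merge ranges for one name in a single linear
   scan.  */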
2673 static int
2674 range_entry_cmp (const void *a, const void *b)
2676 const struct range_entry *p = (const struct range_entry *) a;
2677 const struct range_entry *q = (const struct range_entry *) b;
2679 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2681 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2683 /* Group range_entries for the same SSA_NAME together. */
2684 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2685 return -1;
2686 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2687 return 1;
2688 /* If ->low is different, NULL low goes first, then by
2689 ascending low. */
2690 if (p->low != NULL_TREE)
2692 if (q->low != NULL_TREE)
2694 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2695 p->low, q->low);
2696 if (tem && integer_onep (tem))
2697 return -1;
2698 tem = fold_binary (GT_EXPR, boolean_type_node,
2699 p->low, q->low);
2700 if (tem && integer_onep (tem))
2701 return 1;
2703 else
2704 return 1;
2706 else if (q->low != NULL_TREE)
2707 return -1;
2708 /* If ->high is different, NULL high goes last, before that by
2709 ascending high. */
2710 if (p->high != NULL_TREE)
2712 if (q->high != NULL_TREE)
2714 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2715 p->high, q->high);
2716 if (tem && integer_onep (tem))
2717 return -1;
2718 tem = fold_binary (GT_EXPR, boolean_type_node,
2719 p->high, q->high);
2720 if (tem && integer_onep (tem))
2721 return 1;
2723 else
2724 return -1;
2726 else if (q->high != NULL_TREE)
2727 return 1;
2728 /* If both ranges are the same, sort below by ascending idx. */
2730 else
2731 return 1;
2733 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2734 return -1;
2736 if (p->idx < q->idx)
2737 return -1;
2738 else
2740 gcc_checking_assert (p->idx > q->idx);
2741 return 1;
2745 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2746 insert needed statements BEFORE or after GSI. */
2748 static tree
2749 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2751 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2752 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2753 if (TREE_CODE (ret) != SSA_NAME)
2755 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2756 if (before)
2757 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2758 else
2759 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2760 ret = gimple_assign_lhs (g);
2762 return ret;
2765 /* Helper routine of optimize_range_test.
2766 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2767 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2768 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2769 is NULL, OTHERRANGEP should not be NULL; in that case OTHERRANGEP points to
2770 an array of COUNT pointers to other ranges. Return
2771 true if the range merge has been successful.
2772 If OPCODE is ERROR_MARK, this is called from within
2773 maybe_optimize_range_tests and is performing inter-bb range optimization.
2774 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2775 oe->rank. */
2777 static bool
2778 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2779 struct range_entry **otherrangep,
2780 unsigned int count, enum tree_code opcode,
2781 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2782 bool in_p, tree low, tree high, bool strict_overflow_p)
2784 unsigned int idx = range->idx;
2785 struct range_entry *swap_with = NULL;
2786 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2787 if (opcode == ERROR_MARK)
2789 /* For inter-bb range test optimization, pick from the range tests
2790 the one which is tested in the earliest condition (one dominating
2791 the others), because otherwise there could be some UB (e.g. signed
2792 overflow) in following bbs that we'd expose which wasn't there in
2793 the original program. See PR104196. */
2794 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2795 basic_block range_bb = orig_range_bb;
2796 for (unsigned int i = 0; i < count; i++)
2798 struct range_entry *this_range;
2799 if (otherrange)
2800 this_range = otherrange + i;
2801 else
2802 this_range = otherrangep[i];
2803 operand_entry *oe = (*ops)[this_range->idx];
2804 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2805 if (range_bb != this_bb
2806 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2808 swap_with = this_range;
2809 range_bb = this_bb;
2810 idx = this_range->idx;
2813 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2814 only defined in later blocks. In this case we can't move the
2815 merged comparison earlier, so instead check if there are any stmts
2816 that might trigger signed integer overflow in between and rewrite
2817 them. But only after we check if the optimization is possible. */
2818 if (seq && swap_with)
2820 rewrite_bb_first = range_bb;
2821 rewrite_bb_last = orig_range_bb;
2822 idx = range->idx;
2823 swap_with = NULL;
2826 operand_entry *oe = (*ops)[idx];
2827 tree op = oe->op;
2828 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2829 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2830 location_t loc = gimple_location (stmt);
2831 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2832 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2833 in_p, low, high);
2834 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2835 gimple_stmt_iterator gsi;
2836 unsigned int i, uid;
2838 if (tem == NULL_TREE)
2839 return false;
2841 /* If op is default def SSA_NAME, there is no place to insert the
2842 new comparison. Give up, unless we can use OP itself as the
2843 range test. */
2844 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2846 if (op == range->exp
2847 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2848 || TREE_CODE (optype) == BOOLEAN_TYPE)
2849 && (op == tem
2850 || (TREE_CODE (tem) == EQ_EXPR
2851 && TREE_OPERAND (tem, 0) == op
2852 && integer_onep (TREE_OPERAND (tem, 1))))
2853 && opcode != BIT_IOR_EXPR
2854 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2856 stmt = NULL;
2857 tem = op;
2859 else
2860 return false;
2863 if (swap_with)
2864 std::swap (range->idx, swap_with->idx);
2866 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2867 warning_at (loc, OPT_Wstrict_overflow,
2868 "assuming signed overflow does not occur "
2869 "when simplifying range test");
2871 if (dump_file && (dump_flags & TDF_DETAILS))
2873 struct range_entry *r;
2874 fprintf (dump_file, "Optimizing range tests ");
2875 dump_range_entry (dump_file, range, false);
2876 for (i = 0; i < count; i++)
2878 if (otherrange)
2879 r = otherrange + i;
2880 else
2881 r = otherrangep[i];
2882 if (r->exp
2883 && r->exp != range->exp
2884 && TREE_CODE (r->exp) == SSA_NAME)
2886 fprintf (dump_file, " and ");
2887 dump_range_entry (dump_file, r, false);
2889 else
2891 fprintf (dump_file, " and");
2892 dump_range_entry (dump_file, r, true);
2895 fprintf (dump_file, "\n into ");
2896 print_generic_expr (dump_file, tem);
2897 fprintf (dump_file, "\n");
2900 /* In inter-bb range optimization mode, if we have a seq, we can't
2901 move the merged comparison to the earliest bb from the comparisons
2902 being replaced, so instead rewrite stmts that could trigger signed
2903 integer overflow. */
2904 for (basic_block bb = rewrite_bb_last;
2905 bb != rewrite_bb_first; bb = single_pred (bb))
2906 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2907 !gsi_end_p (gsi); gsi_next (&gsi))
2909 gimple *stmt = gsi_stmt (gsi);
2910 if (is_gimple_assign (stmt))
2911 if (tree lhs = gimple_assign_lhs (stmt))
2912 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2913 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2914 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2916 enum tree_code code = gimple_assign_rhs_code (stmt);
2917 if (arith_code_with_undefined_signed_overflow (code))
2919 gimple_stmt_iterator gsip = gsi;
2920 gimple_stmt_iterator gsin = gsi;
2921 gsi_prev (&gsip);
2922 gsi_next (&gsin);
2923 rewrite_to_defined_overflow (stmt, true);
2924 unsigned uid = gimple_uid (stmt);
2925 if (gsi_end_p (gsip))
2926 gsip = gsi_after_labels (bb);
2927 else
2928 gsi_next (&gsip);
2929 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2930 gsi_next (&gsip))
2931 gimple_set_uid (gsi_stmt (gsip), uid);
2936 if (opcode == BIT_IOR_EXPR
2937 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2938 tem = invert_truthvalue_loc (loc, tem);
2940 tem = fold_convert_loc (loc, optype, tem);
2941 if (stmt)
2943 gsi = gsi_for_stmt (stmt);
2944 uid = gimple_uid (stmt);
2946 else
2948 gsi = gsi_none ();
2949 uid = 0;
2951 if (stmt == NULL)
2952 gcc_checking_assert (tem == op);
2953 /* In rare cases range->exp can be equal to lhs of stmt.
2954 In that case we have to insert after the stmt rather than before
2955 it. If stmt is a PHI, insert it at the start of the basic block. */
2956 else if (op != range->exp)
2958 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2959 tem = force_into_ssa_name (&gsi, tem, true);
2960 gsi_prev (&gsi);
2962 else if (gimple_code (stmt) != GIMPLE_PHI)
2964 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2965 tem = force_into_ssa_name (&gsi, tem, false);
2967 else
2969 gsi = gsi_after_labels (gimple_bb (stmt));
2970 if (!gsi_end_p (gsi))
2971 uid = gimple_uid (gsi_stmt (gsi));
2972 else
2974 gsi = gsi_start_bb (gimple_bb (stmt));
2975 uid = 1;
2976 while (!gsi_end_p (gsi))
2978 uid = gimple_uid (gsi_stmt (gsi));
2979 gsi_next (&gsi);
2982 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2983 tem = force_into_ssa_name (&gsi, tem, true);
2984 if (gsi_end_p (gsi))
2985 gsi = gsi_last_bb (gimple_bb (stmt));
2986 else
2987 gsi_prev (&gsi);
2989 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2990 if (gimple_uid (gsi_stmt (gsi)))
2991 break;
2992 else
2993 gimple_set_uid (gsi_stmt (gsi), uid);
2995 oe->op = tem;
2996 range->exp = exp;
2997 range->low = low;
2998 range->high = high;
2999 range->in_p = in_p;
3000 range->strict_overflow_p = false;
3002 for (i = 0; i < count; i++)
3004 if (otherrange)
3005 range = otherrange + i;
3006 else
3007 range = otherrangep[i];
3008 oe = (*ops)[range->idx];
3009 /* Now change all the other range test immediate uses, so that
3010 those tests will be optimized away. */
3011 if (opcode == ERROR_MARK)
3013 if (oe->op)
3014 oe->op = build_int_cst (TREE_TYPE (oe->op),
3015 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3016 else
3017 oe->op = (oe->rank == BIT_IOR_EXPR
3018 ? boolean_false_node : boolean_true_node);
3020 else
3021 oe->op = error_mark_node;
3022 range->exp = NULL_TREE;
3023 range->low = NULL_TREE;
3024 range->high = NULL_TREE;
3026 return true;
3029 /* Optimize X == CST1 || X == CST2
3030 if popcount (CST1 ^ CST2) == 1 into
3031 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3032 Similarly for ranges. E.g.
3033 X != 2 && X != 3 && X != 10 && X != 11
3034 will be transformed by the previous optimization into
3035 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3036 and this loop can transform that into
3037 !(((X & ~8) - 2U) <= 1U). */
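/* A minimal concrete instance: for X == 4 || X == 5 we have
   CST1 ^ CST2 == 1, a single bit, so the test becomes (X & ~1) == 4.  */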
3039 static bool
3040 optimize_range_tests_xor (enum tree_code opcode, tree type,
3041 tree lowi, tree lowj, tree highi, tree highj,
3042 vec<operand_entry *> *ops,
3043 struct range_entry *rangei,
3044 struct range_entry *rangej)
3046 tree lowxor, highxor, tem, exp;
3047 /* Check lowi ^ lowj == highi ^ highj and
3048 popcount (lowi ^ lowj) == 1. */
3049 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3050 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3051 return false;
3052 if (!integer_pow2p (lowxor))
3053 return false;
3054 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3055 if (!tree_int_cst_equal (lowxor, highxor))
3056 return false;
3058 exp = rangei->exp;
3059 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3060 int prec = GET_MODE_PRECISION (mode);
3061 if (TYPE_PRECISION (type) < prec
3062 || (wi::to_wide (TYPE_MIN_VALUE (type))
3063 != wi::min_value (prec, TYPE_SIGN (type)))
3064 || (wi::to_wide (TYPE_MAX_VALUE (type))
3065 != wi::max_value (prec, TYPE_SIGN (type))))
3067 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3068 exp = fold_convert (type, exp);
3069 lowxor = fold_convert (type, lowxor);
3070 lowi = fold_convert (type, lowi);
3071 highi = fold_convert (type, highi);
3073 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3074 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3075 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3076 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3077 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3078 NULL, rangei->in_p, lowj, highj,
3079 rangei->strict_overflow_p
3080 || rangej->strict_overflow_p))
3081 return true;
3082 return false;
3085 /* Optimize X == CST1 || X == CST2
3086 if popcount (CST2 - CST1) == 1 into
3087 ((X - CST1) & ~(CST2 - CST1)) == 0.
3088 Similarly for ranges. E.g.
3089 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3090 || X == 75 || X == 45
3091 will be transformed by the previous optimization into
3092 (X - 43U) <= 3U || (X - 75U) <= 3U
3093 and this loop can transform that into
3094 ((X - 43U) & ~(75U - 43U)) <= 3U. */
3095 static bool
3096 optimize_range_tests_diff (enum tree_code opcode, tree type,
3097 tree lowi, tree lowj, tree highi, tree highj,
3098 vec<operand_entry *> *ops,
3099 struct range_entry *rangei,
3100 struct range_entry *rangej)
3102 tree tem1, tem2, mask;
3103 /* Check highi - lowi == highj - lowj. */
3104 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3105 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3106 return false;
3107 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3108 if (!tree_int_cst_equal (tem1, tem2))
3109 return false;
3110 /* Check popcount (lowj - lowi) == 1. */
3111 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3112 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3113 return false;
3114 if (!integer_pow2p (tem1))
3115 return false;
3117 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3118 int prec = GET_MODE_PRECISION (mode);
3119 if (TYPE_PRECISION (type) < prec
3120 || (wi::to_wide (TYPE_MIN_VALUE (type))
3121 != wi::min_value (prec, TYPE_SIGN (type)))
3122 || (wi::to_wide (TYPE_MAX_VALUE (type))
3123 != wi::max_value (prec, TYPE_SIGN (type))))
3124 type = build_nonstandard_integer_type (prec, 1);
3125 else
3126 type = unsigned_type_for (type);
3127 tem1 = fold_convert (type, tem1);
3128 tem2 = fold_convert (type, tem2);
3129 lowi = fold_convert (type, lowi);
3130 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3131 tem1 = fold_build2 (MINUS_EXPR, type,
3132 fold_convert (type, rangei->exp), lowi);
3133 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3134 lowj = build_int_cst (type, 0);
3135 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3136 NULL, rangei->in_p, lowj, tem2,
3137 rangei->strict_overflow_p
3138 || rangej->strict_overflow_p))
3139 return true;
3140 return false;
3143 /* Perform some common checks shared by optimize_range_tests_xor and
3144 optimize_range_tests_diff.
3145 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3146 Else it calls optimize_range_tests_diff. */
3148 static bool
3149 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3150 bool optimize_xor, vec<operand_entry *> *ops,
3151 struct range_entry *ranges)
3153 int i, j;
3154 bool any_changes = false;
3155 for (i = first; i < length; i++)
3157 tree lowi, highi, lowj, highj, type, tem;
3159 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3160 continue;
3161 type = TREE_TYPE (ranges[i].exp);
3162 if (!INTEGRAL_TYPE_P (type))
3163 continue;
3164 lowi = ranges[i].low;
3165 if (lowi == NULL_TREE)
3166 lowi = TYPE_MIN_VALUE (type);
3167 highi = ranges[i].high;
3168 if (highi == NULL_TREE)
3169 continue;
3170 for (j = i + 1; j < length && j < i + 64; j++)
3172 bool changes;
3173 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3174 continue;
3175 lowj = ranges[j].low;
3176 if (lowj == NULL_TREE)
3177 continue;
3178 highj = ranges[j].high;
3179 if (highj == NULL_TREE)
3180 highj = TYPE_MAX_VALUE (type);
3181 /* Check lowj > highi. */
3182 tem = fold_binary (GT_EXPR, boolean_type_node,
3183 lowj, highi);
3184 if (tem == NULL_TREE || !integer_onep (tem))
3185 continue;
3186 if (optimize_xor)
3187 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3188 highi, highj, ops,
3189 ranges + i, ranges + j);
3190 else
3191 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3192 highi, highj, ops,
3193 ranges + i, ranges + j);
3194 if (changes)
3196 any_changes = true;
3197 break;
3201 return any_changes;
3204 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3205 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3206 EXP on success, NULL otherwise. */
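/* For instance, with PREC == 64 and TOTALLOW == LOW == 43, HIGH == 46,
   the range spans 4 values, so the returned mask has bits 0 through 3 set
   (0xf), expressed relative to TOTALLOW.  */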
3208 static tree
3209 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3210 wide_int *mask, tree *totallowp)
3212 tree tem = int_const_binop (MINUS_EXPR, high, low);
3213 if (tem == NULL_TREE
3214 || TREE_CODE (tem) != INTEGER_CST
3215 || TREE_OVERFLOW (tem)
3216 || tree_int_cst_sgn (tem) == -1
3217 || compare_tree_int (tem, prec) != -1)
3218 return NULL_TREE;
3220 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3221 *mask = wi::shifted_mask (0, max, false, prec);
3222 if (TREE_CODE (exp) == BIT_AND_EXPR
3223 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3225 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3226 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3227 if (wi::popcount (msk) == 1
3228 && wi::ltu_p (msk, prec - max))
3230 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3231 max += msk.to_uhwi ();
3232 exp = TREE_OPERAND (exp, 0);
3233 if (integer_zerop (low)
3234 && TREE_CODE (exp) == PLUS_EXPR
3235 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3237 tree ret = TREE_OPERAND (exp, 0);
3238 STRIP_NOPS (ret);
3239 widest_int bias
3240 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3241 TYPE_PRECISION (TREE_TYPE (low))));
3242 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3243 if (totallowp)
3245 *totallowp = tbias;
3246 return ret;
3248 else if (!tree_int_cst_lt (totallow, tbias))
3249 return NULL_TREE;
3250 bias = wi::to_widest (tbias);
3251 bias -= wi::to_widest (totallow);
3252 if (bias >= 0 && bias < prec - max)
3254 *mask = wi::lshift (*mask, bias);
3255 return ret;
3260 if (totallowp)
3261 return exp;
3262 if (!tree_int_cst_lt (totallow, low))
3263 return exp;
3264 tem = int_const_binop (MINUS_EXPR, low, totallow);
3265 if (tem == NULL_TREE
3266 || TREE_CODE (tem) != INTEGER_CST
3267 || TREE_OVERFLOW (tem)
3268 || compare_tree_int (tem, prec - max) == 1)
3269 return NULL_TREE;
3271 *mask = wi::lshift (*mask, wi::to_widest (tem));
3272 return exp;
3275 /* Attempt to optimize small range tests using bit test.
3276 E.g.
3277 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3278 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3279 has been optimized by earlier optimizations into:
3280 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3281 As the whole 43 through 82 range spans fewer than 64 values,
3282 for 64-bit word targets optimize that into:
3283 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3285 static bool
3286 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3287 vec<operand_entry *> *ops,
3288 struct range_entry *ranges)
3290 int i, j;
3291 bool any_changes = false;
3292 int prec = GET_MODE_BITSIZE (word_mode);
3293 auto_vec<struct range_entry *, 64> candidates;
3295 for (i = first; i < length - 1; i++)
3297 tree lowi, highi, lowj, highj, type;
3299 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3300 continue;
3301 type = TREE_TYPE (ranges[i].exp);
3302 if (!INTEGRAL_TYPE_P (type))
3303 continue;
3304 lowi = ranges[i].low;
3305 if (lowi == NULL_TREE)
3306 lowi = TYPE_MIN_VALUE (type);
3307 highi = ranges[i].high;
3308 if (highi == NULL_TREE)
3309 continue;
3310 wide_int mask;
3311 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3312 highi, &mask, &lowi);
3313 if (exp == NULL_TREE)
3314 continue;
3315 bool strict_overflow_p = ranges[i].strict_overflow_p;
3316 candidates.truncate (0);
3317 int end = MIN (i + 64, length);
3318 for (j = i + 1; j < end; j++)
3320 tree exp2;
3321 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3322 continue;
3323 if (ranges[j].exp == exp)
3325 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3327 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3328 if (exp2 == exp)
3330 else if (TREE_CODE (exp2) == PLUS_EXPR)
3332 exp2 = TREE_OPERAND (exp2, 0);
3333 STRIP_NOPS (exp2);
3334 if (exp2 != exp)
3335 continue;
3337 else
3338 continue;
3340 else
3341 continue;
3342 lowj = ranges[j].low;
3343 if (lowj == NULL_TREE)
3344 continue;
3345 highj = ranges[j].high;
3346 if (highj == NULL_TREE)
3347 highj = TYPE_MAX_VALUE (type);
3348 wide_int mask2;
3349 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3350 highj, &mask2, NULL);
3351 if (exp2 != exp)
3352 continue;
3353 mask |= mask2;
3354 strict_overflow_p |= ranges[j].strict_overflow_p;
3355 candidates.safe_push (&ranges[j]);
3358 /* If every possible relative value of the expression is a valid shift
3359 amount, then we can merge the entry test into the bit test. In this
3360 case, if we would need otherwise 2 or more comparisons, then use
3361 the bit test; in the other cases, the threshold is 3 comparisons. */
3362 bool entry_test_needed;
3363 value_range r;
3364 if (TREE_CODE (exp) == SSA_NAME
3365 && get_range_query (cfun)->range_of_expr (r, exp)
3366 && r.kind () == VR_RANGE
3367 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3369 wide_int min = r.lower_bound ();
3370 wide_int ilowi = wi::to_wide (lowi);
3371 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3373 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3374 mask = wi::lshift (mask, ilowi - min);
3376 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3378 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3379 mask = wi::lrshift (mask, min - ilowi);
3381 entry_test_needed = false;
3383 else
3384 entry_test_needed = true;
3385 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3387 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3388 wi::to_widest (lowi)
3389 + prec - 1 - wi::clz (mask));
3390 operand_entry *oe = (*ops)[ranges[i].idx];
3391 tree op = oe->op;
3392 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3393 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3394 location_t loc = gimple_location (stmt);
3395 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3397 /* See if it isn't cheaper to pretend the minimum value of the
3398 range is 0, if the maximum value is small enough.
3399 We can then avoid the subtraction of the minimum value, but the
3400 mask constant could perhaps be more expensive. */
3401 if (compare_tree_int (lowi, 0) > 0
3402 && compare_tree_int (high, prec) < 0)
3404 int cost_diff;
3405 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3406 rtx reg = gen_raw_REG (word_mode, 10000);
3407 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3408 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3409 GEN_INT (-m)),
3410 word_mode, speed_p);
3411 rtx r = immed_wide_int_const (mask, word_mode);
3412 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3413 word_mode, speed_p);
3414 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3415 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3416 word_mode, speed_p);
3417 if (cost_diff > 0)
3419 mask = wi::lshift (mask, m);
3420 lowi = build_zero_cst (TREE_TYPE (lowi));
3424 tree tem;
3425 if (entry_test_needed)
3427 tem = build_range_check (loc, optype, unshare_expr (exp),
3428 false, lowi, high);
3429 if (tem == NULL_TREE || is_gimple_val (tem))
3430 continue;
3432 else
3433 tem = NULL_TREE;
3434 tree etype = unsigned_type_for (TREE_TYPE (exp));
3435 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3436 fold_convert_loc (loc, etype, exp),
3437 fold_convert_loc (loc, etype, lowi));
3438 exp = fold_convert_loc (loc, integer_type_node, exp);
3439 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3440 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3441 build_int_cst (word_type, 1), exp);
3442 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3443 wide_int_to_tree (word_type, mask));
3444 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3445 build_zero_cst (word_type));
3446 if (is_gimple_val (exp))
3447 continue;
3449 /* The shift might have undefined behavior if TEM is true,
3450 but reassociate_bb isn't prepared to have basic blocks
3451 split when it is running. So, temporarily emit a code
3452 with BIT_IOR_EXPR instead of &&, and fix it up in
3453 branch_fixup. */
3454 gimple_seq seq = NULL;
3455 if (tem)
3457 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3458 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3459 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3461 gimple_seq seq2;
3462 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3463 gimple_seq_add_seq_without_update (&seq, seq2);
3464 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3465 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3466 if (tem)
3468 gimple *g = gimple_build_assign (make_ssa_name (optype),
3469 BIT_IOR_EXPR, tem, exp);
3470 gimple_set_location (g, loc);
3471 gimple_seq_add_stmt_without_update (&seq, g);
3472 exp = gimple_assign_lhs (g);
3474 tree val = build_zero_cst (optype);
3475 if (update_range_test (&ranges[i], NULL, candidates.address (),
3476 candidates.length (), opcode, ops, exp,
3477 seq, false, val, val, strict_overflow_p))
3479 any_changes = true;
3480 if (tem)
3481 reassoc_branch_fixups.safe_push (tem);
3483 else
3484 gimple_seq_discard (seq);
3487 return any_changes;
3490 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3491 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3492 Also, handle x < C && y < C && z < C where C is power of two as
3493 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3494 as (x | y | z) < 0. */
3496 static bool
3497 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3498 vec<operand_entry *> *ops,
3499 struct range_entry *ranges)
3501 int i;
3502 unsigned int b;
3503 bool any_changes = false;
3504 auto_vec<int, 128> buckets;
3505 auto_vec<int, 32> chains;
3506 auto_vec<struct range_entry *, 32> candidates;
3508 for (i = first; i < length; i++)
3510 int idx;
3512 if (ranges[i].exp == NULL_TREE
3513 || TREE_CODE (ranges[i].exp) != SSA_NAME
3514 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3515 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3516 continue;
3518 if (ranges[i].low != NULL_TREE
3519 && ranges[i].high != NULL_TREE
3520 && ranges[i].in_p
3521 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3523 idx = !integer_zerop (ranges[i].low);
3524 if (idx && !integer_all_onesp (ranges[i].low))
3525 continue;
3527 else if (ranges[i].high != NULL_TREE
3528 && TREE_CODE (ranges[i].high) == INTEGER_CST
3529 && ranges[i].in_p)
3531 wide_int w = wi::to_wide (ranges[i].high);
3532 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3533 int l = wi::clz (w);
3534 idx = 2;
3535 if (l <= 0
3536 || l >= prec
3537 || w != wi::mask (prec - l, false, prec))
3538 continue;
3539 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3540 && ranges[i].low == NULL_TREE)
3541 || (ranges[i].low
3542 && integer_zerop (ranges[i].low))))
3543 continue;
3545 else if (ranges[i].high == NULL_TREE
3546 && ranges[i].low != NULL_TREE
3547 /* Perform this optimization only in the last
3548 reassoc pass, as it interferes with the reassociation
3549 itself, and possibly also with VRP etc., which might not
3550 be able to virtually undo the optimization. */
3551 && !reassoc_insert_powi_p
3552 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3553 && integer_zerop (ranges[i].low))
3554 idx = 3;
3555 else
3556 continue;
3558 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3559 if (buckets.length () <= b)
3560 buckets.safe_grow_cleared (b + 1, true);
3561 if (chains.length () <= (unsigned) i)
3562 chains.safe_grow (i + 1, true);
3563 chains[i] = buckets[b];
3564 buckets[b] = i + 1;
3567 FOR_EACH_VEC_ELT (buckets, b, i)
3568 if (i && chains[i - 1])
3570 int j, k = i;
3571 if ((b % 4) == 2)
3573 /* When ranges[X - 1].high + 1 is a power of two,
3574 we need to process the same bucket up to
3575 precision - 1 times, each time split the entries
3576 with the same high bound into one chain and the
3577 rest into another one to be processed later. */
3578 int this_prev = i;
3579 int other_prev = 0;
3580 for (j = chains[i - 1]; j; j = chains[j - 1])
3582 if (tree_int_cst_equal (ranges[i - 1].high,
3583 ranges[j - 1].high))
3585 chains[this_prev - 1] = j;
3586 this_prev = j;
3588 else if (other_prev == 0)
3590 buckets[b] = j;
3591 other_prev = j;
3593 else
3595 chains[other_prev - 1] = j;
3596 other_prev = j;
3599 chains[this_prev - 1] = 0;
3600 if (other_prev)
3601 chains[other_prev - 1] = 0;
3602 if (chains[i - 1] == 0)
3604 if (other_prev)
3605 b--;
3606 continue;
3609 for (j = chains[i - 1]; j; j = chains[j - 1])
3611 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3612 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3613 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3614 k = j;
3616 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3617 tree type2 = NULL_TREE;
3618 bool strict_overflow_p = false;
3619 candidates.truncate (0);
3620 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3621 type1 = pointer_sized_int_node;
3622 for (j = i; j; j = chains[j - 1])
3624 tree type = TREE_TYPE (ranges[j - 1].exp);
3625 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3626 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3627 type = pointer_sized_int_node;
3628 if ((b % 4) == 3)
3630 /* For the signed < 0 cases, the types should be
3631 really compatible (all signed with the same precision);
3632 instead put ranges that have a different in_p from
3633 k first. */
3634 if (!useless_type_conversion_p (type1, type))
3635 continue;
3636 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3637 candidates.safe_push (&ranges[j - 1]);
3638 type2 = type1;
3639 continue;
3641 if (j == k
3642 || useless_type_conversion_p (type1, type))
3644 else if (type2 == NULL_TREE
3645 || useless_type_conversion_p (type2, type))
3647 if (type2 == NULL_TREE)
3648 type2 = type;
3649 candidates.safe_push (&ranges[j - 1]);
3652 unsigned l = candidates.length ();
3653 for (j = i; j; j = chains[j - 1])
3655 tree type = TREE_TYPE (ranges[j - 1].exp);
3656 if (j == k)
3657 continue;
3658 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3659 type = pointer_sized_int_node;
3660 if ((b % 4) == 3)
3662 if (!useless_type_conversion_p (type1, type))
3663 continue;
3664 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3665 candidates.safe_push (&ranges[j - 1]);
3666 continue;
3668 if (useless_type_conversion_p (type1, type))
3670 else if (type2 == NULL_TREE
3671 || useless_type_conversion_p (type2, type))
3672 continue;
3673 candidates.safe_push (&ranges[j - 1]);
3675 gimple_seq seq = NULL;
3676 tree op = NULL_TREE;
3677 unsigned int id;
3678 struct range_entry *r;
3679 candidates.safe_push (&ranges[k - 1]);
3680 FOR_EACH_VEC_ELT (candidates, id, r)
3682 gimple *g;
3683 enum tree_code code;
3684 if (id == 0)
3686 op = r->exp;
3687 continue;
3689 if (id == l
3690 || POINTER_TYPE_P (TREE_TYPE (op))
3691 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3693 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3694 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3695 if (code == BIT_NOT_EXPR
3696 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3698 g = gimple_build_assign (make_ssa_name (type3),
3699 NOP_EXPR, op);
3700 gimple_seq_add_stmt_without_update (&seq, g);
3701 op = gimple_assign_lhs (g);
3703 g = gimple_build_assign (make_ssa_name (type3), code, op);
3704 gimple_seq_add_stmt_without_update (&seq, g);
3705 op = gimple_assign_lhs (g);
3707 tree type = TREE_TYPE (r->exp);
3708 tree exp = r->exp;
3709 if (POINTER_TYPE_P (type)
3710 || TREE_CODE (type) == OFFSET_TYPE
3711 || (id >= l && !useless_type_conversion_p (type1, type)))
3713 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3714 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3715 gimple_seq_add_stmt_without_update (&seq, g);
3716 exp = gimple_assign_lhs (g);
3718 if ((b % 4) == 3)
3719 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3720 else
3721 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3722 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3723 code, op, exp);
3724 gimple_seq_add_stmt_without_update (&seq, g);
3725 op = gimple_assign_lhs (g);
3727 type1 = TREE_TYPE (ranges[k - 1].exp);
3728 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3730 gimple *g
3731 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3732 gimple_seq_add_stmt_without_update (&seq, g);
3733 op = gimple_assign_lhs (g);
3735 candidates.pop ();
3736 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3737 candidates.length (), opcode, ops, op,
3738 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3739 ranges[k - 1].high, strict_overflow_p))
3740 any_changes = true;
3741 else
3742 gimple_seq_discard (seq);
3743 if ((b % 4) == 2 && buckets[b] != i)
3744 /* There is more work to do for this bucket. */
3745 b--;
3748 return any_changes;
3751 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3752 a >= 0 && a < b into (unsigned) a < (unsigned) b
3753 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3755 static bool
3756 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3757 vec<operand_entry *> *ops,
3758 struct range_entry *ranges,
3759 basic_block first_bb)
3761 int i;
3762 bool any_changes = false;
3763 hash_map<tree, int> *map = NULL;
3765 for (i = first; i < length; i++)
3767 if (ranges[i].exp == NULL_TREE
3768 || TREE_CODE (ranges[i].exp) != SSA_NAME
3769 || !ranges[i].in_p)
3770 continue;
3772 tree type = TREE_TYPE (ranges[i].exp);
3773 if (!INTEGRAL_TYPE_P (type)
3774 || TYPE_UNSIGNED (type)
3775 || ranges[i].low == NULL_TREE
3776 || !integer_zerop (ranges[i].low)
3777 || ranges[i].high != NULL_TREE)
3778 continue;
3779 /* EXP >= 0 here. */
3780 if (map == NULL)
3781 map = new hash_map <tree, int>;
3782 map->put (ranges[i].exp, i);
3785 if (map == NULL)
3786 return false;
3788 for (i = 0; i < length; i++)
3790 bool in_p = ranges[i].in_p;
3791 if (ranges[i].low == NULL_TREE
3792 || ranges[i].high == NULL_TREE)
3793 continue;
3794 if (!integer_zerop (ranges[i].low)
3795 || !integer_zerop (ranges[i].high))
3797 if (ranges[i].exp
3798 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3799 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3800 && integer_onep (ranges[i].low)
3801 && integer_onep (ranges[i].high))
3802 in_p = !in_p;
3803 else
3804 continue;
3807 gimple *stmt;
3808 tree_code ccode;
3809 tree rhs1, rhs2;
3810 if (ranges[i].exp)
3812 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3813 continue;
3814 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3815 if (!is_gimple_assign (stmt))
3816 continue;
3817 ccode = gimple_assign_rhs_code (stmt);
3818 rhs1 = gimple_assign_rhs1 (stmt);
3819 rhs2 = gimple_assign_rhs2 (stmt);
3821 else
3823 operand_entry *oe = (*ops)[ranges[i].idx];
3824 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3825 if (gimple_code (stmt) != GIMPLE_COND)
3826 continue;
3827 ccode = gimple_cond_code (stmt);
3828 rhs1 = gimple_cond_lhs (stmt);
3829 rhs2 = gimple_cond_rhs (stmt);
3832 if (TREE_CODE (rhs1) != SSA_NAME
3833 || rhs2 == NULL_TREE
3834 || TREE_CODE (rhs2) != SSA_NAME)
3835 continue;
3837 switch (ccode)
3839 case GT_EXPR:
3840 case GE_EXPR:
3841 case LT_EXPR:
3842 case LE_EXPR:
3843 break;
3844 default:
3845 continue;
3847 if (in_p)
3848 ccode = invert_tree_comparison (ccode, false);
3849 switch (ccode)
3851 case GT_EXPR:
3852 case GE_EXPR:
3853 std::swap (rhs1, rhs2);
3854 ccode = swap_tree_comparison (ccode);
3855 break;
3856 case LT_EXPR:
3857 case LE_EXPR:
3858 break;
3859 default:
3860 gcc_unreachable ();
3863 int *idx = map->get (rhs1);
3864 if (idx == NULL)
3865 continue;
3867 /* maybe_optimize_range_tests allows statements without side-effects
3868 in the basic blocks as long as they are consumed in the same bb.
3869 Make sure rhs2's def stmt is not among them, otherwise we can't
3870 safely use get_nonzero_bits on it. E.g. in:
3871 # RANGE [-83, 1] NONZERO 173
3872 # k_32 = PHI <k_47(13), k_12(9)>
3874 if (k_32 >= 0)
3875 goto <bb 5>; [26.46%]
3876 else
3877 goto <bb 9>; [73.54%]
3879 <bb 5> [local count: 140323371]:
3880 # RANGE [0, 1] NONZERO 1
3881 _5 = (int) k_32;
3882 # RANGE [0, 4] NONZERO 4
3883 _21 = _5 << 2;
3884 # RANGE [0, 4] NONZERO 4
3885 iftmp.0_44 = (char) _21;
3886 if (k_32 < iftmp.0_44)
3887 goto <bb 6>; [84.48%]
3888 else
3889 goto <bb 9>; [15.52%]
3890 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3891 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3892 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3893 those stmts even for negative k_32 and the value ranges would no
3894 longer be guaranteed, so the optimization would be invalid. */
3895 while (opcode == ERROR_MARK)
3897 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3898 basic_block bb2 = gimple_bb (g);
3899 if (bb2
3900 && bb2 != first_bb
3901 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3903 /* As an exception, handle a few common cases. */
3904 if (gimple_assign_cast_p (g)
3905 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3907 tree op0 = gimple_assign_rhs1 (g);
3908 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3909 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3910 > TYPE_PRECISION (TREE_TYPE (op0))))
3911 /* Zero-extension is always ok. */
3912 break;
3913 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3914 == TYPE_PRECISION (TREE_TYPE (op0))
3915 && TREE_CODE (op0) == SSA_NAME)
3917 /* Cast from signed to unsigned or vice versa. Retry
3918 with the op0 as new rhs2. */
3919 rhs2 = op0;
3920 continue;
3923 else if (is_gimple_assign (g)
3924 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3925 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3926 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3927 /* Masking with INTEGER_CST with MSB clear is always ok
3928 too. */
3929 break;
3930 rhs2 = NULL_TREE;
3932 break;
3934 if (rhs2 == NULL_TREE)
3935 continue;
3937 wide_int nz = get_nonzero_bits (rhs2);
3938 if (wi::neg_p (nz))
3939 continue;
3941 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3942 and RHS2 is known to be >= 0. */
3943 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3945 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3946 if ((ranges[*idx].strict_overflow_p
3947 || ranges[i].strict_overflow_p)
3948 && issue_strict_overflow_warning (wc))
3949 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3950 "assuming signed overflow does not occur "
3951 "when simplifying range test");
3953 if (dump_file && (dump_flags & TDF_DETAILS))
3955 struct range_entry *r = &ranges[*idx];
3956 fprintf (dump_file, "Optimizing range test ");
3957 print_generic_expr (dump_file, r->exp);
3958 fprintf (dump_file, " +[");
3959 print_generic_expr (dump_file, r->low);
3960 fprintf (dump_file, ", ");
3961 print_generic_expr (dump_file, r->high);
3962 fprintf (dump_file, "] and comparison ");
3963 print_generic_expr (dump_file, rhs1);
3964 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3965 print_generic_expr (dump_file, rhs2);
3966 fprintf (dump_file, "\n into (");
3967 print_generic_expr (dump_file, utype);
3968 fprintf (dump_file, ") ");
3969 print_generic_expr (dump_file, rhs1);
3970 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3971 print_generic_expr (dump_file, utype);
3972 fprintf (dump_file, ") ");
3973 print_generic_expr (dump_file, rhs2);
3974 fprintf (dump_file, "\n");
3977 operand_entry *oe = (*ops)[ranges[i].idx];
3978 ranges[i].in_p = 0;
3979 if (opcode == BIT_IOR_EXPR
3980 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3982 ranges[i].in_p = 1;
3983 ccode = invert_tree_comparison (ccode, false);
3986 unsigned int uid = gimple_uid (stmt);
3987 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3988 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3989 gimple_set_uid (g, uid);
3990 rhs1 = gimple_assign_lhs (g);
3991 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3992 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3994 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3995 gimple_set_uid (g, uid);
3996 rhs2 = gimple_assign_lhs (g);
3997 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3999 if (tree_swap_operands_p (rhs1, rhs2))
4001 std::swap (rhs1, rhs2);
4002 ccode = swap_tree_comparison (ccode);
4004 if (gimple_code (stmt) == GIMPLE_COND)
4006 gcond *c = as_a <gcond *> (stmt);
4007 gimple_cond_set_code (c, ccode);
4008 gimple_cond_set_lhs (c, rhs1);
4009 gimple_cond_set_rhs (c, rhs2);
4010 update_stmt (stmt);
4012 else
4014 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4015 if (!INTEGRAL_TYPE_P (ctype)
4016 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4017 && TYPE_PRECISION (ctype) != 1))
4018 ctype = boolean_type_node;
4019 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4020 gimple_set_uid (g, uid);
4021 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4022 if (oe->op && ctype != TREE_TYPE (oe->op))
4024 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4025 NOP_EXPR, gimple_assign_lhs (g));
4026 gimple_set_uid (g, uid);
4027 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4029 ranges[i].exp = gimple_assign_lhs (g);
4030 oe->op = ranges[i].exp;
4031 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4032 ranges[i].high = ranges[i].low;
4034 ranges[i].strict_overflow_p = false;
4035 oe = (*ops)[ranges[*idx].idx];
4036 /* Now change all the other range test immediate uses, so that
4037 those tests will be optimized away. */
4038 if (opcode == ERROR_MARK)
4040 if (oe->op)
4041 oe->op = build_int_cst (TREE_TYPE (oe->op),
4042 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4043 else
4044 oe->op = (oe->rank == BIT_IOR_EXPR
4045 ? boolean_false_node : boolean_true_node);
4047 else
4048 oe->op = error_mark_node;
4049 ranges[*idx].exp = NULL_TREE;
4050 ranges[*idx].low = NULL_TREE;
4051 ranges[*idx].high = NULL_TREE;
4052 any_changes = true;
4055 delete map;
4056 return any_changes;
4059 /* Optimize range tests, similarly to how fold_range_test optimizes
4060 them on trees. The tree code for the binary
4061 operation between all the operands is OPCODE.
4062 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4063 maybe_optimize_range_tests for inter-bb range optimization.
4064 In that case if oe->op is NULL, oe->id is bb->index whose
4065 GIMPLE_COND is && or ||ed into the test, and oe->rank says
4066 the actual opcode.
4067 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
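/* Illustrative sketch, not part of the pass itself: with OPCODE ==
   BIT_IOR_EXPR and all tests on the same SSA_NAME, adjacent ranges are
   merged, e.g.

     static bool before (int x) { return (x >= 0 && x <= 9) || (x >= 10 && x <= 15); }
     static bool after  (int x) { return (unsigned) x <= 15; }

   Internally, for | the in_p flags are inverted before merging and
   inverted back when the optimized test is emitted, as the init loop
   below notes.  */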
4069 static bool
4070 optimize_range_tests (enum tree_code opcode,
4071 vec<operand_entry *> *ops, basic_block first_bb)
4073 unsigned int length = ops->length (), i, j, first;
4074 operand_entry *oe;
4075 struct range_entry *ranges;
4076 bool any_changes = false;
4078 if (length == 1)
4079 return false;
4081 ranges = XNEWVEC (struct range_entry, length);
4082 for (i = 0; i < length; i++)
4084 oe = (*ops)[i];
4085 ranges[i].idx = i;
4086 init_range_entry (ranges + i, oe->op,
4087 oe->op
4088 ? NULL
4089 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4090 /* For |, invert it now; we will invert it again before emitting
4091 the optimized expression. */
4092 if (opcode == BIT_IOR_EXPR
4093 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4094 ranges[i].in_p = !ranges[i].in_p;
4097 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4098 for (i = 0; i < length; i++)
4099 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4100 break;
4102 /* Try to merge ranges. */
4103 for (first = i; i < length; i++)
4105 tree low = ranges[i].low;
4106 tree high = ranges[i].high;
4107 int in_p = ranges[i].in_p;
4108 bool strict_overflow_p = ranges[i].strict_overflow_p;
4109 int update_fail_count = 0;
4111 for (j = i + 1; j < length; j++)
4113 if (ranges[i].exp != ranges[j].exp)
4114 break;
4115 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4116 ranges[j].in_p, ranges[j].low, ranges[j].high))
4117 break;
4118 strict_overflow_p |= ranges[j].strict_overflow_p;
4121 if (j == i + 1)
4122 continue;
4124 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4125 opcode, ops, ranges[i].exp, NULL, in_p,
4126 low, high, strict_overflow_p))
4128 i = j - 1;
4129 any_changes = true;
4131 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4132 while update_range_test would fail. */
4133 else if (update_fail_count == 64)
4134 i = j - 1;
4135 else
4136 ++update_fail_count;
4139 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4140 ops, ranges);
4142 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4143 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4144 ops, ranges);
4145 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4146 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4147 ops, ranges);
4148 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4149 ranges, first_bb);
4150 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4151 ops, ranges);
4153 if (any_changes && opcode != ERROR_MARK)
4155 j = 0;
4156 FOR_EACH_VEC_ELT (*ops, i, oe)
4158 if (oe->op == error_mark_node)
4159 continue;
4160 else if (i != j)
4161 (*ops)[j] = oe;
4162 j++;
4164 ops->truncate (j);
4167 XDELETEVEC (ranges);
4168 return any_changes;
4171 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4172 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4173 otherwise the comparison code. TYPE is a return value that is set
4174 to the type of the comparison. */
4176 static tree_code
4177 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4178 tree *lhs, tree *rhs, gassign **vcond)
4180 if (TREE_CODE (var) != SSA_NAME)
4181 return ERROR_MARK;
4183 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4184 if (stmt == NULL)
4185 return ERROR_MARK;
4186 if (vcond)
4187 *vcond = stmt;
4189 /* ??? If we start creating more COND_EXPR, we could perform
4190 this same optimization with them. For now, simplify. */
4191 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4192 return ERROR_MARK;
4194 tree cond = gimple_assign_rhs1 (stmt);
4195 tree_code cmp = TREE_CODE (cond);
4196 if (cmp != SSA_NAME)
4197 return ERROR_MARK;
4199 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4200 if (assign == NULL
4201 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4202 return ERROR_MARK;
4204 cmp = gimple_assign_rhs_code (assign);
4205 if (lhs)
4206 *lhs = gimple_assign_rhs1 (assign);
4207 if (rhs)
4208 *rhs = gimple_assign_rhs2 (assign);
4210 /* ??? For now, allow only canonical true and false result vectors.
4211 We could expand this to other constants should the need arise,
4212 but at the moment we don't create them. */
4213 tree t = gimple_assign_rhs2 (stmt);
4214 tree f = gimple_assign_rhs3 (stmt);
4215 bool inv;
4216 if (integer_all_onesp (t))
4217 inv = false;
4218 else if (integer_all_onesp (f))
4220 cmp = invert_tree_comparison (cmp, false);
4221 inv = true;
4223 else
4224 return ERROR_MARK;
4225 if (!integer_zerop (f))
4226 return ERROR_MARK;
4228 /* Success! */
4229 if (rets)
4230 *rets = assign;
4231 if (reti)
4232 *reti = inv;
4233 if (type)
4234 *type = TREE_TYPE (cond);
4235 return cmp;
4238 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4239 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
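/* Illustrative sketch, not part of the pass itself: given two
   VEC_COND_EXPRs with canonical all-ones/zero arms whose conditions can
   be folded together, e.g.

     _3 = VEC_COND_EXPR <a_1 < b_2, { -1, ... }, { 0, ... }>;
     _5 = VEC_COND_EXPR <a_1 <= b_2, { -1, ... }, { 0, ... }>;
     _6 = _3 & _5;

   the two comparisons are handed to maybe_fold_and_comparisons (resp.
   maybe_fold_or_comparisons for |); if they fold (here to a_1 < b_2),
   the first VEC_COND_EXPR is rewritten to use the combined condition
   and the second operand is dropped from the ops vector.  */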
4241 static bool
4242 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4244 unsigned int length = ops->length (), i, j;
4245 bool any_changes = false;
4247 if (length == 1)
4248 return false;
4250 for (i = 0; i < length; ++i)
4252 tree elt0 = (*ops)[i]->op;
4254 gassign *stmt0, *vcond0;
4255 bool invert;
4256 tree type, lhs0, rhs0;
4257 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4258 &rhs0, &vcond0);
4259 if (cmp0 == ERROR_MARK)
4260 continue;
4262 for (j = i + 1; j < length; ++j)
4264 tree &elt1 = (*ops)[j]->op;
4266 gassign *stmt1, *vcond1;
4267 tree lhs1, rhs1;
4268 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4269 &rhs1, &vcond1);
4270 if (cmp1 == ERROR_MARK)
4271 continue;
4273 tree comb;
4274 if (opcode == BIT_AND_EXPR)
4275 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4276 cmp1, lhs1, rhs1);
4277 else if (opcode == BIT_IOR_EXPR)
4278 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4279 cmp1, lhs1, rhs1);
4280 else
4281 gcc_unreachable ();
4282 if (comb == NULL)
4283 continue;
4285 /* Success! */
4286 if (dump_file && (dump_flags & TDF_DETAILS))
4288 fprintf (dump_file, "Transforming ");
4289 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4290 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4291 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4292 fprintf (dump_file, " into ");
4293 print_generic_expr (dump_file, comb);
4294 fputc ('\n', dump_file);
4297 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4298 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4299 true, GSI_SAME_STMT);
4300 if (invert)
4301 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4302 gimple_assign_rhs3_ptr (vcond0));
4303 gimple_assign_set_rhs1 (vcond0, exp);
4304 update_stmt (vcond0);
4306 elt1 = error_mark_node;
4307 any_changes = true;
4311 if (any_changes)
4313 operand_entry *oe;
4314 j = 0;
4315 FOR_EACH_VEC_ELT (*ops, i, oe)
4317 if (oe->op == error_mark_node)
4318 continue;
4319 else if (i != j)
4320 (*ops)[j] = oe;
4321 j++;
4323 ops->truncate (j);
4326 return any_changes;
4329 /* Return true if STMT is a cast like:
4330 <bb N>:
4332 _123 = (int) _234;
4334 <bb M>:
4335 # _345 = PHI <_123(N), 1(...), 1(...)>
4336 where _234 has bool type, _123 has single use and
4337 bb N has a single successor M. This is commonly used in
4338 the last block of a range test.
4340 Also return true if STMT is a tcc_comparison like:
4341 <bb N>:
4343 _234 = a_2(D) == 2;
4345 <bb M>:
4346 # _345 = PHI <_234(N), 1(...), 1(...)>
4347 _346 = (int) _345;
4348 where _234 has bool type, a single use and
4349 bb N has a single successor M. This is commonly used in
4350 the last block of a range test. */
4352 static bool
4353 final_range_test_p (gimple *stmt)
4355 basic_block bb, rhs_bb, lhs_bb;
4356 edge e;
4357 tree lhs, rhs;
4358 use_operand_p use_p;
4359 gimple *use_stmt;
4361 if (!gimple_assign_cast_p (stmt)
4362 && (!is_gimple_assign (stmt)
4363 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4364 != tcc_comparison)))
4365 return false;
4366 bb = gimple_bb (stmt);
4367 if (!single_succ_p (bb))
4368 return false;
4369 e = single_succ_edge (bb);
4370 if (e->flags & EDGE_COMPLEX)
4371 return false;
4373 lhs = gimple_assign_lhs (stmt);
4374 rhs = gimple_assign_rhs1 (stmt);
4375 if (gimple_assign_cast_p (stmt)
4376 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4377 || TREE_CODE (rhs) != SSA_NAME
4378 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4379 return false;
4381 if (!gimple_assign_cast_p (stmt)
4382 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4383 return false;
4385 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4386 if (!single_imm_use (lhs, &use_p, &use_stmt))
4387 return false;
4389 if (gimple_code (use_stmt) != GIMPLE_PHI
4390 || gimple_bb (use_stmt) != e->dest)
4391 return false;
4393 /* And that the rhs is defined in the same loop. */
4394 if (gimple_assign_cast_p (stmt))
4396 if (TREE_CODE (rhs) != SSA_NAME
4397 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4398 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4399 return false;
4401 else
4403 if (TREE_CODE (lhs) != SSA_NAME
4404 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4405 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4406 return false;
4409 return true;
4412 /* Return true if BB is suitable basic block for inter-bb range test
4413 optimization. If BACKWARD is true, BB should be the only predecessor
4414 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4415 or compared with to find a common basic block to which all conditions
4416 branch if true resp. false. If BACKWARD is false, TEST_BB should
4417 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4418 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4419 block points to an empty block that falls through into *OTHER_BB and
4420 the phi args match that path. */
4422 static bool
4423 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4424 bool *test_swapped_p, bool backward)
4426 edge_iterator ei, ei2;
4427 edge e, e2;
4428 gimple *stmt;
4429 gphi_iterator gsi;
4430 bool other_edge_seen = false;
4431 bool is_cond;
4433 if (test_bb == bb)
4434 return false;
4435 /* Check last stmt first. */
4436 stmt = last_stmt (bb);
4437 if (stmt == NULL
4438 || (gimple_code (stmt) != GIMPLE_COND
4439 && (backward || !final_range_test_p (stmt)))
4440 || gimple_visited_p (stmt)
4441 || stmt_could_throw_p (cfun, stmt)
4442 || *other_bb == bb)
4443 return false;
4444 is_cond = gimple_code (stmt) == GIMPLE_COND;
4445 if (is_cond)
4447 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4448 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4449 to *OTHER_BB (if not set yet, try to find it out). */
4450 if (EDGE_COUNT (bb->succs) != 2)
4451 return false;
4452 FOR_EACH_EDGE (e, ei, bb->succs)
4454 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4455 return false;
4456 if (e->dest == test_bb)
4458 if (backward)
4459 continue;
4460 else
4461 return false;
4463 if (e->dest == bb)
4464 return false;
4465 if (*other_bb == NULL)
4467 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4468 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4469 return false;
4470 else if (e->dest == e2->dest)
4471 *other_bb = e->dest;
4472 if (*other_bb == NULL)
4473 return false;
4475 if (e->dest == *other_bb)
4476 other_edge_seen = true;
4477 else if (backward)
4478 return false;
4480 if (*other_bb == NULL || !other_edge_seen)
4481 return false;
4483 else if (single_succ (bb) != *other_bb)
4484 return false;
4486 /* Now check all PHIs of *OTHER_BB. */
4487 e = find_edge (bb, *other_bb);
4488 e2 = find_edge (test_bb, *other_bb);
4489 retry:;
4490 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4492 gphi *phi = gsi.phi ();
4493 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4494 corresponding to BB and TEST_BB predecessor must be the same. */
4495 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4496 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4498 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4499 one of the PHIs should have the lhs of the last stmt in
4500 that block as PHI arg and that PHI should have 0 or 1
4501 corresponding to it in all other range test basic blocks
4502 considered. */
4503 if (!is_cond)
4505 if (gimple_phi_arg_def (phi, e->dest_idx)
4506 == gimple_assign_lhs (stmt)
4507 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4508 || integer_onep (gimple_phi_arg_def (phi,
4509 e2->dest_idx))))
4510 continue;
4512 else
4514 gimple *test_last = last_stmt (test_bb);
4515 if (gimple_code (test_last) == GIMPLE_COND)
4517 if (backward ? e2->src != test_bb : e->src != bb)
4518 return false;
4520 /* For last_bb, handle also:
4521 if (x_3(D) == 3)
4522 goto <bb 6>; [34.00%]
4523 else
4524 goto <bb 7>; [66.00%]
4526 <bb 6> [local count: 79512730]:
4528 <bb 7> [local count: 1073741824]:
4529 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4530 where bb 7 is *OTHER_BB, but the PHI values from the
4531 earlier bbs match the path through the empty bb
4532 in between. */
4533 edge e3;
4534 if (backward)
4535 e3 = EDGE_SUCC (test_bb,
4536 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4537 else
4538 e3 = EDGE_SUCC (bb,
4539 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4540 if (empty_block_p (e3->dest)
4541 && single_succ_p (e3->dest)
4542 && single_succ (e3->dest) == *other_bb
4543 && single_pred_p (e3->dest)
4544 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4546 if (backward)
4547 e2 = single_succ_edge (e3->dest);
4548 else
4549 e = single_succ_edge (e3->dest);
4550 if (test_swapped_p)
4551 *test_swapped_p = true;
4552 goto retry;
4555 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4556 == gimple_assign_lhs (test_last)
4557 && (integer_zerop (gimple_phi_arg_def (phi,
4558 e->dest_idx))
4559 || integer_onep (gimple_phi_arg_def (phi,
4560 e->dest_idx))))
4561 continue;
4564 return false;
4567 return true;
4570 /* Return true if BB doesn't have side-effects that would disallow
4571 range test optimization, all SSA_NAMEs set in the bb are consumed
4572 in the bb and there are no PHIs. */
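/* Illustrative sketch, not part of the pass itself: a block such as

     <bb 5>:
     _1 = x_3 & 7;
     _2 = _1 != 0;
     if (_2 != 0)
       goto <bb 6>;
     else
       goto <bb 7>;

   qualifies, because _1 and _2 are only used inside bb 5 and nothing can
   trap, whereas a block containing a call, a store, a potentially
   trapping division, or an SSA_NAME used in another basic block does
   not.  */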
4574 bool
4575 no_side_effect_bb (basic_block bb)
4577 gimple_stmt_iterator gsi;
4578 gimple *last;
4580 if (!gimple_seq_empty_p (phi_nodes (bb)))
4581 return false;
4582 last = last_stmt (bb);
4583 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4585 gimple *stmt = gsi_stmt (gsi);
4586 tree lhs;
4587 imm_use_iterator imm_iter;
4588 use_operand_p use_p;
4590 if (is_gimple_debug (stmt))
4591 continue;
4592 if (gimple_has_side_effects (stmt))
4593 return false;
4594 if (stmt == last)
4595 return true;
4596 if (!is_gimple_assign (stmt))
4597 return false;
4598 lhs = gimple_assign_lhs (stmt);
4599 if (TREE_CODE (lhs) != SSA_NAME)
4600 return false;
4601 if (gimple_assign_rhs_could_trap_p (stmt))
4602 return false;
4603 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4605 gimple *use_stmt = USE_STMT (use_p);
4606 if (is_gimple_debug (use_stmt))
4607 continue;
4608 if (gimple_bb (use_stmt) != bb)
4609 return false;
4612 return false;
4615 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4616 return true and fill in *OPS recursively. */
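/* Illustrative sketch, not part of the pass itself: for CODE ==
   BIT_IOR_EXPR and

     _1 = a_2 | b_3;
     _4 = _1 | c_5;

   calling get_ops on _4 pushes operand entries for a_2, b_3 and c_5
   (assuming each name has a single use) and marks both defining
   statements as visited.  */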
4618 static bool
4619 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4620 class loop *loop)
4622 gimple *stmt = SSA_NAME_DEF_STMT (var);
4623 tree rhs[2];
4624 int i;
4626 if (!is_reassociable_op (stmt, code, loop))
4627 return false;
4629 rhs[0] = gimple_assign_rhs1 (stmt);
4630 rhs[1] = gimple_assign_rhs2 (stmt);
4631 gimple_set_visited (stmt, true);
4632 for (i = 0; i < 2; i++)
4633 if (TREE_CODE (rhs[i]) == SSA_NAME
4634 && !get_ops (rhs[i], code, ops, loop)
4635 && has_single_use (rhs[i]))
4637 operand_entry *oe = operand_entry_pool.allocate ();
4639 oe->op = rhs[i];
4640 oe->rank = code;
4641 oe->id = 0;
4642 oe->count = 1;
4643 oe->stmt_to_insert = NULL;
4644 ops->safe_push (oe);
4646 return true;
4649 /* Find the ops that were added by get_ops starting from VAR, see if
4650 they were changed during update_range_test and if yes, create new
4651 stmts. */
4653 static tree
4654 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4655 unsigned int *pidx, class loop *loop)
4657 gimple *stmt = SSA_NAME_DEF_STMT (var);
4658 tree rhs[4];
4659 int i;
4661 if (!is_reassociable_op (stmt, code, loop))
4662 return NULL;
4664 rhs[0] = gimple_assign_rhs1 (stmt);
4665 rhs[1] = gimple_assign_rhs2 (stmt);
4666 rhs[2] = rhs[0];
4667 rhs[3] = rhs[1];
4668 for (i = 0; i < 2; i++)
4669 if (TREE_CODE (rhs[i]) == SSA_NAME)
4671 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4672 if (rhs[2 + i] == NULL_TREE)
4674 if (has_single_use (rhs[i]))
4675 rhs[2 + i] = ops[(*pidx)++]->op;
4676 else
4677 rhs[2 + i] = rhs[i];
4680 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4681 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4683 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4684 var = make_ssa_name (TREE_TYPE (var));
4685 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4686 rhs[2], rhs[3]);
4687 gimple_set_uid (g, gimple_uid (stmt));
4688 gimple_set_visited (g, true);
4689 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4690 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4691 if (fold_stmt_inplace (&gsi2))
4692 update_stmt (g);
4694 return var;
4697 /* Structure to track the initial value passed to get_ops and
4698 the range in the ops vector for each basic block. */
4700 struct inter_bb_range_test_entry
4702 tree op;
4703 unsigned int first_idx, last_idx;
4706 /* Inter-bb range test optimization.
4708 Returns TRUE if a gimple conditional is optimized into a constant true/false,
4709 otherwise returns FALSE.
4711 This indicates to the caller that it should run a CFG cleanup pass
4712 once reassociation is completed. */
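/* Illustrative sketch, not part of the pass itself, of the CFG shape this
   handles, assuming the involved blocks have no other side effects:

     if (a_1 == 2)          // first_bb, GIMPLE_COND
       goto other_bb;
     if (a_1 == 5)          // next bb in the first_bb .. last_bb range
       goto other_bb;
     ...

   The collected conditions are handed to optimize_range_tests as if they
   had been written  a_1 == 2 | a_1 == 5  in a single block, and the
   individual GIMPLE_CONDs are afterwards folded to constant true/false
   where the combined test made them redundant.  */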
4714 static bool
4715 maybe_optimize_range_tests (gimple *stmt)
4717 basic_block first_bb = gimple_bb (stmt);
4718 basic_block last_bb = first_bb;
4719 basic_block other_bb = NULL;
4720 basic_block bb;
4721 edge_iterator ei;
4722 edge e;
4723 auto_vec<operand_entry *> ops;
4724 auto_vec<inter_bb_range_test_entry> bbinfo;
4725 bool any_changes = false;
4726 bool cfg_cleanup_needed = false;
4728 /* Consider only basic blocks that end with GIMPLE_COND or
4729 a cast statement satisfying final_range_test_p. All
4730 but the last bb in the first_bb .. last_bb range
4731 should end with GIMPLE_COND. */
4732 if (gimple_code (stmt) == GIMPLE_COND)
4734 if (EDGE_COUNT (first_bb->succs) != 2)
4735 return cfg_cleanup_needed;
4737 else if (final_range_test_p (stmt))
4738 other_bb = single_succ (first_bb);
4739 else
4740 return cfg_cleanup_needed;
4742 if (stmt_could_throw_p (cfun, stmt))
4743 return cfg_cleanup_needed;
4745 /* As relative ordering of post-dominator sons isn't fixed,
4746 maybe_optimize_range_tests can be called first on any
4747 bb in the range we want to optimize. So, start searching
4748 backwards, if first_bb can be set to a predecessor. */
4749 while (single_pred_p (first_bb))
4751 basic_block pred_bb = single_pred (first_bb);
4752 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4753 break;
4754 if (!no_side_effect_bb (first_bb))
4755 break;
4756 first_bb = pred_bb;
4758 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4759 Before starting forward search in last_bb successors, find
4760 out the other_bb. */
4761 if (first_bb == last_bb)
4763 other_bb = NULL;
4764 /* As non-GIMPLE_COND last stmt always terminates the range,
4765 if forward search didn't discover anything, just give up. */
4766 if (gimple_code (stmt) != GIMPLE_COND)
4767 return cfg_cleanup_needed;
4768 /* Look at both successors. Either it ends with a GIMPLE_COND
4769 and satisfies suitable_cond_bb, or ends with a cast and
4770 other_bb is that cast's successor. */
4771 FOR_EACH_EDGE (e, ei, first_bb->succs)
4772 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4773 || e->dest == first_bb)
4774 return cfg_cleanup_needed;
4775 else if (single_pred_p (e->dest))
4777 stmt = last_stmt (e->dest);
4778 if (stmt
4779 && gimple_code (stmt) == GIMPLE_COND
4780 && EDGE_COUNT (e->dest->succs) == 2)
4782 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4783 NULL, true))
4784 break;
4785 else
4786 other_bb = NULL;
4788 else if (stmt
4789 && final_range_test_p (stmt)
4790 && find_edge (first_bb, single_succ (e->dest)))
4792 other_bb = single_succ (e->dest);
4793 if (other_bb == first_bb)
4794 other_bb = NULL;
4797 if (other_bb == NULL)
4798 return cfg_cleanup_needed;
4800 /* Now do the forward search, moving last_bb to successor bbs
4801 that aren't other_bb. */
4802 while (EDGE_COUNT (last_bb->succs) == 2)
4804 FOR_EACH_EDGE (e, ei, last_bb->succs)
4805 if (e->dest != other_bb)
4806 break;
4807 if (e == NULL)
4808 break;
4809 if (!single_pred_p (e->dest))
4810 break;
4811 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4812 break;
4813 if (!no_side_effect_bb (e->dest))
4814 break;
4815 last_bb = e->dest;
4817 if (first_bb == last_bb)
4818 return cfg_cleanup_needed;
4819 /* Here basic blocks first_bb through last_bb's predecessor
4820 end with GIMPLE_COND, all of them have one of the edges to
4821 other_bb and another to another block in the range,
4822 all blocks except first_bb don't have side-effects and
4823 last_bb ends with either GIMPLE_COND, or cast satisfying
4824 final_range_test_p. */
4825 for (bb = last_bb; ; bb = single_pred (bb))
4827 enum tree_code code;
4828 tree lhs, rhs;
4829 inter_bb_range_test_entry bb_ent;
4831 bb_ent.op = NULL_TREE;
4832 bb_ent.first_idx = ops.length ();
4833 bb_ent.last_idx = bb_ent.first_idx;
4834 e = find_edge (bb, other_bb);
4835 stmt = last_stmt (bb);
4836 gimple_set_visited (stmt, true);
4837 if (gimple_code (stmt) != GIMPLE_COND)
4839 use_operand_p use_p;
4840 gimple *phi;
4841 edge e2;
4842 unsigned int d;
4844 lhs = gimple_assign_lhs (stmt);
4845 rhs = gimple_assign_rhs1 (stmt);
4846 gcc_assert (bb == last_bb);
4848 /* stmt is
4849 _123 = (int) _234;
4851 _234 = a_2(D) == 2;
4853 followed by:
4854 <bb M>:
4855 # _345 = PHI <_123(N), 1(...), 1(...)>
4857 or 0 instead of 1. If it is 0, the _234
4858 range test is anded together with all the
4859 other range tests, if it is 1, it is ored with
4860 them. */
4861 single_imm_use (lhs, &use_p, &phi);
4862 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4863 e2 = find_edge (first_bb, other_bb);
4864 d = e2->dest_idx;
4865 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4866 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4867 code = BIT_AND_EXPR;
4868 else
4870 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4871 code = BIT_IOR_EXPR;
4874 /* If _234 SSA_NAME_DEF_STMT is
4875 _234 = _567 | _789;
4876 (or &, corresponding to 1/0 in the phi arguments),
4877 push into ops the individual range test arguments
4878 of the bitwise or resp. and, recursively. */
4879 if (TREE_CODE (rhs) == SSA_NAME
4880 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4881 != tcc_comparison)
4882 && !get_ops (rhs, code, &ops,
4883 loop_containing_stmt (stmt))
4884 && has_single_use (rhs))
4886 /* Otherwise, push the _234 range test itself. */
4887 operand_entry *oe = operand_entry_pool.allocate ();
4889 oe->op = rhs;
4890 oe->rank = code;
4891 oe->id = 0;
4892 oe->count = 1;
4893 oe->stmt_to_insert = NULL;
4894 ops.safe_push (oe);
4895 bb_ent.last_idx++;
4896 bb_ent.op = rhs;
4898 else if (is_gimple_assign (stmt)
4899 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4900 == tcc_comparison)
4901 && !get_ops (lhs, code, &ops,
4902 loop_containing_stmt (stmt))
4903 && has_single_use (lhs))
4905 operand_entry *oe = operand_entry_pool.allocate ();
4906 oe->op = lhs;
4907 oe->rank = code;
4908 oe->id = 0;
4909 oe->count = 1;
4910 ops.safe_push (oe);
4911 bb_ent.last_idx++;
4912 bb_ent.op = lhs;
4914 else
4916 bb_ent.last_idx = ops.length ();
4917 bb_ent.op = rhs;
4919 bbinfo.safe_push (bb_ent);
4920 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4921 ops[i]->id = bb->index;
4922 continue;
4924 else if (bb == last_bb)
4926 /* For last_bb, handle also:
4927 if (x_3(D) == 3)
4928 goto <bb 6>; [34.00%]
4929 else
4930 goto <bb 7>; [66.00%]
4932 <bb 6> [local count: 79512730]:
4934 <bb 7> [local count: 1073741824]:
4935 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4936 where bb 7 is OTHER_BB, but the PHI values from the
4937 earlier bbs match the path through the empty bb
4938 in between. */
4939 bool test_swapped_p = false;
4940 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4941 &other_bb, &test_swapped_p, true);
4942 gcc_assert (ok);
4943 if (test_swapped_p)
4944 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4946 /* Otherwise stmt is GIMPLE_COND. */
4947 code = gimple_cond_code (stmt);
4948 lhs = gimple_cond_lhs (stmt);
4949 rhs = gimple_cond_rhs (stmt);
4950 if (TREE_CODE (lhs) == SSA_NAME
4951 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4952 && ((code != EQ_EXPR && code != NE_EXPR)
4953 || rhs != boolean_false_node
4954 /* Either push into ops the individual bitwise
4955 or resp. and operands, depending on which
4956 edge is other_bb. */
4957 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4958 ^ (code == EQ_EXPR))
4959 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4960 loop_containing_stmt (stmt))))
4962 /* Or push the GIMPLE_COND stmt itself. */
4963 operand_entry *oe = operand_entry_pool.allocate ();
4965 oe->op = NULL;
4966 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4967 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4968 /* oe->op = NULL signals that there is no SSA_NAME
4969 for the range test, and oe->id instead is the
4970 basic block number at whose end the GIMPLE_COND
4971 is. */
4972 oe->id = bb->index;
4973 oe->count = 1;
4974 oe->stmt_to_insert = NULL;
4975 ops.safe_push (oe);
4976 bb_ent.op = NULL;
4977 bb_ent.last_idx++;
4979 else if (ops.length () > bb_ent.first_idx)
4981 bb_ent.op = lhs;
4982 bb_ent.last_idx = ops.length ();
4984 bbinfo.safe_push (bb_ent);
4985 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4986 ops[i]->id = bb->index;
4987 if (bb == first_bb)
4988 break;
4990 if (ops.length () > 1)
4991 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4992 if (any_changes)
4994 unsigned int idx, max_idx = 0;
4995 /* update_ops relies on has_single_use predicates returning the
4996 same values as they did during get_ops earlier. Additionally it
4997 never removes statements, only adds new ones and it should walk
4998 from the single imm use and check the predicate already before
4999 making those changes.
5000 On the other side, the handling of GIMPLE_COND directly can turn
5001 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5002 it needs to be done in a separate loop afterwards. */
5003 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5005 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5006 && bbinfo[idx].op != NULL_TREE)
5008 tree new_op;
5010 max_idx = idx;
5011 stmt = last_stmt (bb);
5012 new_op = update_ops (bbinfo[idx].op,
5013 (enum tree_code)
5014 ops[bbinfo[idx].first_idx]->rank,
5015 ops, &bbinfo[idx].first_idx,
5016 loop_containing_stmt (stmt));
5017 if (new_op == NULL_TREE)
5019 gcc_assert (bb == last_bb);
5020 new_op = ops[bbinfo[idx].first_idx++]->op;
5022 if (bbinfo[idx].op != new_op)
5024 imm_use_iterator iter;
5025 use_operand_p use_p;
5026 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5028 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5029 if (is_gimple_debug (use_stmt))
5030 continue;
5031 else if (gimple_code (use_stmt) == GIMPLE_COND
5032 || gimple_code (use_stmt) == GIMPLE_PHI)
5033 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5034 SET_USE (use_p, new_op);
5035 else if ((is_gimple_assign (use_stmt)
5036 && (TREE_CODE_CLASS
5037 (gimple_assign_rhs_code (use_stmt))
5038 == tcc_comparison)))
5039 cast_or_tcc_cmp_stmt = use_stmt;
5040 else if (gimple_assign_cast_p (use_stmt))
5041 cast_or_tcc_cmp_stmt = use_stmt;
5042 else
5043 gcc_unreachable ();
5045 if (cast_or_tcc_cmp_stmt)
5047 gcc_assert (bb == last_bb);
5048 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5049 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5050 enum tree_code rhs_code
5051 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5052 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5053 : CONVERT_EXPR;
5054 gassign *g;
5055 if (is_gimple_min_invariant (new_op))
5057 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5058 g = gimple_build_assign (new_lhs, new_op);
5060 else
5061 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5062 gimple_stmt_iterator gsi
5063 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5064 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5065 gimple_set_visited (g, true);
5066 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5067 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5068 if (is_gimple_debug (use_stmt))
5069 continue;
5070 else if (gimple_code (use_stmt) == GIMPLE_COND
5071 || gimple_code (use_stmt) == GIMPLE_PHI)
5072 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5073 SET_USE (use_p, new_lhs);
5074 else
5075 gcc_unreachable ();
5079 if (bb == first_bb)
5080 break;
5082 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5084 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5085 && bbinfo[idx].op == NULL_TREE
5086 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5088 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
5090 if (idx > max_idx)
5091 max_idx = idx;
5093 /* If we collapse the conditional to a true/false
5094 condition, then bubble that knowledge up to our caller. */
5095 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5097 gimple_cond_make_false (cond_stmt);
5098 cfg_cleanup_needed = true;
5100 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5102 gimple_cond_make_true (cond_stmt);
5103 cfg_cleanup_needed = true;
5105 else
5107 gimple_cond_set_code (cond_stmt, NE_EXPR);
5108 gimple_cond_set_lhs (cond_stmt,
5109 ops[bbinfo[idx].first_idx]->op);
5110 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5112 update_stmt (cond_stmt);
5114 if (bb == first_bb)
5115 break;
5118 /* The above changes could result in basic blocks after the first
5119 modified one, up to and including last_bb, to be executed even if
5120 they would not be in the original program. If the value ranges of
5121 assignment lhs' in those bbs were dependent on the conditions
5122 guarding those basic blocks which now can change, the VRs might
5123 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5124 are only used within the same bb, it should be not a big deal if
5125 we just reset all the VRs in those bbs. See PR68671. */
5126 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5127 reset_flow_sensitive_info_in_bb (bb);
5129 return cfg_cleanup_needed;
5132 /* Remove def stmt of VAR if VAR has zero uses and recurse
5133 on rhs1 operand if so. */
5135 static void
5136 remove_visited_stmt_chain (tree var)
5138 gimple *stmt;
5139 gimple_stmt_iterator gsi;
5141 while (1)
5143 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5144 return;
5145 stmt = SSA_NAME_DEF_STMT (var);
5146 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5148 var = gimple_assign_rhs1 (stmt);
5149 gsi = gsi_for_stmt (stmt);
5150 reassoc_remove_stmt (&gsi);
5151 release_defs (stmt);
5153 else
5154 return;
5158 /* This function checks three consecutive operands in
5159 the passed operand vector OPS starting from OPINDEX and
5160 swaps two operands if it is profitable for the binary operation
5161 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5163 We pair ops with the same rank if possible. */
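/* Illustrative sketch, not part of the pass itself: for ranks
   { 3, 3, 1 } at OPS[OPINDEX .. OPINDEX + 2] the first and third entries
   are swapped,

     before:  { a (rank 3), b (rank 3), c (rank 1) }
     after:   { c (rank 1), b (rank 3), a (rank 3) }

   so that the statement consuming OPS[OPINDEX + 1] and OPS[OPINDEX + 2]
   pairs the two equal-rank operands.  */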
5165 static void
5166 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5167 unsigned int opindex)
5169 operand_entry *oe1, *oe2, *oe3;
5171 oe1 = ops[opindex];
5172 oe2 = ops[opindex + 1];
5173 oe3 = ops[opindex + 2];
5175 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5176 std::swap (*oe1, *oe3);
5177 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5178 std::swap (*oe1, *oe2);
5181 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5182 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5183 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5184 after the returned stmt. */
5186 static inline gimple *
5187 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5189 insert_before = true;
5190 if (TREE_CODE (rhs1) == SSA_NAME
5191 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5193 stmt = SSA_NAME_DEF_STMT (rhs1);
5194 insert_before = false;
5196 if (TREE_CODE (rhs2) == SSA_NAME
5197 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5199 stmt = SSA_NAME_DEF_STMT (rhs2);
5200 insert_before = false;
5202 return stmt;
5205 /* If the stmt that defines operand has to be inserted, insert it
5206 before the use. */
5207 static void
5208 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5210 gcc_assert (is_gimple_assign (stmt_to_insert));
5211 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5212 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5213 bool insert_before;
5214 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5215 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5216 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5218 /* If the insert point is not stmt, then insert_point would be
5219 the point where operand rhs1 or rhs2 is defined. In this case,
5220 stmt_to_insert has to be inserted afterwards. This would
5221 only happen when the stmt insertion point is flexible. */
5222 if (insert_before)
5223 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5224 else
5225 insert_stmt_after (stmt_to_insert, insert_point);
5229 /* Recursively rewrite our linearized statements so that the operators
5230 match those in OPS[OPINDEX], putting the computation in rank
5231 order. Return new lhs.
5232 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5233 the current stmt and during recursive invocations.
5234 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5235 recursive invocations. */
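/* Illustrative sketch, not part of the pass itself: for a linearized
   chain

     _1 = c_4 + a_2;
     _5 = _1 + b_3;

   and OPS sorted into rank order as { a, b, c }, the recursion bottoms
   out at the inner statement, which is rewritten to compute  b + c,  and
   the outer statement then computes  (b + c) + a,  with fresh SSA_NAMEs
   created whenever CHANGED requires them.  */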
5237 static tree
5238 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5239 const vec<operand_entry *> &ops, bool changed,
5240 bool next_changed)
5242 tree rhs1 = gimple_assign_rhs1 (stmt);
5243 tree rhs2 = gimple_assign_rhs2 (stmt);
5244 tree lhs = gimple_assign_lhs (stmt);
5245 operand_entry *oe;
5247 /* The final recursion case for this function is that you have
5248 exactly two operations left.
5249 If we had exactly one op in the entire list to start with, we
5250 would have never called this function, and the tail recursion
5251 rewrites them one at a time. */
5252 if (opindex + 2 == ops.length ())
5254 operand_entry *oe1, *oe2;
5256 oe1 = ops[opindex];
5257 oe2 = ops[opindex + 1];
5259 if (rhs1 != oe1->op || rhs2 != oe2->op)
5261 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5262 unsigned int uid = gimple_uid (stmt);
5264 if (dump_file && (dump_flags & TDF_DETAILS))
5266 fprintf (dump_file, "Transforming ");
5267 print_gimple_stmt (dump_file, stmt, 0);
5270 /* If the stmt that defines operand has to be inserted, insert it
5271 before the use. */
5272 if (oe1->stmt_to_insert)
5273 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5274 if (oe2->stmt_to_insert)
5275 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5276 /* Even when changed is false, reassociation could have e.g. removed
5277 some redundant operations, so unless we are just swapping the
5278 arguments or unless there is no change at all (then we just
5279 return lhs), force creation of a new SSA_NAME. */
5280 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5282 bool insert_before;
5283 gimple *insert_point
5284 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5285 lhs = make_ssa_name (TREE_TYPE (lhs));
5286 stmt
5287 = gimple_build_assign (lhs, rhs_code,
5288 oe1->op, oe2->op);
5289 gimple_set_uid (stmt, uid);
5290 gimple_set_visited (stmt, true);
5291 if (insert_before)
5292 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5293 else
5294 insert_stmt_after (stmt, insert_point);
5296 else
5298 bool insert_before;
5299 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5300 insert_before)
5301 == stmt);
5302 gimple_assign_set_rhs1 (stmt, oe1->op);
5303 gimple_assign_set_rhs2 (stmt, oe2->op);
5304 update_stmt (stmt);
5307 if (rhs1 != oe1->op && rhs1 != oe2->op)
5308 remove_visited_stmt_chain (rhs1);
5310 if (dump_file && (dump_flags & TDF_DETAILS))
5312 fprintf (dump_file, " into ");
5313 print_gimple_stmt (dump_file, stmt, 0);
5316 return lhs;
5319 /* If we hit here, we should have 3 or more ops left. */
5320 gcc_assert (opindex + 2 < ops.length ());
5322 /* Rewrite the next operator. */
5323 oe = ops[opindex];
5325 /* If the stmt that defines operand has to be inserted, insert it
5326 before the use. */
5327 if (oe->stmt_to_insert)
5328 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5330 /* Recurse on the LHS of the binary operator, which is guaranteed to
5331 be the non-leaf side. */
5332 tree new_rhs1
5333 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5334 changed || oe->op != rhs2 || next_changed,
5335 false);
5337 if (oe->op != rhs2 || new_rhs1 != rhs1)
5339 if (dump_file && (dump_flags & TDF_DETAILS))
5341 fprintf (dump_file, "Transforming ");
5342 print_gimple_stmt (dump_file, stmt, 0);
5345 /* If changed is false, this is either opindex == 0
5346 or all outer rhs2's were equal to corresponding oe->op,
5347 and powi_result is NULL.
5348 That means lhs is equivalent before and after reassociation.
5349 Otherwise ensure the old lhs SSA_NAME is not reused and
5350 create a new stmt as well, so that any debug stmts will be
5351 properly adjusted. */
5352 if (changed)
5354 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5355 unsigned int uid = gimple_uid (stmt);
5356 bool insert_before;
5357 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5358 insert_before);
5360 lhs = make_ssa_name (TREE_TYPE (lhs));
5361 stmt = gimple_build_assign (lhs, rhs_code,
5362 new_rhs1, oe->op);
5363 gimple_set_uid (stmt, uid);
5364 gimple_set_visited (stmt, true);
5365 if (insert_before)
5366 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5367 else
5368 insert_stmt_after (stmt, insert_point);
5370 else
5372 bool insert_before;
5373 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5374 insert_before)
5375 == stmt);
5376 gimple_assign_set_rhs1 (stmt, new_rhs1);
5377 gimple_assign_set_rhs2 (stmt, oe->op);
5378 update_stmt (stmt);
5381 if (dump_file && (dump_flags & TDF_DETAILS))
5383 fprintf (dump_file, " into ");
5384 print_gimple_stmt (dump_file, stmt, 0);
5387 return lhs;
5390 /* Find out how many cycles we need to compute a statement chain.
5391 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5392 maximum number of independent statements we may execute per cycle. */
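/* Illustrative example of the computation below: for OPS_NUM == 8 and
   CPU_WIDTH == 2 we get  res = 8 / (2 * 2) = 2  full-width cycles,
   leaving  rest = 8 - 2 * 2 = 4  operands, which need another
   log2 (4) = 2  cycles, i.e. 4 cycles in total.  */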
5394 static int
5395 get_required_cycles (int ops_num, int cpu_width)
5397 int res;
5398 int elog;
5399 unsigned int rest;
5401 /* While we have more than 2 * cpu_width operands
5402 we may reduce number of operands by cpu_width
5403 per cycle. */
5404 res = ops_num / (2 * cpu_width);
5406 /* The remaining operand count may be halved each cycle
5407 until we have only one operand. */
5408 rest = (unsigned)(ops_num - res * cpu_width);
5409 elog = exact_log2 (rest);
5410 if (elog >= 0)
5411 res += elog;
5412 else
5413 res += floor_log2 (rest) + 1;
5415 return res;
5418 /* Returns an optimal number of registers to use for computation of
5419 given statements. */
5421 static int
5422 get_reassociation_width (int ops_num, enum tree_code opc,
5423 machine_mode mode)
5425 int param_width = param_tree_reassoc_width;
5426 int width;
5427 int width_min;
5428 int cycles_best;
5430 if (param_width > 0)
5431 width = param_width;
5432 else
5433 width = targetm.sched.reassociation_width (opc, mode);
5435 if (width == 1)
5436 return width;
5438 /* Get the minimal time required for sequence computation. */
5439 cycles_best = get_required_cycles (ops_num, width);
5441 /* Check if we may use a smaller width and still compute the sequence in
5442 the same time. This allows us to reduce register usage.
5443 get_required_cycles is monotonically increasing with lower width
5444 so we can perform a binary search for the minimal width that still
5445 results in the optimal cycle count. */
5446 width_min = 1;
5447 while (width > width_min)
5449 int width_mid = (width + width_min) / 2;
5451 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5452 width = width_mid;
5453 else if (width_min < width_mid)
5454 width_min = width_mid;
5455 else
5456 break;
5459 return width;
5462 /* Recursively rewrite our linearized statements so that the operators
5463 match those in OPS[OPINDEX], putting the computation in rank
5464 order and trying to allow operations to be executed in
5465 parallel. */
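/* Illustrative sketch, not part of the pass itself: for OPS ==
   { a, b, c, d } and WIDTH == 2 the rewritten sequence is roughly

     _1 = c + d;
     _2 = a + b;
     _3 = _1 + _2;

   so that the first two additions can execute in parallel, instead of a
   fully serial chain such as  ((a + b) + c) + d.  */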
5467 static void
5468 rewrite_expr_tree_parallel (gassign *stmt, int width,
5469 const vec<operand_entry *> &ops)
5471 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5472 int op_num = ops.length ();
5473 gcc_assert (op_num > 0);
5474 int stmt_num = op_num - 1;
5475 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5476 int op_index = op_num - 1;
5477 int stmt_index = 0;
5478 int ready_stmts_end = 0;
5479 int i = 0;
5480 gimple *stmt1 = NULL, *stmt2 = NULL;
5481 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5483 /* We start expression rewriting from the top statements.
5484 So, in this loop we create a full list of statements
5485 we will work with. */
5486 stmts[stmt_num - 1] = stmt;
5487 for (i = stmt_num - 2; i >= 0; i--)
5488 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5490 for (i = 0; i < stmt_num; i++)
5492 tree op1, op2;
5494 /* Determine whether we should use results of
5495 already handled statements or not. */
5496 if (ready_stmts_end == 0
5497 && (i - stmt_index >= width || op_index < 1))
5498 ready_stmts_end = i;
5500 /* Now we choose operands for the next statement. A nonzero
5501 value in ready_stmts_end means that we should use
5502 the results of already generated statements as new operands. */
5503 if (ready_stmts_end > 0)
5505 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5506 if (ready_stmts_end > stmt_index)
5507 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5508 else if (op_index >= 0)
5510 operand_entry *oe = ops[op_index--];
5511 stmt2 = oe->stmt_to_insert;
5512 op2 = oe->op;
5514 else
5516 gcc_assert (stmt_index < i);
5517 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5520 if (stmt_index >= ready_stmts_end)
5521 ready_stmts_end = 0;
5523 else
5525 if (op_index > 1)
5526 swap_ops_for_binary_stmt (ops, op_index - 2);
5527 operand_entry *oe2 = ops[op_index--];
5528 operand_entry *oe1 = ops[op_index--];
5529 op2 = oe2->op;
5530 stmt2 = oe2->stmt_to_insert;
5531 op1 = oe1->op;
5532 stmt1 = oe1->stmt_to_insert;
5535 /* If we emit the last statement then we should put
5536 operands into the last statement. It will also
5537 break the loop. */
5538 if (op_index < 0 && stmt_index == i)
5539 i = stmt_num - 1;
5541 if (dump_file && (dump_flags & TDF_DETAILS))
5543 fprintf (dump_file, "Transforming ");
5544 print_gimple_stmt (dump_file, stmts[i], 0);
5547 /* If the stmt that defines operand has to be inserted, insert it
5548 before the use. */
5549 if (stmt1)
5550 insert_stmt_before_use (stmts[i], stmt1);
5551 if (stmt2)
5552 insert_stmt_before_use (stmts[i], stmt2);
5553 stmt1 = stmt2 = NULL;
5555 /* We keep the original statement only for the last one. All
5556 others are recreated. */
5557 if (i == stmt_num - 1)
5559 gimple_assign_set_rhs1 (stmts[i], op1);
5560 gimple_assign_set_rhs2 (stmts[i], op2);
5561 update_stmt (stmts[i]);
5563 else
5565 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5566 gimple_set_visited (stmts[i], true);
5568 if (dump_file && (dump_flags & TDF_DETAILS))
5570 fprintf (dump_file, " into ");
5571 print_gimple_stmt (dump_file, stmts[i], 0);
5575 remove_visited_stmt_chain (last_rhs1);
5578 /* Transform STMT, which is really (A + B) + (C + D), into the left
5579 linear form ((A + B) + C) + D.
5580 Recurse on D if necessary. */
5582 static void
5583 linearize_expr (gimple *stmt)
5585 gimple_stmt_iterator gsi;
5586 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5587 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5588 gimple *oldbinrhs = binrhs;
5589 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5590 gimple *newbinrhs = NULL;
5591 class loop *loop = loop_containing_stmt (stmt);
5592 tree lhs = gimple_assign_lhs (stmt);
5594 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5595 && is_reassociable_op (binrhs, rhscode, loop));
5597 gsi = gsi_for_stmt (stmt);
5599 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5600 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5601 gimple_assign_rhs_code (binrhs),
5602 gimple_assign_lhs (binlhs),
5603 gimple_assign_rhs2 (binrhs));
5604 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5605 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5606 gimple_set_uid (binrhs, gimple_uid (stmt));
5608 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5609 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5611 if (dump_file && (dump_flags & TDF_DETAILS))
5613 fprintf (dump_file, "Linearized: ");
5614 print_gimple_stmt (dump_file, stmt, 0);
5617 reassociate_stats.linearized++;
5618 update_stmt (stmt);
5620 gsi = gsi_for_stmt (oldbinrhs);
5621 reassoc_remove_stmt (&gsi);
5622 release_defs (oldbinrhs);
5624 gimple_set_visited (stmt, true);
5625 gimple_set_visited (binlhs, true);
5626 gimple_set_visited (binrhs, true);
5628 /* Tail recurse on the new rhs if it still needs reassociation. */
5629 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5630 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5631 want to change the algorithm while converting to tuples. */
5632 linearize_expr (stmt);
5635 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5636 it. Otherwise, return NULL. */
5638 static gimple *
5639 get_single_immediate_use (tree lhs)
5641 use_operand_p immuse;
5642 gimple *immusestmt;
5644 if (TREE_CODE (lhs) == SSA_NAME
5645 && single_imm_use (lhs, &immuse, &immusestmt)
5646 && is_gimple_assign (immusestmt))
5647 return immusestmt;
5649 return NULL;
5652 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5653 representing the negated value. Insertions of any necessary
5654 instructions go before GSI.
5655 This function is recursive in that, if you hand it "a_5" as the
5656 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5657 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5659 static tree
5660 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5662 gimple *negatedefstmt = NULL;
5663 tree resultofnegate;
5664 gimple_stmt_iterator gsi;
5665 unsigned int uid;
5667 /* If we are trying to negate a name, defined by an add, negate the
5668 add operands instead. */
5669 if (TREE_CODE (tonegate) == SSA_NAME)
5670 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5671 if (TREE_CODE (tonegate) == SSA_NAME
5672 && is_gimple_assign (negatedefstmt)
5673 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5674 && has_single_use (gimple_assign_lhs (negatedefstmt))
5675 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5677 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5678 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5679 tree lhs = gimple_assign_lhs (negatedefstmt);
5680 gimple *g;
5682 gsi = gsi_for_stmt (negatedefstmt);
5683 rhs1 = negate_value (rhs1, &gsi);
5685 gsi = gsi_for_stmt (negatedefstmt);
5686 rhs2 = negate_value (rhs2, &gsi);
5688 gsi = gsi_for_stmt (negatedefstmt);
5689 lhs = make_ssa_name (TREE_TYPE (lhs));
5690 gimple_set_visited (negatedefstmt, true);
5691 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5692 gimple_set_uid (g, gimple_uid (negatedefstmt));
5693 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5694 return lhs;
5697 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5698 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5699 NULL_TREE, true, GSI_SAME_STMT);
5700 gsi = *gsip;
5701 uid = gimple_uid (gsi_stmt (gsi));
5702 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5704 gimple *stmt = gsi_stmt (gsi);
5705 if (gimple_uid (stmt) != 0)
5706 break;
5707 gimple_set_uid (stmt, uid);
5709 return resultofnegate;
5712 /* Return true if we should break up the subtract in STMT into an add
5713 with negate. This is true when the subtract operands are really
5714 adds, or the subtract itself is used in an add expression. In
5715 either case, breaking up the subtract into an add with negate
5716 exposes the adds to reassociation. */
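/* Illustrative sketch, not part of the pass itself: in

     t_1 = a_2 - b_3;
     r_4 = t_1 + c_5;

   the subtract feeds an add, so it is broken up into  a_2 + -b_3,  which
   exposes the whole computation as one PLUS chain { a_2, -b_3, c_5 } to
   the reassociation machinery.  */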
5718 static bool
5719 should_break_up_subtract (gimple *stmt)
5721 tree lhs = gimple_assign_lhs (stmt);
5722 tree binlhs = gimple_assign_rhs1 (stmt);
5723 tree binrhs = gimple_assign_rhs2 (stmt);
5724 gimple *immusestmt;
5725 class loop *loop = loop_containing_stmt (stmt);
5727 if (TREE_CODE (binlhs) == SSA_NAME
5728 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5729 return true;
5731 if (TREE_CODE (binrhs) == SSA_NAME
5732 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5733 return true;
5735 if (TREE_CODE (lhs) == SSA_NAME
5736 && (immusestmt = get_single_immediate_use (lhs))
5737 && is_gimple_assign (immusestmt)
5738 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5739 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5740 && gimple_assign_rhs1 (immusestmt) == lhs)
5741 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5742 return true;
5743 return false;
5746 /* Transform STMT from A - B into A + -B. */
5748 static void
5749 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5751 tree rhs1 = gimple_assign_rhs1 (stmt);
5752 tree rhs2 = gimple_assign_rhs2 (stmt);
5754 if (dump_file && (dump_flags & TDF_DETAILS))
5756 fprintf (dump_file, "Breaking up subtract ");
5757 print_gimple_stmt (dump_file, stmt, 0);
5760 rhs2 = negate_value (rhs2, gsip);
5761 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5762 update_stmt (stmt);
5765 /* Determine whether STMT is a builtin call that raises an SSA name
5766 to an integer power and has only one use. If so, and this is early
5767 reassociation and unsafe math optimizations are permitted, place
5768 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5769 If any of these conditions does not hold, return FALSE. */
5771 static bool
5772 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5774 tree arg1;
5775 REAL_VALUE_TYPE c, cint;
5777 switch (gimple_call_combined_fn (stmt))
5779 CASE_CFN_POW:
5780 if (flag_errno_math)
5781 return false;
5783 *base = gimple_call_arg (stmt, 0);
5784 arg1 = gimple_call_arg (stmt, 1);
5786 if (TREE_CODE (arg1) != REAL_CST)
5787 return false;
5789 c = TREE_REAL_CST (arg1);
5791 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5792 return false;
5794 *exponent = real_to_integer (&c);
5795 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5796 if (!real_identical (&c, &cint))
5797 return false;
5799 break;
5801 CASE_CFN_POWI:
5802 *base = gimple_call_arg (stmt, 0);
5803 arg1 = gimple_call_arg (stmt, 1);
5805 if (!tree_fits_shwi_p (arg1))
5806 return false;
5808 *exponent = tree_to_shwi (arg1);
5809 break;
5811 default:
5812 return false;
5815 /* Expanding negative exponents is generally unproductive, so we don't
5816 complicate matters with those. Exponents of zero and one should
5817 have been handled by expression folding. */
5818 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5819 return false;
5821 return true;
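/* Illustrative cases (assuming -funsafe-math-optimizations and
   !flag_errno_math for the pow forms):

     t_1 = __builtin_pow (x_2, 4.0);    base x_2, exponent 4   accepted
     t_3 = __builtin_powi (x_2, 5);     base x_2, exponent 5   accepted
     t_4 = __builtin_pow (x_2, 0.5);    non-integral exponent  rejected
     t_5 = __builtin_pow (x_2, -3.0);   exponent less than 2   rejected  */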
5824 /* Try to derive and add an operand entry for OP to *OPS. Return false if
5825 unsuccessful. */
5827 static bool
5828 try_special_add_to_ops (vec<operand_entry *> *ops,
5829 enum tree_code code,
5830 tree op, gimple* def_stmt)
5832 tree base = NULL_TREE;
5833 HOST_WIDE_INT exponent = 0;
5835 if (TREE_CODE (op) != SSA_NAME
5836 || ! has_single_use (op))
5837 return false;
5839 if (code == MULT_EXPR
5840 && reassoc_insert_powi_p
5841 && flag_unsafe_math_optimizations
5842 && is_gimple_call (def_stmt)
5843 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5845 add_repeat_to_ops_vec (ops, base, exponent);
5846 gimple_set_visited (def_stmt, true);
5847 return true;
5849 else if (code == MULT_EXPR
5850 && is_gimple_assign (def_stmt)
5851 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5852 && !HONOR_SNANS (TREE_TYPE (op))
5853 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5854 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
5855 && (!FLOAT_TYPE_P (TREE_TYPE (op))
5856 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
5858 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5859 tree cst = build_minus_one_cst (TREE_TYPE (op));
5860 add_to_ops_vec (ops, rhs1);
5861 add_to_ops_vec (ops, cst);
5862 gimple_set_visited (def_stmt, true);
5863 return true;
5866 return false;
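/* For example (arbitrary names), in a multiplication chain the single-use
   operand

     t_1 = -x_2;    ...    y_3 = t_1 * z_4;

   contributes the two entries { x_2, -1 } to OPS instead of t_1, and a
   single-use pow/powi call accepted above is recorded as a repeated
   factor via add_repeat_to_ops_vec.  */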
5869 /* Recursively linearize a binary expression that is the RHS of STMT.
5870 Place the operands of the expression tree in the vector named OPS. */
5872 static void
5873 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5874 bool is_associative, bool set_visited)
5876 tree binlhs = gimple_assign_rhs1 (stmt);
5877 tree binrhs = gimple_assign_rhs2 (stmt);
5878 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5879 bool binlhsisreassoc = false;
5880 bool binrhsisreassoc = false;
5881 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5882 class loop *loop = loop_containing_stmt (stmt);
5884 if (set_visited)
5885 gimple_set_visited (stmt, true);
5887 if (TREE_CODE (binlhs) == SSA_NAME)
5889 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5890 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5891 && !stmt_could_throw_p (cfun, binlhsdef));
5894 if (TREE_CODE (binrhs) == SSA_NAME)
5896 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5897 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5898 && !stmt_could_throw_p (cfun, binrhsdef));
5901 /* If the LHS is not reassociable, but the RHS is, we need to swap
5902 them. If neither is reassociable, there is nothing we can do, so
5903 just put them in the ops vector. If the LHS is reassociable,
5904 linearize it. If both are reassociable, then linearize the RHS
5905 and the LHS. */
5907 if (!binlhsisreassoc)
5909 /* If this is not an associative operation, like division, give up. */
5910 if (!is_associative)
5912 add_to_ops_vec (ops, binrhs);
5913 return;
5916 if (!binrhsisreassoc)
5918 bool swap = false;
5919 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5920 /* If we add ops for the rhs we expect to be able to recurse
5921 to it via the lhs during expression rewrite so swap
5922 operands. */
5923 swap = true;
5924 else
5925 add_to_ops_vec (ops, binrhs);
5927 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5928 add_to_ops_vec (ops, binlhs);
5930 if (!swap)
5931 return;
5934 if (dump_file && (dump_flags & TDF_DETAILS))
5936 fprintf (dump_file, "swapping operands of ");
5937 print_gimple_stmt (dump_file, stmt, 0);
5940 swap_ssa_operands (stmt,
5941 gimple_assign_rhs1_ptr (stmt),
5942 gimple_assign_rhs2_ptr (stmt));
5943 update_stmt (stmt);
5945 if (dump_file && (dump_flags & TDF_DETAILS))
5947 fprintf (dump_file, " is now ");
5948 print_gimple_stmt (dump_file, stmt, 0);
5950 if (!binrhsisreassoc)
5951 return;
5953 /* We want to make it so the lhs is always the reassociative op,
5954 so swap. */
5955 std::swap (binlhs, binrhs);
5957 else if (binrhsisreassoc)
5959 linearize_expr (stmt);
5960 binlhs = gimple_assign_rhs1 (stmt);
5961 binrhs = gimple_assign_rhs2 (stmt);
5964 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5965 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5966 rhscode, loop));
5967 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5968 is_associative, set_visited);
5970 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5971 add_to_ops_vec (ops, binrhs);
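/* As a sketch (arbitrary names), linearizing

     t_1 = a_2 + b_3;
     t_4 = c_5 + d_6;
     t_7 = t_1 + t_4;

   starting from t_7 first reshapes the tree into the left-linear form
   (((a_2 + b_3) + c_5) + d_6) and ends with OPS holding the four leaves
   { a_2, b_3, c_5, d_6 }.  */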
5974 /* Repropagate the negates back into subtracts, since no other pass
5975 currently does it. */
5977 static void
5978 repropagate_negates (void)
5980 unsigned int i = 0;
5981 tree negate;
5983 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5985 gimple *user = get_single_immediate_use (negate);
5986 if (!user || !is_gimple_assign (user))
5987 continue;
5989 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5990 if (TREE_CODE (negateop) == SSA_NAME
5991 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
5992 continue;
5994 /* The negate operand can be either operand of a PLUS_EXPR
5995 (it can be the LHS if the RHS is a constant for example).
5997 Force the negate operand to the RHS of the PLUS_EXPR, then
5998 transform the PLUS_EXPR into a MINUS_EXPR. */
5999 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6001 /* If the negated operand appears on the LHS of the
6002 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6003 to force the negated operand to the RHS of the PLUS_EXPR. */
6004 if (gimple_assign_rhs1 (user) == negate)
6006 swap_ssa_operands (user,
6007 gimple_assign_rhs1_ptr (user),
6008 gimple_assign_rhs2_ptr (user));
6011 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6012 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6013 if (gimple_assign_rhs2 (user) == negate)
6015 tree rhs1 = gimple_assign_rhs1 (user);
6016 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6017 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6018 negateop);
6019 update_stmt (user);
6022 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6024 if (gimple_assign_rhs1 (user) == negate)
6026 /* We have
6027 x = -negateop
6028 y = x - b
6029 which we transform into
6030 x = negateop + b
6031 y = -x .
6032 This pushes down the negate which we possibly can merge
6033 into some other operation, hence insert it into the
6034 plus_negates vector. */
6035 gimple *feed = SSA_NAME_DEF_STMT (negate);
6036 tree b = gimple_assign_rhs2 (user);
6037 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6038 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6039 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6040 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6041 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6042 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6043 user = gsi_stmt (gsi2);
6044 update_stmt (user);
6045 reassoc_remove_stmt (&gsi);
6046 release_defs (feed);
6047 plus_negates.safe_push (gimple_assign_lhs (user));
6049 else
6051 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6052 getting rid of one operation. */
6053 tree rhs1 = gimple_assign_rhs1 (user);
6054 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6055 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6056 update_stmt (gsi_stmt (gsi));
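/* Illustrative cases (arbitrary names):

     t_1 = -a_2;  x_3 = b_4 + t_1;   ->   x_3 = b_4 - a_2;
     t_1 = -a_2;  x_3 = b_4 - t_1;   ->   x_3 = b_4 + a_2;
     t_1 = -a_2;  x_3 = t_1 - b_4;   ->   t_5 = a_2 + b_4;  x_3 = -t_5;

   In the last case the fresh negate is pushed onto plus_negates so it
   can be reconsidered on a later iteration of the loop above.  */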
6062 /* Break up subtract operations in block BB.
6064 We do this top down because we don't know whether the subtract is
6065 part of a possible chain of reassociation except at the top.
6067 IE given
6068 d = f + g
6069 c = a + e
6070 b = c - d
6071 q = b - r
6072 k = t - q
6074 we want to break up k = t - q, but we won't until we've transformed q
6075 = b - r, which won't be broken up until we transform b = c - d.
6077 En passant, clear the GIMPLE visited flag on every statement
6078 and set UIDs within each basic block. */
6080 static void
6081 break_up_subtract_bb (basic_block bb)
6083 gimple_stmt_iterator gsi;
6084 basic_block son;
6085 unsigned int uid = 1;
6087 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6089 gimple *stmt = gsi_stmt (gsi);
6090 gimple_set_visited (stmt, false);
6091 gimple_set_uid (stmt, uid++);
6093 if (!is_gimple_assign (stmt)
6094 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6095 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6096 continue;
6098 /* Look for simple gimple subtract operations. */
6099 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6101 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6102 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6103 continue;
6105 /* Check for a subtract used only in an addition. If this
6106 is the case, transform it into add of a negate for better
6107 reassociation. IE transform C = A-B into C = A + -B if C
6108 is only used in an addition. */
6109 if (should_break_up_subtract (stmt))
6110 break_up_subtract (stmt, &gsi);
6112 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6113 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6114 plus_negates.safe_push (gimple_assign_lhs (stmt));
6116 for (son = first_dom_son (CDI_DOMINATORS, bb);
6117 son;
6118 son = next_dom_son (CDI_DOMINATORS, son))
6119 break_up_subtract_bb (son);
6122 /* Used for repeated factor analysis. */
6123 struct repeat_factor
6125 /* An SSA name that occurs in a multiply chain. */
6126 tree factor;
6128 /* Cached rank of the factor. */
6129 unsigned rank;
6131 /* Number of occurrences of the factor in the chain. */
6132 HOST_WIDE_INT count;
6134 /* An SSA name representing the product of this factor and
6135 all factors appearing later in the repeated factor vector. */
6136 tree repr;
6140 static vec<repeat_factor> repeat_factor_vec;
6142 /* Used for sorting the repeat factor vector. Sort primarily by
6143 ascending occurrence count, secondarily by descending rank. */
6145 static int
6146 compare_repeat_factors (const void *x1, const void *x2)
6148 const repeat_factor *rf1 = (const repeat_factor *) x1;
6149 const repeat_factor *rf2 = (const repeat_factor *) x2;
6151 if (rf1->count != rf2->count)
6152 return rf1->count - rf2->count;
6154 return rf2->rank - rf1->rank;
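/* For example, factors with (count, rank) pairs x:(4, 9), y:(2, 7) and
   z:(2, 8) sort as { z, y, x }: counts ascend (2, 2, 4), and within the
   tied count of 2 the higher-ranked z precedes y.  */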
6157 /* Look for repeated operands in OPS in the multiply tree rooted at
6158 STMT. Replace them with an optimal sequence of multiplies and powi
6159 builtin calls, and remove the used operands from OPS. Return an
6160 SSA name representing the value of the replacement sequence. */
6162 static tree
6163 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6165 unsigned i, j, vec_len;
6166 int ii;
6167 operand_entry *oe;
6168 repeat_factor *rf1, *rf2;
6169 repeat_factor rfnew;
6170 tree result = NULL_TREE;
6171 tree target_ssa, iter_result;
6172 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6173 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6174 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6175 gimple *mul_stmt, *pow_stmt;
6177 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6178 target, unless type is integral. */
6179 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6180 return NULL_TREE;
6182 /* Allocate the repeated factor vector. */
6183 repeat_factor_vec.create (10);
6185 /* Scan the OPS vector for all SSA names in the product and build
6186 up a vector of occurrence counts for each factor. */
6187 FOR_EACH_VEC_ELT (*ops, i, oe)
6189 if (TREE_CODE (oe->op) == SSA_NAME)
6191 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6193 if (rf1->factor == oe->op)
6195 rf1->count += oe->count;
6196 break;
6200 if (j >= repeat_factor_vec.length ())
6202 rfnew.factor = oe->op;
6203 rfnew.rank = oe->rank;
6204 rfnew.count = oe->count;
6205 rfnew.repr = NULL_TREE;
6206 repeat_factor_vec.safe_push (rfnew);
6211 /* Sort the repeated factor vector by (a) increasing occurrence count,
6212 and (b) decreasing rank. */
6213 repeat_factor_vec.qsort (compare_repeat_factors);
6215 /* It is generally best to combine as many base factors as possible
6216 into a product before applying __builtin_powi to the result.
6217 However, the sort order chosen for the repeated factor vector
6218 allows us to cache partial results for the product of the base
6219 factors for subsequent use. When we already have a cached partial
6220 result from a previous iteration, it is best to make use of it
6221 before looking for another __builtin_pow opportunity.
6223 As an example, consider x * x * y * y * y * z * z * z * z.
6224 We want to first compose the product x * y * z, raise it to the
6225 second power, then multiply this by y * z, and finally multiply
6226 by z. This can be done in 5 multiplies provided we cache y * z
6227 for use in both expressions:
6229 t1 = y * z
6230 t2 = t1 * x
6231 t3 = t2 * t2
6232 t4 = t1 * t3
6233 result = t4 * z
6235 If we instead ignored the cached y * z and first multiplied by
6236 the __builtin_pow opportunity z * z, we would get the inferior:
6238 t1 = y * z
6239 t2 = t1 * x
6240 t3 = t2 * t2
6241 t4 = z * z
6242 t5 = t3 * t4
6243 result = t5 * y */
6245 vec_len = repeat_factor_vec.length ();
6247 /* Repeatedly look for opportunities to create a builtin_powi call. */
6248 while (true)
6250 HOST_WIDE_INT power;
6252 /* First look for the largest cached product of factors from
6253 preceding iterations. If found, create a builtin_powi for
6254 it if the minimum occurrence count for its factors is at
6255 least 2, or just use this cached product as our next
6256 multiplicand if the minimum occurrence count is 1. */
6257 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6259 if (rf1->repr && rf1->count > 0)
6260 break;
6263 if (j < vec_len)
6265 power = rf1->count;
6267 if (power == 1)
6269 iter_result = rf1->repr;
6271 if (dump_file && (dump_flags & TDF_DETAILS))
6273 unsigned elt;
6274 repeat_factor *rf;
6275 fputs ("Multiplying by cached product ", dump_file);
6276 for (elt = j; elt < vec_len; elt++)
6278 rf = &repeat_factor_vec[elt];
6279 print_generic_expr (dump_file, rf->factor);
6280 if (elt < vec_len - 1)
6281 fputs (" * ", dump_file);
6283 fputs ("\n", dump_file);
6286 else
6288 if (INTEGRAL_TYPE_P (type))
6290 gcc_assert (power > 1);
6291 gimple_stmt_iterator gsip = gsi;
6292 gsi_prev (&gsip);
6293 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6294 rf1->repr, power);
6295 gimple_stmt_iterator gsic = gsi;
6296 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6298 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6299 gimple_set_visited (gsi_stmt (gsic), true);
6300 gsi_prev (&gsic);
6303 else
6305 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6306 pow_stmt
6307 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6308 build_int_cst (integer_type_node,
6309 power));
6310 gimple_call_set_lhs (pow_stmt, iter_result);
6311 gimple_set_location (pow_stmt, gimple_location (stmt));
6312 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6313 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6316 if (dump_file && (dump_flags & TDF_DETAILS))
6318 unsigned elt;
6319 repeat_factor *rf;
6320 fputs ("Building __builtin_pow call for cached product (",
6321 dump_file);
6322 for (elt = j; elt < vec_len; elt++)
6324 rf = &repeat_factor_vec[elt];
6325 print_generic_expr (dump_file, rf->factor);
6326 if (elt < vec_len - 1)
6327 fputs (" * ", dump_file);
6329 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6330 power);
6334 else
6336 /* Otherwise, find the first factor in the repeated factor
6337 vector whose occurrence count is at least 2. If no such
6338 factor exists, there are no builtin_powi opportunities
6339 remaining. */
6340 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6342 if (rf1->count >= 2)
6343 break;
6346 if (j >= vec_len)
6347 break;
6349 power = rf1->count;
6351 if (dump_file && (dump_flags & TDF_DETAILS))
6353 unsigned elt;
6354 repeat_factor *rf;
6355 fputs ("Building __builtin_pow call for (", dump_file);
6356 for (elt = j; elt < vec_len; elt++)
6358 rf = &repeat_factor_vec[elt];
6359 print_generic_expr (dump_file, rf->factor);
6360 if (elt < vec_len - 1)
6361 fputs (" * ", dump_file);
6363 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6366 reassociate_stats.pows_created++;
6368 /* Visit each element of the vector in reverse order (so that
6369 high-occurrence elements are visited first, and within the
6370 same occurrence count, lower-ranked elements are visited
6371 first). Form a linear product of all elements in this order
6372 whose occurrence count is at least that of element J.
6373 Record the SSA name representing the product of each element
6374 with all subsequent elements in the vector. */
6375 if (j == vec_len - 1)
6376 rf1->repr = rf1->factor;
6377 else
6379 for (ii = vec_len - 2; ii >= (int)j; ii--)
6381 tree op1, op2;
6383 rf1 = &repeat_factor_vec[ii];
6384 rf2 = &repeat_factor_vec[ii + 1];
6386 /* Init the last factor's representative to be itself. */
6387 if (!rf2->repr)
6388 rf2->repr = rf2->factor;
6390 op1 = rf1->factor;
6391 op2 = rf2->repr;
6393 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6394 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6395 op1, op2);
6396 gimple_set_location (mul_stmt, gimple_location (stmt));
6397 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6398 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6399 rf1->repr = target_ssa;
6401 /* Don't reprocess the multiply we just introduced. */
6402 gimple_set_visited (mul_stmt, true);
6406 /* Form a call to __builtin_powi for the maximum product
6407 just formed, raised to the power obtained earlier. */
6408 rf1 = &repeat_factor_vec[j];
6409 if (INTEGRAL_TYPE_P (type))
6411 gcc_assert (power > 1);
6412 gimple_stmt_iterator gsip = gsi;
6413 gsi_prev (&gsip);
6414 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6415 rf1->repr, power);
6416 gimple_stmt_iterator gsic = gsi;
6417 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6419 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6420 gimple_set_visited (gsi_stmt (gsic), true);
6421 gsi_prev (&gsic);
6424 else
6426 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6427 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6428 build_int_cst (integer_type_node,
6429 power));
6430 gimple_call_set_lhs (pow_stmt, iter_result);
6431 gimple_set_location (pow_stmt, gimple_location (stmt));
6432 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6433 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6437 /* If we previously formed at least one other builtin_powi call,
6438 form the product of this one and those others. */
6439 if (result)
6441 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6442 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6443 result, iter_result);
6444 gimple_set_location (mul_stmt, gimple_location (stmt));
6445 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6446 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6447 gimple_set_visited (mul_stmt, true);
6448 result = new_result;
6450 else
6451 result = iter_result;
6453 /* Decrement the occurrence count of each element in the product
6454 by the count found above, and remove this many copies of each
6455 factor from OPS. */
6456 for (i = j; i < vec_len; i++)
6458 unsigned k = power;
6459 unsigned n;
6461 rf1 = &repeat_factor_vec[i];
6462 rf1->count -= power;
6464 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6466 if (oe->op == rf1->factor)
6468 if (oe->count <= k)
6470 ops->ordered_remove (n);
6471 k -= oe->count;
6473 if (k == 0)
6474 break;
6476 else
6478 oe->count -= k;
6479 break;
6486 /* At this point all elements in the repeated factor vector have a
6487 remaining occurrence count of 0 or 1, and those with a count of 1
6488 don't have cached representatives. Re-sort the ops vector and
6489 clean up. */
6490 ops->qsort (sort_by_operand_rank);
6491 repeat_factor_vec.release ();
6493 /* Return the final product computed herein. Note that there may
6494 still be some elements with single occurrence count left in OPS;
6495 those will be handled by the normal reassociation logic. */
6496 return result;
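/* A small worked example (arbitrary names): for OPS representing
   x * x * y * y * y the occurrence counts are x:2, y:3.  The first
   iteration forms the product x * y, emits __builtin_powi (x * y, 2)
   (or an equivalent multiply sequence for integral types) and leaves y
   with one remaining occurrence; the second iteration multiplies that
   result by the cached y.  The returned SSA name therefore computes
   (x * y) * (x * y) * y and OPS is left empty.  */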
6499 /* Attempt to optimize
6500 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6501 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6503 static void
6504 attempt_builtin_copysign (vec<operand_entry *> *ops)
6506 operand_entry *oe;
6507 unsigned int i;
6508 unsigned int length = ops->length ();
6509 tree cst = ops->last ()->op;
6511 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6512 return;
6514 FOR_EACH_VEC_ELT (*ops, i, oe)
6516 if (TREE_CODE (oe->op) == SSA_NAME
6517 && has_single_use (oe->op))
6519 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6520 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6522 tree arg0, arg1;
6523 switch (gimple_call_combined_fn (old_call))
6525 CASE_CFN_COPYSIGN:
6526 CASE_CFN_COPYSIGN_FN:
6527 arg0 = gimple_call_arg (old_call, 0);
6528 arg1 = gimple_call_arg (old_call, 1);
6529 /* The first argument of copysign must be a constant,
6530 otherwise there's nothing to do. */
6531 if (TREE_CODE (arg0) == REAL_CST)
6533 tree type = TREE_TYPE (arg0);
6534 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6535 /* If we couldn't fold to a single constant, skip it.
6536 That happens e.g. for inexact multiplication when
6537 -frounding-math. */
6538 if (mul == NULL_TREE)
6539 break;
6540 /* Instead of adjusting OLD_CALL, let's build a new
6541 call to not leak the LHS and prevent keeping bogus
6542 debug statements. DCE will clean up the old call. */
6543 gcall *new_call;
6544 if (gimple_call_internal_p (old_call))
6545 new_call = gimple_build_call_internal
6546 (IFN_COPYSIGN, 2, mul, arg1);
6547 else
6548 new_call = gimple_build_call
6549 (gimple_call_fndecl (old_call), 2, mul, arg1);
6550 tree lhs = make_ssa_name (type);
6551 gimple_call_set_lhs (new_call, lhs);
6552 gimple_set_location (new_call,
6553 gimple_location (old_call));
6554 insert_stmt_after (new_call, old_call);
6555 /* We've used the constant, get rid of it. */
6556 ops->pop ();
6557 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6558 /* Handle the CST1 < 0 case by negating the result. */
6559 if (cst1_neg)
6561 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6562 gimple *negate_stmt
6563 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6564 insert_stmt_after (negate_stmt, new_call);
6565 oe->op = negrhs;
6567 else
6568 oe->op = lhs;
6569 if (dump_file && (dump_flags & TDF_DETAILS))
6571 fprintf (dump_file, "Optimizing copysign: ");
6572 print_generic_expr (dump_file, cst);
6573 fprintf (dump_file, " * COPYSIGN (");
6574 print_generic_expr (dump_file, arg0);
6575 fprintf (dump_file, ", ");
6576 print_generic_expr (dump_file, arg1);
6577 fprintf (dump_file, ") into %sCOPYSIGN (",
6578 cst1_neg ? "-" : "");
6579 print_generic_expr (dump_file, mul);
6580 fprintf (dump_file, ", ");
6581 print_generic_expr (dump_file, arg1);
6582 fprintf (dump_file, ")\n");
6584 return;
6586 break;
6587 default:
6588 break;
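/* For instance (arbitrary names), with the constant 2.0 as the last
   operand of the multiplication chain and an operand defined by

     t_1 = __builtin_copysign (3.0, y_2);

   the constant is folded into a fresh call

     t_3 = __builtin_copysign (6.0, y_2);

   which replaces t_1 in OPS; with -2.0 instead, the new call is
   additionally negated, matching the -copysign (CST1 * CST2, y) form
   described above.  */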
6595 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6597 static void
6598 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6600 tree rhs1;
6602 if (dump_file && (dump_flags & TDF_DETAILS))
6604 fprintf (dump_file, "Transforming ");
6605 print_gimple_stmt (dump_file, stmt, 0);
6608 rhs1 = gimple_assign_rhs1 (stmt);
6609 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6610 update_stmt (stmt);
6611 remove_visited_stmt_chain (rhs1);
6613 if (dump_file && (dump_flags & TDF_DETAILS))
6615 fprintf (dump_file, " into ");
6616 print_gimple_stmt (dump_file, stmt, 0);
6620 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6622 static void
6623 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6624 tree rhs1, tree rhs2)
6626 if (dump_file && (dump_flags & TDF_DETAILS))
6628 fprintf (dump_file, "Transforming ");
6629 print_gimple_stmt (dump_file, stmt, 0);
6632 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6633 update_stmt (gsi_stmt (*gsi));
6634 remove_visited_stmt_chain (rhs1);
6636 if (dump_file && (dump_flags & TDF_DETAILS))
6638 fprintf (dump_file, " into ");
6639 print_gimple_stmt (dump_file, stmt, 0);
6643 /* Reassociate expressions in basic block BB and its children in the
6644 post-dominator tree.
6646 Bubble up return status from maybe_optimize_range_tests. */
6648 static bool
6649 reassociate_bb (basic_block bb)
6651 gimple_stmt_iterator gsi;
6652 basic_block son;
6653 gimple *stmt = last_stmt (bb);
6654 bool cfg_cleanup_needed = false;
6656 if (stmt && !gimple_visited_p (stmt))
6657 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6659 bool do_prev = false;
6660 for (gsi = gsi_last_bb (bb);
6661 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6663 do_prev = true;
6664 stmt = gsi_stmt (gsi);
6666 if (is_gimple_assign (stmt)
6667 && !stmt_could_throw_p (cfun, stmt))
6669 tree lhs, rhs1, rhs2;
6670 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6672 /* If this was part of an already processed statement,
6673 we don't need to touch it again. */
6674 if (gimple_visited_p (stmt))
6676 /* This statement might have become dead because of previous
6677 reassociations. */
6678 if (has_zero_uses (gimple_get_lhs (stmt)))
6680 reassoc_remove_stmt (&gsi);
6681 release_defs (stmt);
6682 /* We might end up removing the last stmt above which
6683 places the iterator to the end of the sequence.
6684 Reset it to the last stmt in this case and make sure
6685 we don't do gsi_prev in that case. */
6686 if (gsi_end_p (gsi))
6688 gsi = gsi_last_bb (bb);
6689 do_prev = false;
6692 continue;
6695 /* If this is not a gimple binary expression, there is
6696 nothing for us to do with it. */
6697 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6698 continue;
6700 lhs = gimple_assign_lhs (stmt);
6701 rhs1 = gimple_assign_rhs1 (stmt);
6702 rhs2 = gimple_assign_rhs2 (stmt);
6704 /* For non-bit or min/max operations we can't associate
6705 all types. Verify that here. */
6706 if ((rhs_code != BIT_IOR_EXPR
6707 && rhs_code != BIT_AND_EXPR
6708 && rhs_code != BIT_XOR_EXPR
6709 && rhs_code != MIN_EXPR
6710 && rhs_code != MAX_EXPR
6711 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6712 || !can_reassociate_op_p (rhs1)
6713 || !can_reassociate_op_p (rhs2))
6714 continue;
6716 if (associative_tree_code (rhs_code))
6718 auto_vec<operand_entry *> ops;
6719 tree powi_result = NULL_TREE;
6720 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6722 /* There may be no immediate uses left by the time we
6723 get here because we may have eliminated them all. */
6724 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6725 continue;
6727 gimple_set_visited (stmt, true);
6728 linearize_expr_tree (&ops, stmt, true, true);
6729 ops.qsort (sort_by_operand_rank);
6730 int orig_len = ops.length ();
6731 optimize_ops_list (rhs_code, &ops);
6732 if (undistribute_ops_list (rhs_code, &ops,
6733 loop_containing_stmt (stmt)))
6735 ops.qsort (sort_by_operand_rank);
6736 optimize_ops_list (rhs_code, &ops);
6738 if (undistribute_bitref_for_vector (rhs_code, &ops,
6739 loop_containing_stmt (stmt)))
6741 ops.qsort (sort_by_operand_rank);
6742 optimize_ops_list (rhs_code, &ops);
6744 if (rhs_code == PLUS_EXPR
6745 && transform_add_to_multiply (&ops))
6746 ops.qsort (sort_by_operand_rank);
6748 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6750 if (is_vector)
6751 optimize_vec_cond_expr (rhs_code, &ops);
6752 else
6753 optimize_range_tests (rhs_code, &ops, NULL);
6756 if (rhs_code == MULT_EXPR && !is_vector)
6758 attempt_builtin_copysign (&ops);
6760 if (reassoc_insert_powi_p
6761 && (flag_unsafe_math_optimizations
6762 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6763 powi_result = attempt_builtin_powi (stmt, &ops);
6766 operand_entry *last;
6767 bool negate_result = false;
6768 if (ops.length () > 1
6769 && rhs_code == MULT_EXPR)
6771 last = ops.last ();
6772 if ((integer_minus_onep (last->op)
6773 || real_minus_onep (last->op))
6774 && !HONOR_SNANS (TREE_TYPE (lhs))
6775 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6776 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6778 ops.pop ();
6779 negate_result = true;
6783 tree new_lhs = lhs;
6784 /* If the operand vector is now empty, all operands were
6785 consumed by the __builtin_powi optimization. */
6786 if (ops.length () == 0)
6787 transform_stmt_to_copy (&gsi, stmt, powi_result);
6788 else if (ops.length () == 1)
6790 tree last_op = ops.last ()->op;
6792 /* If the stmt that defines operand has to be inserted, insert it
6793 before the use. */
6794 if (ops.last ()->stmt_to_insert)
6795 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6796 if (powi_result)
6797 transform_stmt_to_multiply (&gsi, stmt, last_op,
6798 powi_result);
6799 else
6800 transform_stmt_to_copy (&gsi, stmt, last_op);
6802 else
6804 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6805 int ops_num = ops.length ();
6806 int width;
6808 /* For binary bit operations, if there are at least 3
6809 operands and the last operand in OPS is a constant,
6810 move it to the front. This helps ensure that we generate
6811 (X & Y) & C rather than (X & C) & Y. The former will
6812 often match a canonical bit test when we get to RTL. */
6813 if (ops.length () > 2
6814 && (rhs_code == BIT_AND_EXPR
6815 || rhs_code == BIT_IOR_EXPR
6816 || rhs_code == BIT_XOR_EXPR)
6817 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6818 std::swap (*ops[0], *ops[ops_num - 1]);
6820 /* Only rewrite the expression tree into a parallel form in the
6821 last reassoc pass, to avoid useless back-and-forth work
6822 with the initial linearization. */
6823 if (!reassoc_insert_powi_p
6824 && ops.length () > 3
6825 && (width = get_reassociation_width (ops_num, rhs_code,
6826 mode)) > 1)
6828 if (dump_file && (dump_flags & TDF_DETAILS))
6829 fprintf (dump_file,
6830 "Width = %d was chosen for reassociation\n",
6831 width);
6832 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6833 width, ops);
6835 else
6837 /* When there are three operands left, we want
6838 to make sure the ones that get the double
6839 binary op are chosen wisely. */
6840 int len = ops.length ();
6841 if (len >= 3)
6842 swap_ops_for_binary_stmt (ops, len - 3);
6844 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6845 powi_result != NULL
6846 || negate_result,
6847 len != orig_len);
6850 /* If we combined some repeated factors into a
6851 __builtin_powi call, multiply that result by the
6852 reassociated operands. */
6853 if (powi_result)
6855 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6856 tree type = TREE_TYPE (lhs);
6857 tree target_ssa = make_temp_ssa_name (type, NULL,
6858 "reassocpow");
6859 gimple_set_lhs (lhs_stmt, target_ssa);
6860 update_stmt (lhs_stmt);
6861 if (lhs != new_lhs)
6863 target_ssa = new_lhs;
6864 new_lhs = lhs;
6866 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6867 powi_result, target_ssa);
6868 gimple_set_location (mul_stmt, gimple_location (stmt));
6869 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6870 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6874 if (negate_result)
6876 stmt = SSA_NAME_DEF_STMT (lhs);
6877 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6878 gimple_set_lhs (stmt, tmp);
6879 if (lhs != new_lhs)
6880 tmp = new_lhs;
6881 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6882 tmp);
6883 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6884 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6885 update_stmt (stmt);
6890 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6891 son;
6892 son = next_dom_son (CDI_POST_DOMINATORS, son))
6893 cfg_cleanup_needed |= reassociate_bb (son);
6895 return cfg_cleanup_needed;
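/* Putting the pieces together on a small example (arbitrary names):

     t_1 = x_2 * x_2;
     t_3 = t_1 * x_2;
     r_4 = t_3 * y_5;

   is linearized into OPS = { x_2, x_2, x_2, y_5 }.  When powi insertion
   is enabled and the type allows it, attempt_builtin_powi consumes the
   three copies of x_2 and produces p_6 = __builtin_powi (x_2, 3); the
   single remaining operand then turns the final statement into a
   multiply of y_5 by p_6 via transform_stmt_to_multiply.  */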
6898 /* Add jumps around shifts for range tests turned into bit tests.
6899 For each SSA_NAME VAR we have code like:
6900 VAR = ...; // final stmt of range comparison
6901 // bit test here...;
6902 OTHERVAR = ...; // final stmt of the bit test sequence
6903 RES = VAR | OTHERVAR;
6904 Turn the above into:
6905 VAR = ...;
6906 if (VAR != 0)
6907 goto <l3>;
6908 else
6909 goto <l2>;
6910 <l2>:
6911 // bit test here...;
6912 OTHERVAR = ...;
6913 <l3>:
6914 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6916 static void
6917 branch_fixup (void)
6919 tree var;
6920 unsigned int i;
6922 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6924 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6925 gimple *use_stmt;
6926 use_operand_p use;
6927 bool ok = single_imm_use (var, &use, &use_stmt);
6928 gcc_assert (ok
6929 && is_gimple_assign (use_stmt)
6930 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6931 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6933 basic_block cond_bb = gimple_bb (def_stmt);
6934 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6935 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6937 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6938 gimple *g = gimple_build_cond (NE_EXPR, var,
6939 build_zero_cst (TREE_TYPE (var)),
6940 NULL_TREE, NULL_TREE);
6941 location_t loc = gimple_location (use_stmt);
6942 gimple_set_location (g, loc);
6943 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6945 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6946 etrue->probability = profile_probability::even ();
6947 edge efalse = find_edge (cond_bb, then_bb);
6948 efalse->flags = EDGE_FALSE_VALUE;
6949 efalse->probability -= etrue->probability;
6950 then_bb->count -= etrue->count ();
6952 tree othervar = NULL_TREE;
6953 if (gimple_assign_rhs1 (use_stmt) == var)
6954 othervar = gimple_assign_rhs2 (use_stmt);
6955 else if (gimple_assign_rhs2 (use_stmt) == var)
6956 othervar = gimple_assign_rhs1 (use_stmt);
6957 else
6958 gcc_unreachable ();
6959 tree lhs = gimple_assign_lhs (use_stmt);
6960 gphi *phi = create_phi_node (lhs, merge_bb);
6961 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6962 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6963 gsi = gsi_for_stmt (use_stmt);
6964 gsi_remove (&gsi, true);
6966 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6967 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6969 reassoc_branch_fixups.release ();
6972 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6973 void debug_ops_vector (vec<operand_entry *> ops);
6975 /* Dump the operand entry vector OPS to FILE. */
6977 void
6978 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6980 operand_entry *oe;
6981 unsigned int i;
6983 FOR_EACH_VEC_ELT (ops, i, oe)
6985 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6986 print_generic_expr (file, oe->op);
6987 fprintf (file, "\n");
6991 /* Dump the operand entry vector OPS to STDERR. */
6993 DEBUG_FUNCTION void
6994 debug_ops_vector (vec<operand_entry *> ops)
6996 dump_ops_vector (stderr, ops);
6999 /* Bubble up return status from reassociate_bb. */
7001 static bool
7002 do_reassoc (void)
7004 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7005 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7008 /* Initialize the reassociation pass. */
7010 static void
7011 init_reassoc (void)
7013 int i;
7014 int64_t rank = 2;
7015 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7017 /* Find the loops, so that we can prevent moving calculations in
7018 them. */
7019 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7021 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7023 next_operand_entry_id = 0;
7025 /* Reverse RPO (Reverse Post Order) will give us something where
7026 deeper loops come later. */
7027 pre_and_rev_post_order_compute (NULL, bbs, false);
7028 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7029 operand_rank = new hash_map<tree, int64_t>;
7031 /* Give each default definition a distinct rank. This includes
7032 parameters and the static chain. Walk backwards over all
7033 SSA names so that we get proper rank ordering according
7034 to tree_swap_operands_p. */
7035 for (i = num_ssa_names - 1; i > 0; --i)
7037 tree name = ssa_name (i);
7038 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7039 insert_operand_rank (name, ++rank);
7042 /* Set up rank for each BB */
7043 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7044 bb_rank[bbs[i]] = ++rank << 16;
7046 free (bbs);
7047 calculate_dominance_info (CDI_POST_DOMINATORS);
7048 plus_negates = vNULL;
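/* As an example of the resulting ranks (assuming three default
   definitions, say two parameters and the static chain), those SSA
   names receive ranks 3, 4 and 5, and each basic block in the computed
   order then gets a base rank of (++rank) << 16, so statements in later
   blocks, which include deeper loops, always outrank statements in
   earlier ones.  */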
7051 /* Cleanup after the reassociation pass, and print stats if
7052 requested. */
7054 static void
7055 fini_reassoc (void)
7057 statistics_counter_event (cfun, "Linearized",
7058 reassociate_stats.linearized);
7059 statistics_counter_event (cfun, "Constants eliminated",
7060 reassociate_stats.constants_eliminated);
7061 statistics_counter_event (cfun, "Ops eliminated",
7062 reassociate_stats.ops_eliminated);
7063 statistics_counter_event (cfun, "Statements rewritten",
7064 reassociate_stats.rewritten);
7065 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7066 reassociate_stats.pows_encountered);
7067 statistics_counter_event (cfun, "Built-in powi calls created",
7068 reassociate_stats.pows_created);
7070 delete operand_rank;
7071 bitmap_clear (biased_names);
7072 operand_entry_pool.release ();
7073 free (bb_rank);
7074 plus_negates.release ();
7075 free_dominance_info (CDI_POST_DOMINATORS);
7076 loop_optimizer_finalize ();
7079 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7080 insertion of __builtin_powi calls.
7082 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7083 optimization of a gimple conditional. Otherwise returns zero. */
7085 static unsigned int
7086 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7088 reassoc_insert_powi_p = insert_powi_p;
7089 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7091 init_reassoc ();
7093 bool cfg_cleanup_needed;
7094 cfg_cleanup_needed = do_reassoc ();
7095 repropagate_negates ();
7096 branch_fixup ();
7098 fini_reassoc ();
7099 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7102 namespace {
7104 const pass_data pass_data_reassoc =
7106 GIMPLE_PASS, /* type */
7107 "reassoc", /* name */
7108 OPTGROUP_NONE, /* optinfo_flags */
7109 TV_TREE_REASSOC, /* tv_id */
7110 ( PROP_cfg | PROP_ssa ), /* properties_required */
7111 0, /* properties_provided */
7112 0, /* properties_destroyed */
7113 0, /* todo_flags_start */
7114 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7117 class pass_reassoc : public gimple_opt_pass
7119 public:
7120 pass_reassoc (gcc::context *ctxt)
7121 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7124 /* opt_pass methods: */
7125 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7126 void set_pass_param (unsigned int n, bool param) final override
7128 gcc_assert (n == 0);
7129 insert_powi_p = param;
7130 bias_loop_carried_phi_ranks_p = !param;
7132 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7133 unsigned int execute (function *) final override
7135 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7138 private:
7139 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7140 point 3a in the pass header comment. */
7141 bool insert_powi_p;
7142 bool bias_loop_carried_phi_ranks_p;
7143 }; // class pass_reassoc
7145 } // anon namespace
7147 gimple_opt_pass *
7148 make_pass_reassoc (gcc::context *ctxt)
7150 return new pass_reassoc (ctxt);