compiler: only build thunk struct type when it is needed
[official-gcc.git] / gcc / tree-ssa-reassoc.cc
blobb39c3c882c4b65494f94cefbb6e46249d044b2c9
1 /* Reassociation for trees.
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
62 It consists of five steps:
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
72 3. Optimization of the operand lists, eliminating things like a +
73 -a, a & a, etc.
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
82 5. Repropagate negates, as nothing else will clean it up ATM.
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
103 a (1), b (1), c (1), d (2), e (2)
106 We start with our merge worklist empty, and the ops list with all of
107 those on it.
109 You want to first merge all leaves of the same rank, as much as
110 possible.
112 So first build a binary op of
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
124 Then build a binary op of d + e
125 mergetmp2 = d + e
127 and put mergetmp2 on the merge worklist.
129 so merge worklist = {mergetmp, c, mergetmp2}
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
134 So we have
136 build binary op
137 mergetmp3 = mergetmp + c
139 worklist = {mergetmp2, mergetmp3}
141 mergetmp4 = mergetmp2 + mergetmp3
143 worklist = {mergetmp4}
145 because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
154 So why don't we do this?
156 Because it's expensive, and rarely will help. Most trees we are
157 reassociating have 3 or less ops. If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
162 mergetmp = op1 + op2
163 newstmt = mergetmp + op3
165 instead of
166 mergetmp = op2 + op3
167 newstmt = mergetmp + op1
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
171 can do.
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p;
183 /* Enable biasing ranks of loop accumulators. We don't want this before
184 vectorization, since it interferes with reduction chains. */
185 static bool reassoc_bias_loop_carried_phi_ranks_p;
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
199 static object_allocator<operand_entry> operand_entry_pool
200 ("operand entry pool");
202 /* This is used to assign a unique ID to each struct operand_entry
203 so that qsort results are identical on different hosts. */
204 static unsigned int next_operand_entry_id;
206 /* Starting rank number for a given basic block, so that we can rank
207 operations using unmovable instructions in that BB based on the bb
208 depth. */
209 static int64_t *bb_rank;
211 /* Operand->rank hashtable. */
212 static hash_map<tree, int64_t> *operand_rank;
214 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
215 biased. */
216 static auto_bitmap biased_names;
218 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
219 all basic blocks the CFG should be adjusted - basic blocks
220 split right after that SSA_NAME's definition statement and before
221 the only use, which must be a bit ior. */
222 static vec<tree> reassoc_branch_fixups;
224 /* Forward decls. */
225 static int64_t get_rank (tree);
226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
229 possibly added by gsi_remove. */
231 static bool
232 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 gimple *stmt = gsi_stmt (*gsi);
236 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
237 return gsi_remove (gsi, true);
239 gimple_stmt_iterator prev = *gsi;
240 gsi_prev (&prev);
241 unsigned uid = gimple_uid (stmt);
242 basic_block bb = gimple_bb (stmt);
243 bool ret = gsi_remove (gsi, true);
244 if (!gsi_end_p (prev))
245 gsi_next (&prev);
246 else
247 prev = gsi_start_bb (bb);
248 gimple *end_stmt = gsi_stmt (*gsi);
249 while ((stmt = gsi_stmt (prev)) != end_stmt)
251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
252 gimple_set_uid (stmt, uid);
253 gsi_next (&prev);
255 return ret;
258 /* Bias amount for loop-carried phis. We want this to be larger than
259 the depth of any reassociation tree we can see, but not larger than
260 the rank difference between two blocks. */
261 #define PHI_LOOP_BIAS (1 << 15)
263 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
264 operands to the STMT's left-hand side. The goal is to preserve bias in code
265 like this:
267 x_1 = phi(x_0, x_2)
268 a = x_1 | 1
269 b = a ^ 2
270 .MEM = b
271 c = b + d
272 x_2 = c + e
274 That is, we need to preserve bias along single-use chains originating from
275 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
276 uses, because only they participate in rank propagation. */
277 static bool
278 propagate_bias_p (gimple *stmt)
280 use_operand_p use;
281 imm_use_iterator use_iter;
282 gimple *single_use_stmt = NULL;
284 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
285 return false;
287 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 gimple *current_use_stmt = USE_STMT (use);
291 if (is_gimple_assign (current_use_stmt)
292 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
295 return false;
296 single_use_stmt = current_use_stmt;
300 if (single_use_stmt == NULL)
301 return false;
303 if (gimple_bb (stmt)->loop_father
304 != gimple_bb (single_use_stmt)->loop_father)
305 return false;
307 return true;
310 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
311 an innermost loop, and the phi has only a single use which is inside
312 the loop, then the rank is the block rank of the loop latch plus an
313 extra bias for the loop-carried dependence. This causes expressions
314 calculated into an accumulator variable to be independent for each
315 iteration of the loop. If STMT is some other phi, the rank is the
316 block rank of its containing block. */
317 static int64_t
318 phi_rank (gimple *stmt)
320 basic_block bb = gimple_bb (stmt);
321 class loop *father = bb->loop_father;
322 tree res;
323 unsigned i;
324 use_operand_p use;
325 gimple *use_stmt;
327 if (!reassoc_bias_loop_carried_phi_ranks_p)
328 return bb_rank[bb->index];
330 /* We only care about real loops (those with a latch). */
331 if (!father->latch)
332 return bb_rank[bb->index];
334 /* Interesting phis must be in headers of innermost loops. */
335 if (bb != father->header
336 || father->inner)
337 return bb_rank[bb->index];
339 /* Ignore virtual SSA_NAMEs. */
340 res = gimple_phi_result (stmt);
341 if (virtual_operand_p (res))
342 return bb_rank[bb->index];
344 /* The phi definition must have a single use, and that use must be
345 within the loop. Otherwise this isn't an accumulator pattern. */
346 if (!single_imm_use (res, &use, &use_stmt)
347 || gimple_bb (use_stmt)->loop_father != father)
348 return bb_rank[bb->index];
350 /* Look for phi arguments from within the loop. If found, bias this phi. */
351 for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 tree arg = gimple_phi_arg_def (stmt, i);
354 if (TREE_CODE (arg) == SSA_NAME
355 && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
358 if (gimple_bb (def_stmt)->loop_father == father)
359 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
363 /* Must be an uninteresting phi. */
364 return bb_rank[bb->index];
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static int64_t
372 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 int64_t op_rank;
376 op_rank = get_rank (op);
378 /* Check whether op is biased after the get_rank () call, since it might have
379 updated biased_names. */
380 if (TREE_CODE (op) == SSA_NAME
381 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 if (maybe_biased_p == NULL)
384 return rank;
385 *maybe_biased_p = true;
388 return MAX (rank, op_rank);
391 /* Look up the operand rank structure for expression E. */
393 static inline int64_t
394 find_operand_rank (tree e)
396 int64_t *slot = operand_rank->get (e);
397 return slot ? *slot : -1;
400 /* Insert {E,RANK} into the operand rank hashtable. */
402 static inline void
403 insert_operand_rank (tree e, int64_t rank)
405 gcc_assert (rank > 0);
406 gcc_assert (!operand_rank->put (e, rank));
409 /* Given an expression E, return the rank of the expression. */
411 static int64_t
412 get_rank (tree e)
414 /* SSA_NAME's have the rank of the expression they are the result
416 For globals and uninitialized values, the rank is 0.
417 For function arguments, use the pre-setup rank.
418 For PHI nodes, stores, asm statements, etc, we use the rank of
419 the BB.
420 For simple operations, the rank is the maximum rank of any of
421 its operands, or the bb_rank, whichever is less.
422 I make no claims that this is optimal, however, it gives good
423 results. */
425 /* We make an exception to the normal ranking system to break
426 dependences of accumulator variables in loops. Suppose we
427 have a simple one-block loop containing:
429 x_1 = phi(x_0, x_2)
430 b = a + x_1
431 c = b + d
432 x_2 = c + e
434 As shown, each iteration of the calculation into x is fully
435 dependent upon the iteration before it. We would prefer to
436 see this in the form:
438 x_1 = phi(x_0, x_2)
439 b = a + d
440 c = b + e
441 x_2 = c + x_1
443 If the loop is unrolled, the calculations of b and c from
444 different iterations can be interleaved.
446 To obtain this result during reassociation, we bias the rank
447 of the phi definition x_1 upward, when it is recognized as an
448 accumulator pattern. The artificial rank causes it to be
449 added last, providing the desired independence. */
451 if (TREE_CODE (e) == SSA_NAME)
453 ssa_op_iter iter;
454 gimple *stmt;
455 int64_t rank;
456 tree op;
458 /* If we already have a rank for this expression, use that. */
459 rank = find_operand_rank (e);
460 if (rank != -1)
461 return rank;
463 stmt = SSA_NAME_DEF_STMT (e);
464 if (gimple_code (stmt) == GIMPLE_PHI)
466 rank = phi_rank (stmt);
467 if (rank != bb_rank[gimple_bb (stmt)->index])
468 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
471 else if (!is_gimple_assign (stmt))
472 rank = bb_rank[gimple_bb (stmt)->index];
474 else
476 bool biased_p = false;
477 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
479 /* Otherwise, find the maximum rank for the operands. As an
480 exception, remove the bias from loop-carried phis when propagating
481 the rank so that dependent operations are not also biased. */
482 /* Simply walk over all SSA uses - this takes advatage of the
483 fact that non-SSA operands are is_gimple_min_invariant and
484 thus have rank 0. */
485 rank = 0;
486 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
487 rank = propagate_rank (rank, op, maybe_biased_p);
489 rank += 1;
490 if (biased_p)
491 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
494 if (dump_file && (dump_flags & TDF_DETAILS))
496 fprintf (dump_file, "Rank for ");
497 print_generic_expr (dump_file, e);
498 fprintf (dump_file, " is %" PRId64 "\n", rank);
501 /* Note the rank in the hashtable so we don't recompute it. */
502 insert_operand_rank (e, rank);
503 return rank;
506 /* Constants, globals, etc., are rank 0 */
507 return 0;
511 /* We want integer ones to end up last no matter what, since they are
512 the ones we can do the most with. */
513 #define INTEGER_CONST_TYPE 1 << 4
514 #define FLOAT_ONE_CONST_TYPE 1 << 3
515 #define FLOAT_CONST_TYPE 1 << 2
516 #define OTHER_CONST_TYPE 1 << 1
518 /* Classify an invariant tree into integer, float, or other, so that
519 we can sort them to be near other constants of the same type. */
520 static inline int
521 constant_type (tree t)
523 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
524 return INTEGER_CONST_TYPE;
525 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
527 /* Sort -1.0 and 1.0 constants last, while in some cases
528 const_binop can't optimize some inexact operations, multiplication
529 by -1.0 or 1.0 can be always merged with others. */
530 if (real_onep (t) || real_minus_onep (t))
531 return FLOAT_ONE_CONST_TYPE;
532 return FLOAT_CONST_TYPE;
534 else
535 return OTHER_CONST_TYPE;
538 /* qsort comparison function to sort operand entries PA and PB by rank
539 so that the sorted array is ordered by rank in decreasing order. */
540 static int
541 sort_by_operand_rank (const void *pa, const void *pb)
543 const operand_entry *oea = *(const operand_entry *const *)pa;
544 const operand_entry *oeb = *(const operand_entry *const *)pb;
546 if (oeb->rank != oea->rank)
547 return oeb->rank > oea->rank ? 1 : -1;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oea->rank == 0)
554 if (constant_type (oeb->op) != constant_type (oea->op))
555 return constant_type (oea->op) - constant_type (oeb->op);
556 else
557 /* To make sorting result stable, we use unique IDs to determine
558 order. */
559 return oeb->id > oea->id ? 1 : -1;
562 if (TREE_CODE (oea->op) != SSA_NAME)
564 if (TREE_CODE (oeb->op) != SSA_NAME)
565 return oeb->id > oea->id ? 1 : -1;
566 else
567 return 1;
569 else if (TREE_CODE (oeb->op) != SSA_NAME)
570 return -1;
572 /* Lastly, make sure the versions that are the same go next to each
573 other. */
574 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
576 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
577 versions of removed SSA_NAMEs, so if possible, prefer to sort
578 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
579 See PR60418. */
580 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
581 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
582 basic_block bba = gimple_bb (stmta);
583 basic_block bbb = gimple_bb (stmtb);
584 if (bbb != bba)
586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
587 but the other might not. */
588 if (!bba)
589 return 1;
590 if (!bbb)
591 return -1;
592 /* If neither is, compare bb_rank. */
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
597 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
598 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
599 if (da != db)
600 return da ? 1 : -1;
602 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
605 return oeb->id > oea->id ? 1 : -1;
608 /* Add an operand entry to *OPS for the tree operand OP. */
610 static void
611 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
613 operand_entry *oe = operand_entry_pool.allocate ();
615 oe->op = op;
616 oe->rank = get_rank (op);
617 oe->id = next_operand_entry_id++;
618 oe->count = 1;
619 oe->stmt_to_insert = stmt_to_insert;
620 ops->safe_push (oe);
623 /* Add an operand entry to *OPS for the tree operand OP with repeat
624 count REPEAT. */
626 static void
627 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
628 HOST_WIDE_INT repeat)
630 operand_entry *oe = operand_entry_pool.allocate ();
632 oe->op = op;
633 oe->rank = get_rank (op);
634 oe->id = next_operand_entry_id++;
635 oe->count = repeat;
636 oe->stmt_to_insert = NULL;
637 ops->safe_push (oe);
639 reassociate_stats.pows_encountered++;
642 /* Returns true if we can associate the SSA def OP. */
644 static bool
645 can_reassociate_op_p (tree op)
647 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
648 return false;
649 /* Make sure asm goto outputs do not participate in reassociation since
650 we have no way to find an insertion place after asm goto. */
651 if (TREE_CODE (op) == SSA_NAME
652 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
653 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
654 return false;
655 return true;
658 /* Returns true if we can reassociate operations of TYPE.
659 That is for integral or non-saturating fixed-point types, and for
660 floating point type when associative-math is enabled. */
662 static bool
663 can_reassociate_type_p (tree type)
665 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
666 || NON_SAT_FIXED_POINT_TYPE_P (type)
667 || (flag_associative_math && FLOAT_TYPE_P (type)))
668 return true;
669 return false;
672 /* Return true if STMT is reassociable operation containing a binary
673 operation with tree code CODE, and is inside LOOP. */
675 static bool
676 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
678 basic_block bb = gimple_bb (stmt);
680 if (gimple_bb (stmt) == NULL)
681 return false;
683 if (!flow_bb_inside_loop_p (loop, bb))
684 return false;
686 if (is_gimple_assign (stmt)
687 && gimple_assign_rhs_code (stmt) == code
688 && has_single_use (gimple_assign_lhs (stmt)))
690 tree rhs1 = gimple_assign_rhs1 (stmt);
691 tree rhs2 = gimple_assign_rhs2 (stmt);
692 if (!can_reassociate_op_p (rhs1)
693 || (rhs2 && !can_reassociate_op_p (rhs2)))
694 return false;
695 return true;
698 return false;
702 /* Return true if STMT is a nop-conversion. */
704 static bool
705 gimple_nop_conversion_p (gimple *stmt)
707 if (gassign *ass = dyn_cast <gassign *> (stmt))
709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
711 TREE_TYPE (gimple_assign_rhs1 (ass))))
712 return true;
714 return false;
717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
718 operand of the negate operation. Otherwise, return NULL. */
720 static tree
721 get_unary_op (tree name, enum tree_code opcode)
723 gimple *stmt = SSA_NAME_DEF_STMT (name);
725 /* Look through nop conversions (sign changes). */
726 if (gimple_nop_conversion_p (stmt)
727 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
728 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
730 if (!is_gimple_assign (stmt))
731 return NULL_TREE;
733 if (gimple_assign_rhs_code (stmt) == opcode)
734 return gimple_assign_rhs1 (stmt);
735 return NULL_TREE;
738 /* Return true if OP1 and OP2 have the same value if casted to either type. */
740 static bool
741 ops_equal_values_p (tree op1, tree op2)
743 if (op1 == op2)
744 return true;
746 tree orig_op1 = op1;
747 if (TREE_CODE (op1) == SSA_NAME)
749 gimple *stmt = SSA_NAME_DEF_STMT (op1);
750 if (gimple_nop_conversion_p (stmt))
752 op1 = gimple_assign_rhs1 (stmt);
753 if (op1 == op2)
754 return true;
758 if (TREE_CODE (op2) == SSA_NAME)
760 gimple *stmt = SSA_NAME_DEF_STMT (op2);
761 if (gimple_nop_conversion_p (stmt))
763 op2 = gimple_assign_rhs1 (stmt);
764 if (op1 == op2
765 || orig_op1 == op2)
766 return true;
770 return false;
774 /* If CURR and LAST are a pair of ops that OPCODE allows us to
775 eliminate through equivalences, do so, remove them from OPS, and
776 return true. Otherwise, return false. */
778 static bool
779 eliminate_duplicate_pair (enum tree_code opcode,
780 vec<operand_entry *> *ops,
781 bool *all_done,
782 unsigned int i,
783 operand_entry *curr,
784 operand_entry *last)
787 /* If we have two of the same op, and the opcode is & |, min, or max,
788 we can eliminate one of them.
789 If we have two of the same op, and the opcode is ^, we can
790 eliminate both of them. */
792 if (last && last->op == curr->op)
794 switch (opcode)
796 case MAX_EXPR:
797 case MIN_EXPR:
798 case BIT_IOR_EXPR:
799 case BIT_AND_EXPR:
800 if (dump_file && (dump_flags & TDF_DETAILS))
802 fprintf (dump_file, "Equivalence: ");
803 print_generic_expr (dump_file, curr->op);
804 fprintf (dump_file, " [&|minmax] ");
805 print_generic_expr (dump_file, last->op);
806 fprintf (dump_file, " -> ");
807 print_generic_stmt (dump_file, last->op);
810 ops->ordered_remove (i);
811 reassociate_stats.ops_eliminated ++;
813 return true;
815 case BIT_XOR_EXPR:
816 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "Equivalence: ");
819 print_generic_expr (dump_file, curr->op);
820 fprintf (dump_file, " ^ ");
821 print_generic_expr (dump_file, last->op);
822 fprintf (dump_file, " -> nothing\n");
825 reassociate_stats.ops_eliminated += 2;
827 if (ops->length () == 2)
829 ops->truncate (0);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
831 *all_done = true;
833 else
835 ops->ordered_remove (i-1);
836 ops->ordered_remove (i-1);
839 return true;
841 default:
842 break;
845 return false;
848 static vec<tree> plus_negates;
850 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
851 expression, look in OPS for a corresponding positive operation to cancel
852 it out. If we find one, remove the other from OPS, replace
853 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
854 return false. */
856 static bool
857 eliminate_plus_minus_pair (enum tree_code opcode,
858 vec<operand_entry *> *ops,
859 unsigned int currindex,
860 operand_entry *curr)
862 tree negateop;
863 tree notop;
864 unsigned int i;
865 operand_entry *oe;
867 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
868 return false;
870 negateop = get_unary_op (curr->op, NEGATE_EXPR);
871 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
872 if (negateop == NULL_TREE && notop == NULL_TREE)
873 return false;
875 /* Any non-negated version will have a rank that is one less than
876 the current rank. So once we hit those ranks, if we don't find
877 one, we can stop. */
879 for (i = currindex + 1;
880 ops->iterate (i, &oe)
881 && oe->rank >= curr->rank - 1 ;
882 i++)
884 if (negateop
885 && ops_equal_values_p (oe->op, negateop))
887 if (dump_file && (dump_flags & TDF_DETAILS))
889 fprintf (dump_file, "Equivalence: ");
890 print_generic_expr (dump_file, negateop);
891 fprintf (dump_file, " + -");
892 print_generic_expr (dump_file, oe->op);
893 fprintf (dump_file, " -> 0\n");
896 ops->ordered_remove (i);
897 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
898 ops->ordered_remove (currindex);
899 reassociate_stats.ops_eliminated ++;
901 return true;
903 else if (notop
904 && ops_equal_values_p (oe->op, notop))
906 tree op_type = TREE_TYPE (oe->op);
908 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Equivalence: ");
911 print_generic_expr (dump_file, notop);
912 fprintf (dump_file, " + ~");
913 print_generic_expr (dump_file, oe->op);
914 fprintf (dump_file, " -> -1\n");
917 ops->ordered_remove (i);
918 add_to_ops_vec (ops, build_all_ones_cst (op_type));
919 ops->ordered_remove (currindex);
920 reassociate_stats.ops_eliminated ++;
922 return true;
926 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
927 save it for later inspection in repropagate_negates(). */
928 if (negateop != NULL_TREE
929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
930 plus_negates.safe_push (curr->op);
932 return false;
935 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
936 bitwise not expression, look in OPS for a corresponding operand to
937 cancel it out. If we find one, remove the other from OPS, replace
938 OPS[CURRINDEX] with 0, and return true. Otherwise, return
939 false. */
941 static bool
942 eliminate_not_pairs (enum tree_code opcode,
943 vec<operand_entry *> *ops,
944 unsigned int currindex,
945 operand_entry *curr)
947 tree notop;
948 unsigned int i;
949 operand_entry *oe;
951 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
952 || TREE_CODE (curr->op) != SSA_NAME)
953 return false;
955 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
956 if (notop == NULL_TREE)
957 return false;
959 /* Any non-not version will have a rank that is one less than
960 the current rank. So once we hit those ranks, if we don't find
961 one, we can stop. */
963 for (i = currindex + 1;
964 ops->iterate (i, &oe)
965 && oe->rank >= curr->rank - 1;
966 i++)
968 if (oe->op == notop)
970 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Equivalence: ");
973 print_generic_expr (dump_file, notop);
974 if (opcode == BIT_AND_EXPR)
975 fprintf (dump_file, " & ~");
976 else if (opcode == BIT_IOR_EXPR)
977 fprintf (dump_file, " | ~");
978 print_generic_expr (dump_file, oe->op);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " -> 0\n");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " -> -1\n");
985 if (opcode == BIT_AND_EXPR)
986 oe->op = build_zero_cst (TREE_TYPE (oe->op));
987 else if (opcode == BIT_IOR_EXPR)
988 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
990 reassociate_stats.ops_eliminated += ops->length () - 1;
991 ops->truncate (0);
992 ops->quick_push (oe);
993 return true;
997 return false;
1000 /* Use constant value that may be present in OPS to try to eliminate
1001 operands. Note that this function is only really used when we've
1002 eliminated ops for other reasons, or merged constants. Across
1003 single statements, fold already does all of this, plus more. There
1004 is little point in duplicating logic, so I've only included the
1005 identities that I could ever construct testcases to trigger. */
1007 static void
1008 eliminate_using_constants (enum tree_code opcode,
1009 vec<operand_entry *> *ops)
1011 operand_entry *oelast = ops->last ();
1012 tree type = TREE_TYPE (oelast->op);
1014 if (oelast->rank == 0
1015 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1017 switch (opcode)
1019 case BIT_AND_EXPR:
1020 if (integer_zerop (oelast->op))
1022 if (ops->length () != 1)
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Found & 0, removing all other ops\n");
1027 reassociate_stats.ops_eliminated += ops->length () - 1;
1029 ops->truncate (0);
1030 ops->quick_push (oelast);
1031 return;
1034 else if (integer_all_onesp (oelast->op))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found & -1, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1044 break;
1045 case BIT_IOR_EXPR:
1046 if (integer_all_onesp (oelast->op))
1048 if (ops->length () != 1)
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 fprintf (dump_file, "Found | -1, removing all other ops\n");
1053 reassociate_stats.ops_eliminated += ops->length () - 1;
1055 ops->truncate (0);
1056 ops->quick_push (oelast);
1057 return;
1060 else if (integer_zerop (oelast->op))
1062 if (ops->length () != 1)
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Found | 0, removing\n");
1066 ops->pop ();
1067 reassociate_stats.ops_eliminated++;
1070 break;
1071 case MULT_EXPR:
1072 if (integer_zerop (oelast->op)
1073 || (FLOAT_TYPE_P (type)
1074 && !HONOR_NANS (type)
1075 && !HONOR_SIGNED_ZEROS (type)
1076 && real_zerop (oelast->op)))
1078 if (ops->length () != 1)
1080 if (dump_file && (dump_flags & TDF_DETAILS))
1081 fprintf (dump_file, "Found * 0, removing all other ops\n");
1083 reassociate_stats.ops_eliminated += ops->length () - 1;
1084 ops->truncate (0);
1085 ops->quick_push (oelast);
1086 return;
1089 else if (integer_onep (oelast->op)
1090 || (FLOAT_TYPE_P (type)
1091 && !HONOR_SNANS (type)
1092 && real_onep (oelast->op)))
1094 if (ops->length () != 1)
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, "Found * 1, removing\n");
1098 ops->pop ();
1099 reassociate_stats.ops_eliminated++;
1100 return;
1103 break;
1104 case BIT_XOR_EXPR:
1105 case PLUS_EXPR:
1106 case MINUS_EXPR:
1107 if (integer_zerop (oelast->op)
1108 || (FLOAT_TYPE_P (type)
1109 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1110 && fold_real_zero_addition_p (type, 0, oelast->op,
1111 opcode == MINUS_EXPR)))
1113 if (ops->length () != 1)
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 fprintf (dump_file, "Found [|^+] 0, removing\n");
1117 ops->pop ();
1118 reassociate_stats.ops_eliminated++;
1119 return;
1122 break;
1123 default:
1124 break;
1130 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1131 bool, bool);
1133 /* Structure for tracking and counting operands. */
1134 struct oecount {
1135 unsigned int cnt;
1136 unsigned int id;
1137 enum tree_code oecode;
1138 tree op;
1142 /* The heap for the oecount hashtable and the sorted list of operands. */
1143 static vec<oecount> cvec;
1146 /* Oecount hashtable helpers. */
1148 struct oecount_hasher : int_hash <int, 0, 1>
1150 static inline hashval_t hash (int);
1151 static inline bool equal (int, int);
1154 /* Hash function for oecount. */
1156 inline hashval_t
1157 oecount_hasher::hash (int p)
1159 const oecount *c = &cvec[p - 42];
1160 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1163 /* Comparison function for oecount. */
1165 inline bool
1166 oecount_hasher::equal (int p1, int p2)
1168 const oecount *c1 = &cvec[p1 - 42];
1169 const oecount *c2 = &cvec[p2 - 42];
1170 return c1->oecode == c2->oecode && c1->op == c2->op;
1173 /* Comparison function for qsort sorting oecount elements by count. */
1175 static int
1176 oecount_cmp (const void *p1, const void *p2)
1178 const oecount *c1 = (const oecount *)p1;
1179 const oecount *c2 = (const oecount *)p2;
1180 if (c1->cnt != c2->cnt)
1181 return c1->cnt > c2->cnt ? 1 : -1;
1182 else
1183 /* If counts are identical, use unique IDs to stabilize qsort. */
1184 return c1->id > c2->id ? 1 : -1;
1187 /* Return TRUE iff STMT represents a builtin call that raises OP
1188 to some exponent. */
1190 static bool
1191 stmt_is_power_of_op (gimple *stmt, tree op)
1193 if (!is_gimple_call (stmt))
1194 return false;
1196 switch (gimple_call_combined_fn (stmt))
1198 CASE_CFN_POW:
1199 CASE_CFN_POWI:
1200 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1202 default:
1203 return false;
1207 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1208 in place and return the result. Assumes that stmt_is_power_of_op
1209 was previously called for STMT and returned TRUE. */
1211 static HOST_WIDE_INT
1212 decrement_power (gimple *stmt)
1214 REAL_VALUE_TYPE c, cint;
1215 HOST_WIDE_INT power;
1216 tree arg1;
1218 switch (gimple_call_combined_fn (stmt))
1220 CASE_CFN_POW:
1221 arg1 = gimple_call_arg (stmt, 1);
1222 c = TREE_REAL_CST (arg1);
1223 power = real_to_integer (&c) - 1;
1224 real_from_integer (&cint, VOIDmode, power, SIGNED);
1225 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1226 return power;
1228 CASE_CFN_POWI:
1229 arg1 = gimple_call_arg (stmt, 1);
1230 power = TREE_INT_CST_LOW (arg1) - 1;
1231 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1232 return power;
1234 default:
1235 gcc_unreachable ();
1239 /* Replace SSA defined by STMT and replace all its uses with new
1240 SSA. Also return the new SSA. */
1242 static tree
1243 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1245 gimple *use_stmt;
1246 use_operand_p use;
1247 imm_use_iterator iter;
1248 tree new_lhs, new_debug_lhs = NULL_TREE;
1249 tree lhs = gimple_get_lhs (stmt);
1251 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1252 gimple_set_lhs (stmt, new_lhs);
1254 /* Also need to update GIMPLE_DEBUGs. */
1255 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1257 tree repl = new_lhs;
1258 if (is_gimple_debug (use_stmt))
1260 if (new_debug_lhs == NULL_TREE)
1262 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1263 gdebug *def_temp
1264 = gimple_build_debug_bind (new_debug_lhs,
1265 build2 (opcode, TREE_TYPE (lhs),
1266 new_lhs, op),
1267 stmt);
1268 gimple_set_uid (def_temp, gimple_uid (stmt));
1269 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1270 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1272 repl = new_debug_lhs;
1274 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1275 SET_USE (use, repl);
1276 update_stmt (use_stmt);
1278 return new_lhs;
1281 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1282 uses with new SSAs. Also do this for the stmt that defines DEF
1283 if *DEF is not OP. */
1285 static void
1286 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1287 vec<gimple *> &stmts_to_fix)
1289 unsigned i;
1290 gimple *stmt;
1292 if (*def != op
1293 && TREE_CODE (*def) == SSA_NAME
1294 && (stmt = SSA_NAME_DEF_STMT (*def))
1295 && gimple_code (stmt) != GIMPLE_NOP)
1296 *def = make_new_ssa_for_def (stmt, opcode, op);
1298 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1299 make_new_ssa_for_def (stmt, opcode, op);
1302 /* Find the single immediate use of STMT's LHS, and replace it
1303 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1304 replace *DEF with OP as well. */
1306 static void
1307 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1309 tree lhs;
1310 gimple *use_stmt;
1311 use_operand_p use;
1312 gimple_stmt_iterator gsi;
1314 if (is_gimple_call (stmt))
1315 lhs = gimple_call_lhs (stmt);
1316 else
1317 lhs = gimple_assign_lhs (stmt);
1319 gcc_assert (has_single_use (lhs));
1320 single_imm_use (lhs, &use, &use_stmt);
1321 if (lhs == *def)
1322 *def = op;
1323 SET_USE (use, op);
1324 if (TREE_CODE (op) != SSA_NAME)
1325 update_stmt (use_stmt);
1326 gsi = gsi_for_stmt (stmt);
1327 unlink_stmt_vdef (stmt);
1328 reassoc_remove_stmt (&gsi);
1329 release_defs (stmt);
1332 /* Walks the linear chain with result *DEF searching for an operation
1333 with operand OP and code OPCODE removing that from the chain. *DEF
1334 is updated if there is only one operand but no operation left. */
1336 static void
1337 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1339 tree orig_def = *def;
1340 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1341 /* PR72835 - Record the stmt chain that has to be updated such that
1342 we dont use the same LHS when the values computed are different. */
1343 auto_vec<gimple *, 64> stmts_to_fix;
1347 tree name;
1349 if (opcode == MULT_EXPR)
1351 if (stmt_is_power_of_op (stmt, op))
1353 if (decrement_power (stmt) == 1)
1355 if (stmts_to_fix.length () > 0)
1356 stmts_to_fix.pop ();
1357 propagate_op_to_single_use (op, stmt, def);
1359 break;
1361 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1363 if (gimple_assign_rhs1 (stmt) == op)
1365 tree cst = build_minus_one_cst (TREE_TYPE (op));
1366 if (stmts_to_fix.length () > 0)
1367 stmts_to_fix.pop ();
1368 propagate_op_to_single_use (cst, stmt, def);
1369 break;
1371 else if (integer_minus_onep (op)
1372 || real_minus_onep (op))
1374 gimple_assign_set_rhs_code
1375 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1376 break;
1381 name = gimple_assign_rhs1 (stmt);
1383 /* If this is the operation we look for and one of the operands
1384 is ours simply propagate the other operand into the stmts
1385 single use. */
1386 if (gimple_assign_rhs_code (stmt) == opcode
1387 && (name == op
1388 || gimple_assign_rhs2 (stmt) == op))
1390 if (name == op)
1391 name = gimple_assign_rhs2 (stmt);
1392 if (stmts_to_fix.length () > 0)
1393 stmts_to_fix.pop ();
1394 propagate_op_to_single_use (name, stmt, def);
1395 break;
1398 /* We might have a multiply of two __builtin_pow* calls, and
1399 the operand might be hiding in the rightmost one. Likewise
1400 this can happen for a negate. */
1401 if (opcode == MULT_EXPR
1402 && gimple_assign_rhs_code (stmt) == opcode
1403 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1404 && has_single_use (gimple_assign_rhs2 (stmt)))
1406 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1407 if (stmt_is_power_of_op (stmt2, op))
1409 if (decrement_power (stmt2) == 1)
1410 propagate_op_to_single_use (op, stmt2, def);
1411 else
1412 stmts_to_fix.safe_push (stmt2);
1413 break;
1415 else if (is_gimple_assign (stmt2)
1416 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1418 if (gimple_assign_rhs1 (stmt2) == op)
1420 tree cst = build_minus_one_cst (TREE_TYPE (op));
1421 propagate_op_to_single_use (cst, stmt2, def);
1422 break;
1424 else if (integer_minus_onep (op)
1425 || real_minus_onep (op))
1427 stmts_to_fix.safe_push (stmt2);
1428 gimple_assign_set_rhs_code
1429 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1430 break;
1435 /* Continue walking the chain. */
1436 gcc_assert (name != op
1437 && TREE_CODE (name) == SSA_NAME);
1438 stmt = SSA_NAME_DEF_STMT (name);
1439 stmts_to_fix.safe_push (stmt);
1441 while (1);
1443 if (stmts_to_fix.length () > 0 || *def == orig_def)
1444 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1447 /* Returns true if statement S1 dominates statement S2. Like
1448 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1450 static bool
1451 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1453 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1455 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1456 SSA_NAME. Assume it lives at the beginning of function and
1457 thus dominates everything. */
1458 if (!bb1 || s1 == s2)
1459 return true;
1461 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1462 if (!bb2)
1463 return false;
1465 if (bb1 == bb2)
1467 /* PHIs in the same basic block are assumed to be
1468 executed all in parallel, if only one stmt is a PHI,
1469 it dominates the other stmt in the same basic block. */
1470 if (gimple_code (s1) == GIMPLE_PHI)
1471 return true;
1473 if (gimple_code (s2) == GIMPLE_PHI)
1474 return false;
1476 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1478 if (gimple_uid (s1) < gimple_uid (s2))
1479 return true;
1481 if (gimple_uid (s1) > gimple_uid (s2))
1482 return false;
1484 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1485 unsigned int uid = gimple_uid (s1);
1486 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1488 gimple *s = gsi_stmt (gsi);
1489 if (gimple_uid (s) != uid)
1490 break;
1491 if (s == s2)
1492 return true;
1495 return false;
1498 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1501 /* Insert STMT after INSERT_POINT. */
1503 static void
1504 insert_stmt_after (gimple *stmt, gimple *insert_point)
1506 gimple_stmt_iterator gsi;
1507 basic_block bb;
1509 if (gimple_code (insert_point) == GIMPLE_PHI)
1510 bb = gimple_bb (insert_point);
1511 else if (!stmt_ends_bb_p (insert_point))
1513 gsi = gsi_for_stmt (insert_point);
1514 gimple_set_uid (stmt, gimple_uid (insert_point));
1515 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1516 return;
1518 else if (gimple_code (insert_point) == GIMPLE_ASM
1519 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1520 /* We have no idea where to insert - it depends on where the
1521 uses will be placed. */
1522 gcc_unreachable ();
1523 else
1524 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1525 thus if it must end a basic block, it should be a call that can
1526 throw, or some assignment that can throw. If it throws, the LHS
1527 of it will not be initialized though, so only valid places using
1528 the SSA_NAME should be dominated by the fallthru edge. */
1529 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1530 gsi = gsi_after_labels (bb);
1531 if (gsi_end_p (gsi))
1533 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1534 gimple_set_uid (stmt,
1535 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1537 else
1538 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1539 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1542 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1543 the result. Places the statement after the definition of either
1544 OP1 or OP2. Returns the new statement. */
1546 static gimple *
1547 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1549 gimple *op1def = NULL, *op2def = NULL;
1550 gimple_stmt_iterator gsi;
1551 tree op;
1552 gassign *sum;
1554 /* Create the addition statement. */
1555 op = make_ssa_name (type);
1556 sum = gimple_build_assign (op, opcode, op1, op2);
1558 /* Find an insertion place and insert. */
1559 if (TREE_CODE (op1) == SSA_NAME)
1560 op1def = SSA_NAME_DEF_STMT (op1);
1561 if (TREE_CODE (op2) == SSA_NAME)
1562 op2def = SSA_NAME_DEF_STMT (op2);
1563 if ((!op1def || gimple_nop_p (op1def))
1564 && (!op2def || gimple_nop_p (op2def)))
1566 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1567 if (gsi_end_p (gsi))
1569 gimple_stmt_iterator gsi2
1570 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1571 gimple_set_uid (sum,
1572 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1574 else
1575 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1576 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1578 else
1580 gimple *insert_point;
1581 if ((!op1def || gimple_nop_p (op1def))
1582 || (op2def && !gimple_nop_p (op2def)
1583 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1584 insert_point = op2def;
1585 else
1586 insert_point = op1def;
1587 insert_stmt_after (sum, insert_point);
1589 update_stmt (sum);
1591 return sum;
1594 /* Perform un-distribution of divisions and multiplications.
1595 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1596 to (A + B) / X for real X.
1598 The algorithm is organized as follows.
1600 - First we walk the addition chain *OPS looking for summands that
1601 are defined by a multiplication or a real division. This results
1602 in the candidates bitmap with relevant indices into *OPS.
1604 - Second we build the chains of multiplications or divisions for
1605 these candidates, counting the number of occurrences of (operand, code)
1606 pairs in all of the candidates chains.
1608 - Third we sort the (operand, code) pairs by number of occurrence and
1609 process them starting with the pair with the most uses.
1611 * For each such pair we walk the candidates again to build a
1612 second candidate bitmap noting all multiplication/division chains
1613 that have at least one occurrence of (operand, code).
1615 * We build an alternate addition chain only covering these
1616 candidates with one (operand, code) operation removed from their
1617 multiplication/division chain.
1619 * The first candidate gets replaced by the alternate addition chain
1620 multiplied/divided by the operand.
1622 * All candidate chains get disabled for further processing and
1623 processing of (operand, code) pairs continues.
1625 The alternate addition chains built are re-processed by the main
1626 reassociation algorithm which allows optimizing a * x * y + b * y * x
1627 to (a + b ) * x * y in one invocation of the reassociation pass. */
1629 static bool
1630 undistribute_ops_list (enum tree_code opcode,
1631 vec<operand_entry *> *ops, class loop *loop)
1633 unsigned int length = ops->length ();
1634 operand_entry *oe1;
1635 unsigned i, j;
1636 unsigned nr_candidates, nr_candidates2;
1637 sbitmap_iterator sbi0;
1638 vec<operand_entry *> *subops;
1639 bool changed = false;
1640 unsigned int next_oecount_id = 0;
1642 if (length <= 1
1643 || opcode != PLUS_EXPR)
1644 return false;
1646 /* Build a list of candidates to process. */
1647 auto_sbitmap candidates (length);
1648 bitmap_clear (candidates);
1649 nr_candidates = 0;
1650 FOR_EACH_VEC_ELT (*ops, i, oe1)
1652 enum tree_code dcode;
1653 gimple *oe1def;
1655 if (TREE_CODE (oe1->op) != SSA_NAME)
1656 continue;
1657 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1658 if (!is_gimple_assign (oe1def))
1659 continue;
1660 dcode = gimple_assign_rhs_code (oe1def);
1661 if ((dcode != MULT_EXPR
1662 && dcode != RDIV_EXPR)
1663 || !is_reassociable_op (oe1def, dcode, loop))
1664 continue;
1666 bitmap_set_bit (candidates, i);
1667 nr_candidates++;
1670 if (nr_candidates < 2)
1671 return false;
1673 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, "searching for un-distribute opportunities ");
1676 print_generic_expr (dump_file,
1677 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1678 fprintf (dump_file, " %d\n", nr_candidates);
1681 /* Build linearized sub-operand lists and the counting table. */
1682 cvec.create (0);
1684 hash_table<oecount_hasher> ctable (15);
1686 /* ??? Macro arguments cannot have multi-argument template types in
1687 them. This typedef is needed to workaround that limitation. */
1688 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1689 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1690 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1692 gimple *oedef;
1693 enum tree_code oecode;
1694 unsigned j;
1696 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1697 oecode = gimple_assign_rhs_code (oedef);
1698 linearize_expr_tree (&subops[i], oedef,
1699 associative_tree_code (oecode), false);
1701 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1703 oecount c;
1704 int *slot;
1705 int idx;
1706 c.oecode = oecode;
1707 c.cnt = 1;
1708 c.id = next_oecount_id++;
1709 c.op = oe1->op;
1710 cvec.safe_push (c);
1711 idx = cvec.length () + 41;
1712 slot = ctable.find_slot (idx, INSERT);
1713 if (!*slot)
1715 *slot = idx;
1717 else
1719 cvec.pop ();
1720 cvec[*slot - 42].cnt++;
1725 /* Sort the counting table. */
1726 cvec.qsort (oecount_cmp);
1728 if (dump_file && (dump_flags & TDF_DETAILS))
1730 oecount *c;
1731 fprintf (dump_file, "Candidates:\n");
1732 FOR_EACH_VEC_ELT (cvec, j, c)
1734 fprintf (dump_file, " %u %s: ", c->cnt,
1735 c->oecode == MULT_EXPR
1736 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1737 print_generic_expr (dump_file, c->op);
1738 fprintf (dump_file, "\n");
1742 /* Process the (operand, code) pairs in order of most occurrence. */
1743 auto_sbitmap candidates2 (length);
1744 while (!cvec.is_empty ())
1746 oecount *c = &cvec.last ();
1747 if (c->cnt < 2)
1748 break;
1750 /* Now collect the operands in the outer chain that contain
1751 the common operand in their inner chain. */
1752 bitmap_clear (candidates2);
1753 nr_candidates2 = 0;
1754 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1756 gimple *oedef;
1757 enum tree_code oecode;
1758 unsigned j;
1759 tree op = (*ops)[i]->op;
1761 /* If we undistributed in this chain already this may be
1762 a constant. */
1763 if (TREE_CODE (op) != SSA_NAME)
1764 continue;
1766 oedef = SSA_NAME_DEF_STMT (op);
1767 oecode = gimple_assign_rhs_code (oedef);
1768 if (oecode != c->oecode)
1769 continue;
1771 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1773 if (oe1->op == c->op)
1775 bitmap_set_bit (candidates2, i);
1776 ++nr_candidates2;
1777 break;
1782 if (nr_candidates2 >= 2)
1784 operand_entry *oe1, *oe2;
1785 gimple *prod;
1786 int first = bitmap_first_set_bit (candidates2);
1788 /* Build the new addition chain. */
1789 oe1 = (*ops)[first];
1790 if (dump_file && (dump_flags & TDF_DETAILS))
1792 fprintf (dump_file, "Building (");
1793 print_generic_expr (dump_file, oe1->op);
1795 zero_one_operation (&oe1->op, c->oecode, c->op);
1796 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1798 gimple *sum;
1799 oe2 = (*ops)[i];
1800 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, " + ");
1803 print_generic_expr (dump_file, oe2->op);
1805 zero_one_operation (&oe2->op, c->oecode, c->op);
1806 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1807 oe1->op, oe2->op, opcode);
1808 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1809 oe2->rank = 0;
1810 oe1->op = gimple_get_lhs (sum);
1813 /* Apply the multiplication/division. */
1814 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1815 oe1->op, c->op, c->oecode);
1816 if (dump_file && (dump_flags & TDF_DETAILS))
1818 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1819 print_generic_expr (dump_file, c->op);
1820 fprintf (dump_file, "\n");
1823 /* Record it in the addition chain and disable further
1824 undistribution with this op. */
1825 oe1->op = gimple_assign_lhs (prod);
1826 oe1->rank = get_rank (oe1->op);
1827 subops[first].release ();
1829 changed = true;
1832 cvec.pop ();
1835 for (i = 0; i < ops->length (); ++i)
1836 subops[i].release ();
1837 free (subops);
1838 cvec.release ();
1840 return changed;
1843 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1844 first: element index for each relevant BIT_FIELD_REF.
1845 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1846 typedef std::pair<unsigned, unsigned> v_info_elem;
1847 struct v_info {
1848 tree vec_type;
1849 auto_vec<v_info_elem, 32> vec;
1851 typedef v_info *v_info_ptr;
1853 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1854 static int
1855 sort_by_mach_mode (const void *p_i, const void *p_j)
1857 const tree tr1 = *((const tree *) p_i);
1858 const tree tr2 = *((const tree *) p_j);
1859 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1860 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1861 if (mode1 > mode2)
1862 return 1;
1863 else if (mode1 < mode2)
1864 return -1;
1865 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1866 return -1;
1867 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1868 return 1;
1869 return 0;
1872 /* Cleanup hash map for VECTOR information. */
1873 static void
1874 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1876 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1877 it != info_map.end (); ++it)
1879 v_info_ptr info = (*it).second;
1880 delete info;
1881 (*it).second = NULL;
1885 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1886 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1887 is transformed to
1888 Vs = (V1 + V2 + ... + Vn)
1889 Vs[0] + Vs[1] + ... + Vs[k]
1891 The basic steps are listed below:
1893 1) Check the addition chain *OPS by looking those summands coming from
1894 VECTOR bit_field_ref on VECTOR type. Put the information into
1895 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1897 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1898 continuous, they can cover the whole VECTOR perfectly without any holes.
1899 Obtain one VECTOR list which contain candidates to be transformed.
1901 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1902 candidates with same mode, build the addition statements for them and
1903 generate BIT_FIELD_REFs accordingly.
1905 TODO:
1906 The current implementation requires the whole VECTORs should be fully
1907 covered, but it can be extended to support partial, checking adjacent
1908 but not fill the whole, it may need some cost model to define the
1909 boundary to do or not.
1911 static bool
1912 undistribute_bitref_for_vector (enum tree_code opcode,
1913 vec<operand_entry *> *ops, struct loop *loop)
1915 if (ops->length () <= 1)
1916 return false;
1918 if (opcode != PLUS_EXPR
1919 && opcode != MULT_EXPR
1920 && opcode != BIT_XOR_EXPR
1921 && opcode != BIT_IOR_EXPR
1922 && opcode != BIT_AND_EXPR)
1923 return false;
1925 hash_map<tree, v_info_ptr> v_info_map;
1926 operand_entry *oe1;
1927 unsigned i;
1929 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1930 information into map. */
1931 FOR_EACH_VEC_ELT (*ops, i, oe1)
1933 enum tree_code dcode;
1934 gimple *oe1def;
1936 if (TREE_CODE (oe1->op) != SSA_NAME)
1937 continue;
1938 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1939 if (!is_gimple_assign (oe1def))
1940 continue;
1941 dcode = gimple_assign_rhs_code (oe1def);
1942 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1943 continue;
1945 tree rhs = gimple_assign_rhs1 (oe1def);
1946 tree vec = TREE_OPERAND (rhs, 0);
1947 tree vec_type = TREE_TYPE (vec);
1949 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1950 continue;
1952 /* Ignore it if target machine can't support this VECTOR type. */
1953 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1954 continue;
1956 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1957 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1958 continue;
1960 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1961 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1962 continue;
1964 /* The type of BIT_FIELD_REF might not be equal to the element type of
1965 the vector. We want to use a vector type with element type the
1966 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1967 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1969 machine_mode simd_mode;
1970 unsigned HOST_WIDE_INT size, nunits;
1971 unsigned HOST_WIDE_INT elem_size
1972 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1973 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1974 continue;
1975 if (size <= elem_size || (size % elem_size) != 0)
1976 continue;
1977 nunits = size / elem_size;
1978 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1979 nunits).exists (&simd_mode))
1980 continue;
1981 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1983 /* Ignore it if target machine can't support this VECTOR type. */
1984 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1985 continue;
1987 /* Check const vector type, constrain BIT_FIELD_REF offset and
1988 size. */
1989 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1990 continue;
1992 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1993 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1994 continue;
1997 tree elem_type = TREE_TYPE (vec_type);
1998 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1999 if (maybe_ne (bit_field_size (rhs), elem_size))
2000 continue;
2002 unsigned idx;
2003 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2004 continue;
2006 /* Ignore it if target machine can't support this type of VECTOR
2007 operation. */
2008 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2009 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2010 continue;
2012 bool existed;
2013 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2014 if (!existed)
2016 info = new v_info;
2017 info->vec_type = vec_type;
2019 else if (!types_compatible_p (vec_type, info->vec_type))
2020 continue;
2021 info->vec.safe_push (std::make_pair (idx, i));
2024 /* At least two VECTOR to combine. */
2025 if (v_info_map.elements () <= 1)
2027 cleanup_vinfo_map (v_info_map);
2028 return false;
2031 /* Verify all VECTOR candidates by checking two conditions:
2032 1) sorted offsets are adjacent, no holes.
2033 2) can fill the whole VECTOR perfectly.
2034 And add the valid candidates to a vector for further handling. */
2035 auto_vec<tree> valid_vecs (v_info_map.elements ());
2036 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2037 it != v_info_map.end (); ++it)
2039 tree cand_vec = (*it).first;
2040 v_info_ptr cand_info = (*it).second;
2041 unsigned int num_elems
2042 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2043 if (cand_info->vec.length () != num_elems)
2044 continue;
2045 sbitmap holes = sbitmap_alloc (num_elems);
2046 bitmap_ones (holes);
2047 bool valid = true;
2048 v_info_elem *curr;
2049 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2051 if (!bitmap_bit_p (holes, curr->first))
2053 valid = false;
2054 break;
2056 else
2057 bitmap_clear_bit (holes, curr->first);
2059 if (valid && bitmap_empty_p (holes))
2060 valid_vecs.quick_push (cand_vec);
2061 sbitmap_free (holes);
2064 /* At least two VECTOR to combine. */
2065 if (valid_vecs.length () <= 1)
2067 cleanup_vinfo_map (v_info_map);
2068 return false;
2071 valid_vecs.qsort (sort_by_mach_mode);
2072 /* Go through all candidates by machine mode order, query the mode_to_total
2073 to get the total number for each mode and skip the single one. */
2074 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2076 tree tvec = valid_vecs[i];
2077 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2079 /* Skip modes with only a single candidate. */
2080 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2081 continue;
2083 unsigned int idx, j;
2084 gimple *sum = NULL;
2085 tree sum_vec = tvec;
2086 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2087 v_info_elem *elem;
2088 tree vec_type = info_ptr->vec_type;
2090 /* Build the sum for all candidates with same mode. */
2093 sum = build_and_add_sum (vec_type, sum_vec,
2094 valid_vecs[i + 1], opcode);
2095 if (!useless_type_conversion_p (vec_type,
2096 TREE_TYPE (valid_vecs[i + 1])))
2098 /* Update the operands only after build_and_add_sum,
2099 so that we don't have to repeat the placement algorithm
2100 of build_and_add_sum. */
2101 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2102 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2103 valid_vecs[i + 1]);
2104 tree lhs = make_ssa_name (vec_type);
2105 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2106 gimple_set_uid (g, gimple_uid (sum));
2107 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2108 gimple_assign_set_rhs2 (sum, lhs);
2109 if (sum_vec == tvec)
2111 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2112 lhs = make_ssa_name (vec_type);
2113 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2114 gimple_set_uid (g, gimple_uid (sum));
2115 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2116 gimple_assign_set_rhs1 (sum, lhs);
2118 update_stmt (sum);
2120 sum_vec = gimple_get_lhs (sum);
2121 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2122 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2123 /* Update those related ops of current candidate VECTOR. */
2124 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2126 idx = elem->second;
2127 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2128 /* Set this then op definition will get DCEd later. */
2129 gimple_set_visited (def, true);
2130 if (opcode == PLUS_EXPR
2131 || opcode == BIT_XOR_EXPR
2132 || opcode == BIT_IOR_EXPR)
2133 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2134 else if (opcode == MULT_EXPR)
2135 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2136 else
2138 gcc_assert (opcode == BIT_AND_EXPR);
2139 (*ops)[idx]->op
2140 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2142 (*ops)[idx]->rank = 0;
2144 if (dump_file && (dump_flags & TDF_DETAILS))
2146 fprintf (dump_file, "Generating addition -> ");
2147 print_gimple_stmt (dump_file, sum, 0);
2149 i++;
2151 while ((i < valid_vecs.length () - 1)
2152 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2154 /* Referring to first valid VECTOR with this mode, generate the
2155 BIT_FIELD_REF statements accordingly. */
2156 info_ptr = *(v_info_map.get (tvec));
2157 gcc_assert (sum);
2158 tree elem_type = TREE_TYPE (vec_type);
2159 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2161 idx = elem->second;
2162 tree dst = make_ssa_name (elem_type);
2163 tree pos = bitsize_int (elem->first
2164 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2165 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2166 TYPE_SIZE (elem_type), pos);
2167 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2168 insert_stmt_after (gs, sum);
2169 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2170 /* Set this then op definition will get DCEd later. */
2171 gimple_set_visited (def, true);
2172 (*ops)[idx]->op = gimple_assign_lhs (gs);
2173 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2174 if (dump_file && (dump_flags & TDF_DETAILS))
2176 fprintf (dump_file, "Generating bit_field_ref -> ");
2177 print_gimple_stmt (dump_file, gs, 0);
2182 if (dump_file && (dump_flags & TDF_DETAILS))
2183 fprintf (dump_file, "undistributiong bit_field_ref for vector done.\n");
2185 cleanup_vinfo_map (v_info_map);
2187 return true;
2190 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2191 expression, examine the other OPS to see if any of them are comparisons
2192 of the same values, which we may be able to combine or eliminate.
2193 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2195 static bool
2196 eliminate_redundant_comparison (enum tree_code opcode,
2197 vec<operand_entry *> *ops,
2198 unsigned int currindex,
2199 operand_entry *curr)
2201 tree op1, op2;
2202 enum tree_code lcode, rcode;
2203 gimple *def1, *def2;
2204 int i;
2205 operand_entry *oe;
2207 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2208 return false;
2210 /* Check that CURR is a comparison. */
2211 if (TREE_CODE (curr->op) != SSA_NAME)
2212 return false;
2213 def1 = SSA_NAME_DEF_STMT (curr->op);
2214 if (!is_gimple_assign (def1))
2215 return false;
2216 lcode = gimple_assign_rhs_code (def1);
2217 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2218 return false;
2219 op1 = gimple_assign_rhs1 (def1);
2220 op2 = gimple_assign_rhs2 (def1);
2222 /* Now look for a similar comparison in the remaining OPS. */
2223 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2225 tree t;
2227 if (TREE_CODE (oe->op) != SSA_NAME)
2228 continue;
2229 def2 = SSA_NAME_DEF_STMT (oe->op);
2230 if (!is_gimple_assign (def2))
2231 continue;
2232 rcode = gimple_assign_rhs_code (def2);
2233 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2234 continue;
2236 /* If we got here, we have a match. See if we can combine the
2237 two comparisons. */
2238 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2239 if (opcode == BIT_IOR_EXPR)
2240 t = maybe_fold_or_comparisons (type,
2241 lcode, op1, op2,
2242 rcode, gimple_assign_rhs1 (def2),
2243 gimple_assign_rhs2 (def2));
2244 else
2245 t = maybe_fold_and_comparisons (type,
2246 lcode, op1, op2,
2247 rcode, gimple_assign_rhs1 (def2),
2248 gimple_assign_rhs2 (def2));
2249 if (!t)
2250 continue;
2252 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2253 always give us a boolean_type_node value back. If the original
2254 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2255 we need to convert. */
2256 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2258 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2259 continue;
2260 t = fold_convert (TREE_TYPE (curr->op), t);
2263 if (TREE_CODE (t) != INTEGER_CST
2264 && !operand_equal_p (t, curr->op, 0))
2266 enum tree_code subcode;
2267 tree newop1, newop2;
2268 if (!COMPARISON_CLASS_P (t))
2269 continue;
2270 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2271 STRIP_USELESS_TYPE_CONVERSION (newop1);
2272 STRIP_USELESS_TYPE_CONVERSION (newop2);
2273 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2274 continue;
2277 if (dump_file && (dump_flags & TDF_DETAILS))
2279 fprintf (dump_file, "Equivalence: ");
2280 print_generic_expr (dump_file, curr->op);
2281 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2282 print_generic_expr (dump_file, oe->op);
2283 fprintf (dump_file, " -> ");
2284 print_generic_expr (dump_file, t);
2285 fprintf (dump_file, "\n");
2288 /* Now we can delete oe, as it has been subsumed by the new combined
2289 expression t. */
2290 ops->ordered_remove (i);
2291 reassociate_stats.ops_eliminated ++;
2293 /* If t is the same as curr->op, we're done. Otherwise we must
2294 replace curr->op with t. Special case is if we got a constant
2295 back, in which case we add it to the end instead of in place of
2296 the current entry. */
2297 if (TREE_CODE (t) == INTEGER_CST)
2299 ops->ordered_remove (currindex);
2300 add_to_ops_vec (ops, t);
2302 else if (!operand_equal_p (t, curr->op, 0))
2304 gimple *sum;
2305 enum tree_code subcode;
2306 tree newop1;
2307 tree newop2;
2308 gcc_assert (COMPARISON_CLASS_P (t));
2309 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2310 STRIP_USELESS_TYPE_CONVERSION (newop1);
2311 STRIP_USELESS_TYPE_CONVERSION (newop2);
2312 gcc_checking_assert (is_gimple_val (newop1)
2313 && is_gimple_val (newop2));
2314 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2315 curr->op = gimple_get_lhs (sum);
2317 return true;
2320 return false;
2324 /* Transform repeated addition of same values into multiply with
2325 constant. */
2326 static bool
2327 transform_add_to_multiply (vec<operand_entry *> *ops)
2329 operand_entry *oe;
2330 tree op = NULL_TREE;
2331 int j;
2332 int i, start = -1, end = 0, count = 0;
2333 auto_vec<std::pair <int, int> > indxs;
2334 bool changed = false;
2336 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2337 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2338 || !flag_unsafe_math_optimizations))
2339 return false;
2341 /* Look for repeated operands. */
2342 FOR_EACH_VEC_ELT (*ops, i, oe)
2344 if (start == -1)
2346 count = 1;
2347 op = oe->op;
2348 start = i;
2350 else if (operand_equal_p (oe->op, op, 0))
2352 count++;
2353 end = i;
2355 else
2357 if (count > 1)
2358 indxs.safe_push (std::make_pair (start, end));
2359 count = 1;
2360 op = oe->op;
2361 start = i;
2365 if (count > 1)
2366 indxs.safe_push (std::make_pair (start, end));
2368 for (j = indxs.length () - 1; j >= 0; --j)
2370 /* Convert repeated operand addition to multiplication. */
2371 start = indxs[j].first;
2372 end = indxs[j].second;
2373 op = (*ops)[start]->op;
2374 count = end - start + 1;
2375 for (i = end; i >= start; --i)
2376 ops->unordered_remove (i);
2377 tree tmp = make_ssa_name (TREE_TYPE (op));
2378 tree cst = build_int_cst (integer_type_node, count);
2379 gassign *mul_stmt
2380 = gimple_build_assign (tmp, MULT_EXPR,
2381 op, fold_convert (TREE_TYPE (op), cst));
2382 gimple_set_visited (mul_stmt, true);
2383 add_to_ops_vec (ops, tmp, mul_stmt);
2384 changed = true;
2387 return changed;
2391 /* Perform various identities and other optimizations on the list of
2392 operand entries, stored in OPS. The tree code for the binary
2393 operation between all the operands is OPCODE. */
2395 static void
2396 optimize_ops_list (enum tree_code opcode,
2397 vec<operand_entry *> *ops)
2399 unsigned int length = ops->length ();
2400 unsigned int i;
2401 operand_entry *oe;
2402 operand_entry *oelast = NULL;
2403 bool iterate = false;
2405 if (length == 1)
2406 return;
2408 oelast = ops->last ();
2410 /* If the last two are constants, pop the constants off, merge them
2411 and try the next two. */
2412 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2414 operand_entry *oelm1 = (*ops)[length - 2];
2416 if (oelm1->rank == 0
2417 && is_gimple_min_invariant (oelm1->op)
2418 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2419 TREE_TYPE (oelast->op)))
2421 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2422 oelm1->op, oelast->op);
2424 if (folded && is_gimple_min_invariant (folded))
2426 if (dump_file && (dump_flags & TDF_DETAILS))
2427 fprintf (dump_file, "Merging constants\n");
2429 ops->pop ();
2430 ops->pop ();
2432 add_to_ops_vec (ops, folded);
2433 reassociate_stats.constants_eliminated++;
2435 optimize_ops_list (opcode, ops);
2436 return;
2441 eliminate_using_constants (opcode, ops);
2442 oelast = NULL;
2444 for (i = 0; ops->iterate (i, &oe);)
2446 bool done = false;
2448 if (eliminate_not_pairs (opcode, ops, i, oe))
2449 return;
2450 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2451 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2452 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2454 if (done)
2455 return;
2456 iterate = true;
2457 oelast = NULL;
2458 continue;
2460 oelast = oe;
2461 i++;
2464 if (iterate)
2465 optimize_ops_list (opcode, ops);
2468 /* The following functions are subroutines to optimize_range_tests and allow
2469 it to try to change a logical combination of comparisons into a range
2470 test.
2472 For example, both
2473 X == 2 || X == 5 || X == 3 || X == 4
2475 X >= 2 && X <= 5
2476 are converted to
2477 (unsigned) (X - 2) <= 3
2479 For more information see comments above fold_test_range in fold-const.cc,
2480 this implementation is for GIMPLE. */
2484 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2486 void
2487 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2489 if (!skip_exp)
2490 print_generic_expr (file, r->exp);
2491 fprintf (file, " %c[", r->in_p ? '+' : '-');
2492 print_generic_expr (file, r->low);
2493 fputs (", ", file);
2494 print_generic_expr (file, r->high);
2495 fputc (']', file);
2498 /* Dump the range entry R to STDERR. */
2500 DEBUG_FUNCTION void
2501 debug_range_entry (struct range_entry *r)
2503 dump_range_entry (stderr, r, false);
2504 fputc ('\n', stderr);
2507 /* This is similar to make_range in fold-const.cc, but on top of
2508 GIMPLE instead of trees. If EXP is non-NULL, it should be
2509 an SSA_NAME and STMT argument is ignored, otherwise STMT
2510 argument should be a GIMPLE_COND. */
2512 void
2513 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2515 int in_p;
2516 tree low, high;
2517 bool is_bool, strict_overflow_p;
2519 r->exp = NULL_TREE;
2520 r->in_p = false;
2521 r->strict_overflow_p = false;
2522 r->low = NULL_TREE;
2523 r->high = NULL_TREE;
2524 if (exp != NULL_TREE
2525 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2526 return;
2528 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2529 and see if we can refine the range. Some of the cases below may not
2530 happen, but it doesn't seem worth worrying about this. We "continue"
2531 the outer loop when we've changed something; otherwise we "break"
2532 the switch, which will "break" the while. */
2533 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2534 high = low;
2535 in_p = 0;
2536 strict_overflow_p = false;
2537 is_bool = false;
2538 if (exp == NULL_TREE)
2539 is_bool = true;
2540 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2542 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2543 is_bool = true;
2544 else
2545 return;
2547 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2548 is_bool = true;
2550 while (1)
2552 enum tree_code code;
2553 tree arg0, arg1, exp_type;
2554 tree nexp;
2555 location_t loc;
2557 if (exp != NULL_TREE)
2559 if (TREE_CODE (exp) != SSA_NAME
2560 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2561 break;
2563 stmt = SSA_NAME_DEF_STMT (exp);
2564 if (!is_gimple_assign (stmt))
2565 break;
2567 code = gimple_assign_rhs_code (stmt);
2568 arg0 = gimple_assign_rhs1 (stmt);
2569 arg1 = gimple_assign_rhs2 (stmt);
2570 exp_type = TREE_TYPE (exp);
2572 else
2574 code = gimple_cond_code (stmt);
2575 arg0 = gimple_cond_lhs (stmt);
2576 arg1 = gimple_cond_rhs (stmt);
2577 exp_type = boolean_type_node;
2580 if (TREE_CODE (arg0) != SSA_NAME
2581 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2582 break;
2583 loc = gimple_location (stmt);
2584 switch (code)
2586 case BIT_NOT_EXPR:
2587 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2588 /* Ensure the range is either +[-,0], +[0,0],
2589 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2590 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2591 or similar expression of unconditional true or
2592 false, it should not be negated. */
2593 && ((high && integer_zerop (high))
2594 || (low && integer_onep (low))))
2596 in_p = !in_p;
2597 exp = arg0;
2598 continue;
2600 break;
2601 case SSA_NAME:
2602 exp = arg0;
2603 continue;
2604 CASE_CONVERT:
2605 if (is_bool)
2607 if ((TYPE_PRECISION (exp_type) == 1
2608 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2609 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2610 return;
2612 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2614 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2615 is_bool = true;
2616 else
2617 return;
2619 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2620 is_bool = true;
2621 goto do_default;
2622 case EQ_EXPR:
2623 case NE_EXPR:
2624 case LT_EXPR:
2625 case LE_EXPR:
2626 case GE_EXPR:
2627 case GT_EXPR:
2628 is_bool = true;
2629 /* FALLTHRU */
2630 default:
2631 if (!is_bool)
2632 return;
2633 do_default:
2634 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2635 &low, &high, &in_p,
2636 &strict_overflow_p);
2637 if (nexp != NULL_TREE)
2639 exp = nexp;
2640 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2641 continue;
2643 break;
2645 break;
2647 if (is_bool)
2649 r->exp = exp;
2650 r->in_p = in_p;
2651 r->low = low;
2652 r->high = high;
2653 r->strict_overflow_p = strict_overflow_p;
2657 /* Comparison function for qsort. Sort entries
2658 without SSA_NAME exp first, then with SSA_NAMEs sorted
2659 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2660 by increasing ->low and if ->low is the same, by increasing
2661 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2662 maximum. */
2664 static int
2665 range_entry_cmp (const void *a, const void *b)
2667 const struct range_entry *p = (const struct range_entry *) a;
2668 const struct range_entry *q = (const struct range_entry *) b;
2670 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2672 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2674 /* Group range_entries for the same SSA_NAME together. */
2675 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2676 return -1;
2677 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2678 return 1;
2679 /* If ->low is different, NULL low goes first, then by
2680 ascending low. */
2681 if (p->low != NULL_TREE)
2683 if (q->low != NULL_TREE)
2685 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2686 p->low, q->low);
2687 if (tem && integer_onep (tem))
2688 return -1;
2689 tem = fold_binary (GT_EXPR, boolean_type_node,
2690 p->low, q->low);
2691 if (tem && integer_onep (tem))
2692 return 1;
2694 else
2695 return 1;
2697 else if (q->low != NULL_TREE)
2698 return -1;
2699 /* If ->high is different, NULL high goes last, before that by
2700 ascending high. */
2701 if (p->high != NULL_TREE)
2703 if (q->high != NULL_TREE)
2705 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2706 p->high, q->high);
2707 if (tem && integer_onep (tem))
2708 return -1;
2709 tem = fold_binary (GT_EXPR, boolean_type_node,
2710 p->high, q->high);
2711 if (tem && integer_onep (tem))
2712 return 1;
2714 else
2715 return -1;
2717 else if (q->high != NULL_TREE)
2718 return 1;
2719 /* If both ranges are the same, sort below by ascending idx. */
2721 else
2722 return 1;
2724 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2725 return -1;
2727 if (p->idx < q->idx)
2728 return -1;
2729 else
2731 gcc_checking_assert (p->idx > q->idx);
2732 return 1;
2736 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2737 insert needed statements BEFORE or after GSI. */
2739 static tree
2740 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2742 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2743 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2744 if (TREE_CODE (ret) != SSA_NAME)
2746 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2747 if (before)
2748 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2749 else
2750 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2751 ret = gimple_assign_lhs (g);
2753 return ret;
2756 /* Helper routine of optimize_range_test.
2757 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2758 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2759 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2760 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2761 an array of COUNT pointers to other ranges. Return
2762 true if the range merge has been successful.
2763 If OPCODE is ERROR_MARK, this is called from within
2764 maybe_optimize_range_tests and is performing inter-bb range optimization.
2765 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2766 oe->rank. */
2768 static bool
2769 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2770 struct range_entry **otherrangep,
2771 unsigned int count, enum tree_code opcode,
2772 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2773 bool in_p, tree low, tree high, bool strict_overflow_p)
2775 unsigned int idx = range->idx;
2776 struct range_entry *swap_with = NULL;
2777 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2778 if (opcode == ERROR_MARK)
2780 /* For inter-bb range test optimization, pick from the range tests
2781 the one which is tested in the earliest condition (one dominating
2782 the others), because otherwise there could be some UB (e.g. signed
2783 overflow) in following bbs that we'd expose which wasn't there in
2784 the original program. See PR104196. */
2785 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2786 basic_block range_bb = orig_range_bb;
2787 for (unsigned int i = 0; i < count; i++)
2789 struct range_entry *this_range;
2790 if (otherrange)
2791 this_range = otherrange + i;
2792 else
2793 this_range = otherrangep[i];
2794 operand_entry *oe = (*ops)[this_range->idx];
2795 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2796 if (range_bb != this_bb
2797 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2799 swap_with = this_range;
2800 range_bb = this_bb;
2801 idx = this_range->idx;
2804 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2805 only defined in later blocks. In this case we can't move the
2806 merged comparison earlier, so instead check if there are any stmts
2807 that might trigger signed integer overflow in between and rewrite
2808 them. But only after we check if the optimization is possible. */
2809 if (seq && swap_with)
2811 rewrite_bb_first = range_bb;
2812 rewrite_bb_last = orig_range_bb;
2813 idx = range->idx;
2814 swap_with = NULL;
2817 operand_entry *oe = (*ops)[idx];
2818 tree op = oe->op;
2819 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2820 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2821 location_t loc = gimple_location (stmt);
2822 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2823 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2824 in_p, low, high);
2825 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2826 gimple_stmt_iterator gsi;
2827 unsigned int i, uid;
2829 if (tem == NULL_TREE)
2830 return false;
2832 /* If op is default def SSA_NAME, there is no place to insert the
2833 new comparison. Give up, unless we can use OP itself as the
2834 range test. */
2835 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2837 if (op == range->exp
2838 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2839 || TREE_CODE (optype) == BOOLEAN_TYPE)
2840 && (op == tem
2841 || (TREE_CODE (tem) == EQ_EXPR
2842 && TREE_OPERAND (tem, 0) == op
2843 && integer_onep (TREE_OPERAND (tem, 1))))
2844 && opcode != BIT_IOR_EXPR
2845 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2847 stmt = NULL;
2848 tem = op;
2850 else
2851 return false;
2854 if (swap_with)
2855 std::swap (range->idx, swap_with->idx);
2857 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2858 warning_at (loc, OPT_Wstrict_overflow,
2859 "assuming signed overflow does not occur "
2860 "when simplifying range test");
2862 if (dump_file && (dump_flags & TDF_DETAILS))
2864 struct range_entry *r;
2865 fprintf (dump_file, "Optimizing range tests ");
2866 dump_range_entry (dump_file, range, false);
2867 for (i = 0; i < count; i++)
2869 if (otherrange)
2870 r = otherrange + i;
2871 else
2872 r = otherrangep[i];
2873 if (r->exp
2874 && r->exp != range->exp
2875 && TREE_CODE (r->exp) == SSA_NAME)
2877 fprintf (dump_file, " and ");
2878 dump_range_entry (dump_file, r, false);
2880 else
2882 fprintf (dump_file, " and");
2883 dump_range_entry (dump_file, r, true);
2886 fprintf (dump_file, "\n into ");
2887 print_generic_expr (dump_file, tem);
2888 fprintf (dump_file, "\n");
2891 /* In inter-bb range optimization mode, if we have a seq, we can't
2892 move the merged comparison to the earliest bb from the comparisons
2893 being replaced, so instead rewrite stmts that could trigger signed
2894 integer overflow. */
2895 for (basic_block bb = rewrite_bb_last;
2896 bb != rewrite_bb_first; bb = single_pred (bb))
2897 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2898 !gsi_end_p (gsi); gsi_next (&gsi))
2900 gimple *stmt = gsi_stmt (gsi);
2901 if (is_gimple_assign (stmt))
2902 if (tree lhs = gimple_assign_lhs (stmt))
2903 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2904 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2905 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2907 enum tree_code code = gimple_assign_rhs_code (stmt);
2908 if (arith_code_with_undefined_signed_overflow (code))
2910 gimple_stmt_iterator gsip = gsi;
2911 gimple_stmt_iterator gsin = gsi;
2912 gsi_prev (&gsip);
2913 gsi_next (&gsin);
2914 rewrite_to_defined_overflow (stmt, true);
2915 unsigned uid = gimple_uid (stmt);
2916 if (gsi_end_p (gsip))
2917 gsip = gsi_after_labels (bb);
2918 else
2919 gsi_next (&gsip);
2920 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2921 gsi_next (&gsip))
2922 gimple_set_uid (gsi_stmt (gsip), uid);
2927 if (opcode == BIT_IOR_EXPR
2928 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2929 tem = invert_truthvalue_loc (loc, tem);
2931 tem = fold_convert_loc (loc, optype, tem);
2932 if (stmt)
2934 gsi = gsi_for_stmt (stmt);
2935 uid = gimple_uid (stmt);
2937 else
2939 gsi = gsi_none ();
2940 uid = 0;
2942 if (stmt == NULL)
2943 gcc_checking_assert (tem == op);
2944 /* In rare cases range->exp can be equal to lhs of stmt.
2945 In that case we have to insert after the stmt rather then before
2946 it. If stmt is a PHI, insert it at the start of the basic block. */
2947 else if (op != range->exp)
2949 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2950 tem = force_into_ssa_name (&gsi, tem, true);
2951 gsi_prev (&gsi);
2953 else if (gimple_code (stmt) != GIMPLE_PHI)
2955 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2956 tem = force_into_ssa_name (&gsi, tem, false);
2958 else
2960 gsi = gsi_after_labels (gimple_bb (stmt));
2961 if (!gsi_end_p (gsi))
2962 uid = gimple_uid (gsi_stmt (gsi));
2963 else
2965 gsi = gsi_start_bb (gimple_bb (stmt));
2966 uid = 1;
2967 while (!gsi_end_p (gsi))
2969 uid = gimple_uid (gsi_stmt (gsi));
2970 gsi_next (&gsi);
2973 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2974 tem = force_into_ssa_name (&gsi, tem, true);
2975 if (gsi_end_p (gsi))
2976 gsi = gsi_last_bb (gimple_bb (stmt));
2977 else
2978 gsi_prev (&gsi);
2980 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2981 if (gimple_uid (gsi_stmt (gsi)))
2982 break;
2983 else
2984 gimple_set_uid (gsi_stmt (gsi), uid);
2986 oe->op = tem;
2987 range->exp = exp;
2988 range->low = low;
2989 range->high = high;
2990 range->in_p = in_p;
2991 range->strict_overflow_p = false;
2993 for (i = 0; i < count; i++)
2995 if (otherrange)
2996 range = otherrange + i;
2997 else
2998 range = otherrangep[i];
2999 oe = (*ops)[range->idx];
3000 /* Now change all the other range test immediate uses, so that
3001 those tests will be optimized away. */
3002 if (opcode == ERROR_MARK)
3004 if (oe->op)
3005 oe->op = build_int_cst (TREE_TYPE (oe->op),
3006 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3007 else
3008 oe->op = (oe->rank == BIT_IOR_EXPR
3009 ? boolean_false_node : boolean_true_node);
3011 else
3012 oe->op = error_mark_node;
3013 range->exp = NULL_TREE;
3014 range->low = NULL_TREE;
3015 range->high = NULL_TREE;
3017 return true;
3020 /* Optimize X == CST1 || X == CST2
3021 if popcount (CST1 ^ CST2) == 1 into
3022 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3023 Similarly for ranges. E.g.
3024 X != 2 && X != 3 && X != 10 && X != 11
3025 will be transformed by the previous optimization into
3026 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3027 and this loop can transform that into
3028 !(((X & ~8) - 2U) <= 1U). */
3030 static bool
3031 optimize_range_tests_xor (enum tree_code opcode, tree type,
3032 tree lowi, tree lowj, tree highi, tree highj,
3033 vec<operand_entry *> *ops,
3034 struct range_entry *rangei,
3035 struct range_entry *rangej)
3037 tree lowxor, highxor, tem, exp;
3038 /* Check lowi ^ lowj == highi ^ highj and
3039 popcount (lowi ^ lowj) == 1. */
3040 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3041 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3042 return false;
3043 if (!integer_pow2p (lowxor))
3044 return false;
3045 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3046 if (!tree_int_cst_equal (lowxor, highxor))
3047 return false;
3049 exp = rangei->exp;
3050 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3051 int prec = GET_MODE_PRECISION (mode);
3052 if (TYPE_PRECISION (type) < prec
3053 || (wi::to_wide (TYPE_MIN_VALUE (type))
3054 != wi::min_value (prec, TYPE_SIGN (type)))
3055 || (wi::to_wide (TYPE_MAX_VALUE (type))
3056 != wi::max_value (prec, TYPE_SIGN (type))))
3058 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3059 exp = fold_convert (type, exp);
3060 lowxor = fold_convert (type, lowxor);
3061 lowi = fold_convert (type, lowi);
3062 highi = fold_convert (type, highi);
3064 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3065 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3066 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3067 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3068 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3069 NULL, rangei->in_p, lowj, highj,
3070 rangei->strict_overflow_p
3071 || rangej->strict_overflow_p))
3072 return true;
3073 return false;
3076 /* Optimize X == CST1 || X == CST2
3077 if popcount (CST2 - CST1) == 1 into
3078 ((X - CST1) & ~(CST2 - CST1)) == 0.
3079 Similarly for ranges. E.g.
3080 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3081 || X == 75 || X == 45
3082 will be transformed by the previous optimization into
3083 (X - 43U) <= 3U || (X - 75U) <= 3U
3084 and this loop can transform that into
3085 ((X - 43U) & ~(75U - 43U)) <= 3U. */
3086 static bool
3087 optimize_range_tests_diff (enum tree_code opcode, tree type,
3088 tree lowi, tree lowj, tree highi, tree highj,
3089 vec<operand_entry *> *ops,
3090 struct range_entry *rangei,
3091 struct range_entry *rangej)
3093 tree tem1, tem2, mask;
3094 /* Check highi - lowi == highj - lowj. */
3095 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3096 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3097 return false;
3098 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3099 if (!tree_int_cst_equal (tem1, tem2))
3100 return false;
3101 /* Check popcount (lowj - lowi) == 1. */
3102 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3103 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3104 return false;
3105 if (!integer_pow2p (tem1))
3106 return false;
3108 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3109 int prec = GET_MODE_PRECISION (mode);
3110 if (TYPE_PRECISION (type) < prec
3111 || (wi::to_wide (TYPE_MIN_VALUE (type))
3112 != wi::min_value (prec, TYPE_SIGN (type)))
3113 || (wi::to_wide (TYPE_MAX_VALUE (type))
3114 != wi::max_value (prec, TYPE_SIGN (type))))
3115 type = build_nonstandard_integer_type (prec, 1);
3116 else
3117 type = unsigned_type_for (type);
3118 tem1 = fold_convert (type, tem1);
3119 tem2 = fold_convert (type, tem2);
3120 lowi = fold_convert (type, lowi);
3121 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3122 tem1 = fold_build2 (MINUS_EXPR, type,
3123 fold_convert (type, rangei->exp), lowi);
3124 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3125 lowj = build_int_cst (type, 0);
3126 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3127 NULL, rangei->in_p, lowj, tem2,
3128 rangei->strict_overflow_p
3129 || rangej->strict_overflow_p))
3130 return true;
3131 return false;
3134 /* It does some common checks for function optimize_range_tests_xor and
3135 optimize_range_tests_diff.
3136 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3137 Else it calls optimize_range_tests_diff. */
3139 static bool
3140 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3141 bool optimize_xor, vec<operand_entry *> *ops,
3142 struct range_entry *ranges)
3144 int i, j;
3145 bool any_changes = false;
3146 for (i = first; i < length; i++)
3148 tree lowi, highi, lowj, highj, type, tem;
3150 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3151 continue;
3152 type = TREE_TYPE (ranges[i].exp);
3153 if (!INTEGRAL_TYPE_P (type))
3154 continue;
3155 lowi = ranges[i].low;
3156 if (lowi == NULL_TREE)
3157 lowi = TYPE_MIN_VALUE (type);
3158 highi = ranges[i].high;
3159 if (highi == NULL_TREE)
3160 continue;
3161 for (j = i + 1; j < length && j < i + 64; j++)
3163 bool changes;
3164 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3165 continue;
3166 lowj = ranges[j].low;
3167 if (lowj == NULL_TREE)
3168 continue;
3169 highj = ranges[j].high;
3170 if (highj == NULL_TREE)
3171 highj = TYPE_MAX_VALUE (type);
3172 /* Check lowj > highi. */
3173 tem = fold_binary (GT_EXPR, boolean_type_node,
3174 lowj, highi);
3175 if (tem == NULL_TREE || !integer_onep (tem))
3176 continue;
3177 if (optimize_xor)
3178 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3179 highi, highj, ops,
3180 ranges + i, ranges + j);
3181 else
3182 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3183 highi, highj, ops,
3184 ranges + i, ranges + j);
3185 if (changes)
3187 any_changes = true;
3188 break;
3192 return any_changes;
3195 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3196 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3197 EXP on success, NULL otherwise. */
3199 static tree
3200 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3201 wide_int *mask, tree *totallowp)
3203 tree tem = int_const_binop (MINUS_EXPR, high, low);
3204 if (tem == NULL_TREE
3205 || TREE_CODE (tem) != INTEGER_CST
3206 || TREE_OVERFLOW (tem)
3207 || tree_int_cst_sgn (tem) == -1
3208 || compare_tree_int (tem, prec) != -1)
3209 return NULL_TREE;
3211 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3212 *mask = wi::shifted_mask (0, max, false, prec);
3213 if (TREE_CODE (exp) == BIT_AND_EXPR
3214 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3216 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3217 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3218 if (wi::popcount (msk) == 1
3219 && wi::ltu_p (msk, prec - max))
3221 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3222 max += msk.to_uhwi ();
3223 exp = TREE_OPERAND (exp, 0);
3224 if (integer_zerop (low)
3225 && TREE_CODE (exp) == PLUS_EXPR
3226 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3228 tree ret = TREE_OPERAND (exp, 0);
3229 STRIP_NOPS (ret);
3230 widest_int bias
3231 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3232 TYPE_PRECISION (TREE_TYPE (low))));
3233 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3234 if (totallowp)
3236 *totallowp = tbias;
3237 return ret;
3239 else if (!tree_int_cst_lt (totallow, tbias))
3240 return NULL_TREE;
3241 bias = wi::to_widest (tbias);
3242 bias -= wi::to_widest (totallow);
3243 if (bias >= 0 && bias < prec - max)
3245 *mask = wi::lshift (*mask, bias);
3246 return ret;
3251 if (totallowp)
3252 return exp;
3253 if (!tree_int_cst_lt (totallow, low))
3254 return exp;
3255 tem = int_const_binop (MINUS_EXPR, low, totallow);
3256 if (tem == NULL_TREE
3257 || TREE_CODE (tem) != INTEGER_CST
3258 || TREE_OVERFLOW (tem)
3259 || compare_tree_int (tem, prec - max) == 1)
3260 return NULL_TREE;
3262 *mask = wi::lshift (*mask, wi::to_widest (tem));
3263 return exp;
3266 /* Attempt to optimize small range tests using bit test.
3267 E.g.
3268 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3269 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3270 has been by earlier optimizations optimized into:
3271 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3272 As all the 43 through 82 range is less than 64 numbers,
3273 for 64-bit word targets optimize that into:
3274 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3276 static bool
3277 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3278 vec<operand_entry *> *ops,
3279 struct range_entry *ranges)
3281 int i, j;
3282 bool any_changes = false;
3283 int prec = GET_MODE_BITSIZE (word_mode);
3284 auto_vec<struct range_entry *, 64> candidates;
3286 for (i = first; i < length - 1; i++)
3288 tree lowi, highi, lowj, highj, type;
3290 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3291 continue;
3292 type = TREE_TYPE (ranges[i].exp);
3293 if (!INTEGRAL_TYPE_P (type))
3294 continue;
3295 lowi = ranges[i].low;
3296 if (lowi == NULL_TREE)
3297 lowi = TYPE_MIN_VALUE (type);
3298 highi = ranges[i].high;
3299 if (highi == NULL_TREE)
3300 continue;
3301 wide_int mask;
3302 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3303 highi, &mask, &lowi);
3304 if (exp == NULL_TREE)
3305 continue;
3306 bool strict_overflow_p = ranges[i].strict_overflow_p;
3307 candidates.truncate (0);
3308 int end = MIN (i + 64, length);
3309 for (j = i + 1; j < end; j++)
3311 tree exp2;
3312 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3313 continue;
3314 if (ranges[j].exp == exp)
3316 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3318 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3319 if (exp2 == exp)
3321 else if (TREE_CODE (exp2) == PLUS_EXPR)
3323 exp2 = TREE_OPERAND (exp2, 0);
3324 STRIP_NOPS (exp2);
3325 if (exp2 != exp)
3326 continue;
3328 else
3329 continue;
3331 else
3332 continue;
3333 lowj = ranges[j].low;
3334 if (lowj == NULL_TREE)
3335 continue;
3336 highj = ranges[j].high;
3337 if (highj == NULL_TREE)
3338 highj = TYPE_MAX_VALUE (type);
3339 wide_int mask2;
3340 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3341 highj, &mask2, NULL);
3342 if (exp2 != exp)
3343 continue;
3344 mask |= mask2;
3345 strict_overflow_p |= ranges[j].strict_overflow_p;
3346 candidates.safe_push (&ranges[j]);
3349 /* If every possible relative value of the expression is a valid shift
3350 amount, then we can merge the entry test in the bit test. In this
3351 case, if we would need otherwise 2 or more comparisons, then use
3352 the bit test; in the other cases, the threshold is 3 comparisons. */
3353 bool entry_test_needed;
3354 value_range r;
3355 if (TREE_CODE (exp) == SSA_NAME
3356 && get_range_query (cfun)->range_of_expr (r, exp)
3357 && r.kind () == VR_RANGE
3358 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3360 wide_int min = r.lower_bound ();
3361 wide_int ilowi = wi::to_wide (lowi);
3362 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3364 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3365 mask = wi::lshift (mask, ilowi - min);
3367 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3369 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3370 mask = wi::lrshift (mask, min - ilowi);
3372 entry_test_needed = false;
3374 else
3375 entry_test_needed = true;
3376 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3378 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3379 wi::to_widest (lowi)
3380 + prec - 1 - wi::clz (mask));
3381 operand_entry *oe = (*ops)[ranges[i].idx];
3382 tree op = oe->op;
3383 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3384 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3385 location_t loc = gimple_location (stmt);
3386 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3388 /* See if it isn't cheaper to pretend the minimum value of the
3389 range is 0, if maximum value is small enough.
3390 We can avoid then subtraction of the minimum value, but the
3391 mask constant could be perhaps more expensive. */
3392 if (compare_tree_int (lowi, 0) > 0
3393 && compare_tree_int (high, prec) < 0)
3395 int cost_diff;
3396 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3397 rtx reg = gen_raw_REG (word_mode, 10000);
3398 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3399 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3400 GEN_INT (-m)),
3401 word_mode, speed_p);
3402 rtx r = immed_wide_int_const (mask, word_mode);
3403 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3404 word_mode, speed_p);
3405 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3406 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3407 word_mode, speed_p);
3408 if (cost_diff > 0)
3410 mask = wi::lshift (mask, m);
3411 lowi = build_zero_cst (TREE_TYPE (lowi));
3415 tree tem;
3416 if (entry_test_needed)
3418 tem = build_range_check (loc, optype, unshare_expr (exp),
3419 false, lowi, high);
3420 if (tem == NULL_TREE || is_gimple_val (tem))
3421 continue;
3423 else
3424 tem = NULL_TREE;
3425 tree etype = unsigned_type_for (TREE_TYPE (exp));
3426 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3427 fold_convert_loc (loc, etype, exp),
3428 fold_convert_loc (loc, etype, lowi));
3429 exp = fold_convert_loc (loc, integer_type_node, exp);
3430 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3431 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3432 build_int_cst (word_type, 1), exp);
3433 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3434 wide_int_to_tree (word_type, mask));
3435 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3436 build_zero_cst (word_type));
3437 if (is_gimple_val (exp))
3438 continue;
3440 /* The shift might have undefined behavior if TEM is true,
3441 but reassociate_bb isn't prepared to have basic blocks
3442 split when it is running. So, temporarily emit a code
3443 with BIT_IOR_EXPR instead of &&, and fix it up in
3444 branch_fixup. */
3445 gimple_seq seq = NULL;
3446 if (tem)
3448 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3449 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3450 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3452 gimple_seq seq2;
3453 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3454 gimple_seq_add_seq_without_update (&seq, seq2);
3455 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3456 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3457 if (tem)
3459 gimple *g = gimple_build_assign (make_ssa_name (optype),
3460 BIT_IOR_EXPR, tem, exp);
3461 gimple_set_location (g, loc);
3462 gimple_seq_add_stmt_without_update (&seq, g);
3463 exp = gimple_assign_lhs (g);
3465 tree val = build_zero_cst (optype);
3466 if (update_range_test (&ranges[i], NULL, candidates.address (),
3467 candidates.length (), opcode, ops, exp,
3468 seq, false, val, val, strict_overflow_p))
3470 any_changes = true;
3471 if (tem)
3472 reassoc_branch_fixups.safe_push (tem);
3474 else
3475 gimple_seq_discard (seq);
3478 return any_changes;
3481 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3482 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
3483 Also, handle x < C && y < C && z < C where C is power of two as
3484 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3485 as (x | y | z) < 0. */
3487 static bool
3488 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3489 vec<operand_entry *> *ops,
3490 struct range_entry *ranges)
3492 int i;
3493 unsigned int b;
3494 bool any_changes = false;
3495 auto_vec<int, 128> buckets;
3496 auto_vec<int, 32> chains;
3497 auto_vec<struct range_entry *, 32> candidates;
3499 for (i = first; i < length; i++)
3501 int idx;
3503 if (ranges[i].exp == NULL_TREE
3504 || TREE_CODE (ranges[i].exp) != SSA_NAME
3505 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3506 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3507 continue;
3509 if (ranges[i].low != NULL_TREE
3510 && ranges[i].high != NULL_TREE
3511 && ranges[i].in_p
3512 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3514 idx = !integer_zerop (ranges[i].low);
3515 if (idx && !integer_all_onesp (ranges[i].low))
3516 continue;
3518 else if (ranges[i].high != NULL_TREE
3519 && TREE_CODE (ranges[i].high) == INTEGER_CST
3520 && ranges[i].in_p)
3522 wide_int w = wi::to_wide (ranges[i].high);
3523 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3524 int l = wi::clz (w);
3525 idx = 2;
3526 if (l <= 0
3527 || l >= prec
3528 || w != wi::mask (prec - l, false, prec))
3529 continue;
3530 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3531 && ranges[i].low == NULL_TREE)
3532 || (ranges[i].low
3533 && integer_zerop (ranges[i].low))))
3534 continue;
3536 else if (ranges[i].high == NULL_TREE
3537 && ranges[i].low != NULL_TREE
3538 /* Perform this optimization only in the last
3539 reassoc pass, as it interferes with the reassociation
3540 itself or could also with VRP etc. which might not
3541 be able to virtually undo the optimization. */
3542 && !reassoc_insert_powi_p
3543 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3544 && integer_zerop (ranges[i].low))
3545 idx = 3;
3546 else
3547 continue;
3549 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3550 if (buckets.length () <= b)
3551 buckets.safe_grow_cleared (b + 1, true);
3552 if (chains.length () <= (unsigned) i)
3553 chains.safe_grow (i + 1, true);
3554 chains[i] = buckets[b];
3555 buckets[b] = i + 1;
3558 FOR_EACH_VEC_ELT (buckets, b, i)
3559 if (i && chains[i - 1])
3561 int j, k = i;
3562 if ((b % 4) == 2)
3564 /* When ranges[X - 1].high + 1 is a power of two,
3565 we need to process the same bucket up to
3566 precision - 1 times, each time split the entries
3567 with the same high bound into one chain and the
3568 rest into another one to be processed later. */
3569 int this_prev = i;
3570 int other_prev = 0;
3571 for (j = chains[i - 1]; j; j = chains[j - 1])
3573 if (tree_int_cst_equal (ranges[i - 1].high,
3574 ranges[j - 1].high))
3576 chains[this_prev - 1] = j;
3577 this_prev = j;
3579 else if (other_prev == 0)
3581 buckets[b] = j;
3582 other_prev = j;
3584 else
3586 chains[other_prev - 1] = j;
3587 other_prev = j;
3590 chains[this_prev - 1] = 0;
3591 if (other_prev)
3592 chains[other_prev - 1] = 0;
3593 if (chains[i - 1] == 0)
3595 if (other_prev)
3596 b--;
3597 continue;
3600 for (j = chains[i - 1]; j; j = chains[j - 1])
3602 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3603 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3604 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3605 k = j;
3607 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3608 tree type2 = NULL_TREE;
3609 bool strict_overflow_p = false;
3610 candidates.truncate (0);
3611 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3612 type1 = pointer_sized_int_node;
3613 for (j = i; j; j = chains[j - 1])
3615 tree type = TREE_TYPE (ranges[j - 1].exp);
3616 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3617 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3618 type = pointer_sized_int_node;
3619 if ((b % 4) == 3)
3621 /* For the signed < 0 cases, the types should be
3622 really compatible (all signed with the same precision,
3623 instead put ranges that have different in_p from
3624 k first. */
3625 if (!useless_type_conversion_p (type1, type))
3626 continue;
3627 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3628 candidates.safe_push (&ranges[j - 1]);
3629 type2 = type1;
3630 continue;
3632 if (j == k
3633 || useless_type_conversion_p (type1, type))
3635 else if (type2 == NULL_TREE
3636 || useless_type_conversion_p (type2, type))
3638 if (type2 == NULL_TREE)
3639 type2 = type;
3640 candidates.safe_push (&ranges[j - 1]);
3643 unsigned l = candidates.length ();
3644 for (j = i; j; j = chains[j - 1])
3646 tree type = TREE_TYPE (ranges[j - 1].exp);
3647 if (j == k)
3648 continue;
3649 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3650 type = pointer_sized_int_node;
3651 if ((b % 4) == 3)
3653 if (!useless_type_conversion_p (type1, type))
3654 continue;
3655 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3656 candidates.safe_push (&ranges[j - 1]);
3657 continue;
3659 if (useless_type_conversion_p (type1, type))
3661 else if (type2 == NULL_TREE
3662 || useless_type_conversion_p (type2, type))
3663 continue;
3664 candidates.safe_push (&ranges[j - 1]);
3666 gimple_seq seq = NULL;
3667 tree op = NULL_TREE;
3668 unsigned int id;
3669 struct range_entry *r;
3670 candidates.safe_push (&ranges[k - 1]);
3671 FOR_EACH_VEC_ELT (candidates, id, r)
3673 gimple *g;
3674 enum tree_code code;
3675 if (id == 0)
3677 op = r->exp;
3678 continue;
3680 if (id == l
3681 || POINTER_TYPE_P (TREE_TYPE (op))
3682 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3684 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3685 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3686 if (code == BIT_NOT_EXPR
3687 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3689 g = gimple_build_assign (make_ssa_name (type3),
3690 NOP_EXPR, op);
3691 gimple_seq_add_stmt_without_update (&seq, g);
3692 op = gimple_assign_lhs (g);
3694 g = gimple_build_assign (make_ssa_name (type3), code, op);
3695 gimple_seq_add_stmt_without_update (&seq, g);
3696 op = gimple_assign_lhs (g);
3698 tree type = TREE_TYPE (r->exp);
3699 tree exp = r->exp;
3700 if (POINTER_TYPE_P (type)
3701 || TREE_CODE (type) == OFFSET_TYPE
3702 || (id >= l && !useless_type_conversion_p (type1, type)))
3704 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3705 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3706 gimple_seq_add_stmt_without_update (&seq, g);
3707 exp = gimple_assign_lhs (g);
3709 if ((b % 4) == 3)
3710 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3711 else
3712 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3713 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3714 code, op, exp);
3715 gimple_seq_add_stmt_without_update (&seq, g);
3716 op = gimple_assign_lhs (g);
3718 type1 = TREE_TYPE (ranges[k - 1].exp);
3719 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3721 gimple *g
3722 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3723 gimple_seq_add_stmt_without_update (&seq, g);
3724 op = gimple_assign_lhs (g);
3726 candidates.pop ();
3727 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3728 candidates.length (), opcode, ops, op,
3729 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3730 ranges[k - 1].high, strict_overflow_p))
3731 any_changes = true;
3732 else
3733 gimple_seq_discard (seq);
3734 if ((b % 4) == 2 && buckets[b] != i)
3735 /* There is more work to do for this bucket. */
3736 b--;
3739 return any_changes;
3742 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3743 a >= 0 && a < b into (unsigned) a < (unsigned) b
3744 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3746 static bool
3747 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3748 vec<operand_entry *> *ops,
3749 struct range_entry *ranges,
3750 basic_block first_bb)
3752 int i;
3753 bool any_changes = false;
3754 hash_map<tree, int> *map = NULL;
3756 for (i = first; i < length; i++)
3758 if (ranges[i].exp == NULL_TREE
3759 || TREE_CODE (ranges[i].exp) != SSA_NAME
3760 || !ranges[i].in_p)
3761 continue;
3763 tree type = TREE_TYPE (ranges[i].exp);
3764 if (!INTEGRAL_TYPE_P (type)
3765 || TYPE_UNSIGNED (type)
3766 || ranges[i].low == NULL_TREE
3767 || !integer_zerop (ranges[i].low)
3768 || ranges[i].high != NULL_TREE)
3769 continue;
3770 /* EXP >= 0 here. */
3771 if (map == NULL)
3772 map = new hash_map <tree, int>;
3773 map->put (ranges[i].exp, i);
3776 if (map == NULL)
3777 return false;
3779 for (i = 0; i < length; i++)
3781 bool in_p = ranges[i].in_p;
3782 if (ranges[i].low == NULL_TREE
3783 || ranges[i].high == NULL_TREE)
3784 continue;
3785 if (!integer_zerop (ranges[i].low)
3786 || !integer_zerop (ranges[i].high))
3788 if (ranges[i].exp
3789 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3790 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3791 && integer_onep (ranges[i].low)
3792 && integer_onep (ranges[i].high))
3793 in_p = !in_p;
3794 else
3795 continue;
3798 gimple *stmt;
3799 tree_code ccode;
3800 tree rhs1, rhs2;
3801 if (ranges[i].exp)
3803 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3804 continue;
3805 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3806 if (!is_gimple_assign (stmt))
3807 continue;
3808 ccode = gimple_assign_rhs_code (stmt);
3809 rhs1 = gimple_assign_rhs1 (stmt);
3810 rhs2 = gimple_assign_rhs2 (stmt);
3812 else
3814 operand_entry *oe = (*ops)[ranges[i].idx];
3815 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3816 if (gimple_code (stmt) != GIMPLE_COND)
3817 continue;
3818 ccode = gimple_cond_code (stmt);
3819 rhs1 = gimple_cond_lhs (stmt);
3820 rhs2 = gimple_cond_rhs (stmt);
3823 if (TREE_CODE (rhs1) != SSA_NAME
3824 || rhs2 == NULL_TREE
3825 || TREE_CODE (rhs2) != SSA_NAME)
3826 continue;
3828 switch (ccode)
3830 case GT_EXPR:
3831 case GE_EXPR:
3832 case LT_EXPR:
3833 case LE_EXPR:
3834 break;
3835 default:
3836 continue;
3838 if (in_p)
3839 ccode = invert_tree_comparison (ccode, false);
3840 switch (ccode)
3842 case GT_EXPR:
3843 case GE_EXPR:
3844 std::swap (rhs1, rhs2);
3845 ccode = swap_tree_comparison (ccode);
3846 break;
3847 case LT_EXPR:
3848 case LE_EXPR:
3849 break;
3850 default:
3851 gcc_unreachable ();
3854 int *idx = map->get (rhs1);
3855 if (idx == NULL)
3856 continue;
3858 /* maybe_optimize_range_tests allows statements without side-effects
3859 in the basic blocks as long as they are consumed in the same bb.
3860 Make sure rhs2's def stmt is not among them, otherwise we can't
3861 use safely get_nonzero_bits on it. E.g. in:
3862 # RANGE [-83, 1] NONZERO 173
3863 # k_32 = PHI <k_47(13), k_12(9)>
3865 if (k_32 >= 0)
3866 goto <bb 5>; [26.46%]
3867 else
3868 goto <bb 9>; [73.54%]
3870 <bb 5> [local count: 140323371]:
3871 # RANGE [0, 1] NONZERO 1
3872 _5 = (int) k_32;
3873 # RANGE [0, 4] NONZERO 4
3874 _21 = _5 << 2;
3875 # RANGE [0, 4] NONZERO 4
3876 iftmp.0_44 = (char) _21;
3877 if (k_32 < iftmp.0_44)
3878 goto <bb 6>; [84.48%]
3879 else
3880 goto <bb 9>; [15.52%]
3881 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3882 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3883 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3884 those stmts even for negative k_32 and the value ranges would be no
3885 longer guaranteed and so the optimization would be invalid. */
3886 while (opcode == ERROR_MARK)
3888 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3889 basic_block bb2 = gimple_bb (g);
3890 if (bb2
3891 && bb2 != first_bb
3892 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3894 /* As an exception, handle a few common cases. */
3895 if (gimple_assign_cast_p (g)
3896 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3898 tree op0 = gimple_assign_rhs1 (g);
3899 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3900 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3901 > TYPE_PRECISION (TREE_TYPE (op0))))
3902 /* Zero-extension is always ok. */
3903 break;
3904 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3905 == TYPE_PRECISION (TREE_TYPE (op0))
3906 && TREE_CODE (op0) == SSA_NAME)
3908 /* Cast from signed to unsigned or vice versa. Retry
3909 with the op0 as new rhs2. */
3910 rhs2 = op0;
3911 continue;
3914 else if (is_gimple_assign (g)
3915 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3916 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3917 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3918 /* Masking with INTEGER_CST with MSB clear is always ok
3919 too. */
3920 break;
3921 rhs2 = NULL_TREE;
3923 break;
3925 if (rhs2 == NULL_TREE)
3926 continue;
3928 wide_int nz = get_nonzero_bits (rhs2);
3929 if (wi::neg_p (nz))
3930 continue;
3932 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3933 and RHS2 is known to be RHS2 >= 0. */
3934 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3936 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3937 if ((ranges[*idx].strict_overflow_p
3938 || ranges[i].strict_overflow_p)
3939 && issue_strict_overflow_warning (wc))
3940 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3941 "assuming signed overflow does not occur "
3942 "when simplifying range test");
3944 if (dump_file && (dump_flags & TDF_DETAILS))
3946 struct range_entry *r = &ranges[*idx];
3947 fprintf (dump_file, "Optimizing range test ");
3948 print_generic_expr (dump_file, r->exp);
3949 fprintf (dump_file, " +[");
3950 print_generic_expr (dump_file, r->low);
3951 fprintf (dump_file, ", ");
3952 print_generic_expr (dump_file, r->high);
3953 fprintf (dump_file, "] and comparison ");
3954 print_generic_expr (dump_file, rhs1);
3955 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3956 print_generic_expr (dump_file, rhs2);
3957 fprintf (dump_file, "\n into (");
3958 print_generic_expr (dump_file, utype);
3959 fprintf (dump_file, ") ");
3960 print_generic_expr (dump_file, rhs1);
3961 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3962 print_generic_expr (dump_file, utype);
3963 fprintf (dump_file, ") ");
3964 print_generic_expr (dump_file, rhs2);
3965 fprintf (dump_file, "\n");
3968 operand_entry *oe = (*ops)[ranges[i].idx];
3969 ranges[i].in_p = 0;
3970 if (opcode == BIT_IOR_EXPR
3971 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3973 ranges[i].in_p = 1;
3974 ccode = invert_tree_comparison (ccode, false);
3977 unsigned int uid = gimple_uid (stmt);
3978 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3979 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3980 gimple_set_uid (g, uid);
3981 rhs1 = gimple_assign_lhs (g);
3982 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3983 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3985 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3986 gimple_set_uid (g, uid);
3987 rhs2 = gimple_assign_lhs (g);
3988 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3990 if (tree_swap_operands_p (rhs1, rhs2))
3992 std::swap (rhs1, rhs2);
3993 ccode = swap_tree_comparison (ccode);
3995 if (gimple_code (stmt) == GIMPLE_COND)
3997 gcond *c = as_a <gcond *> (stmt);
3998 gimple_cond_set_code (c, ccode);
3999 gimple_cond_set_lhs (c, rhs1);
4000 gimple_cond_set_rhs (c, rhs2);
4001 update_stmt (stmt);
4003 else
4005 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4006 if (!INTEGRAL_TYPE_P (ctype)
4007 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4008 && TYPE_PRECISION (ctype) != 1))
4009 ctype = boolean_type_node;
4010 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4011 gimple_set_uid (g, uid);
4012 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4013 if (oe->op && ctype != TREE_TYPE (oe->op))
4015 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4016 NOP_EXPR, gimple_assign_lhs (g));
4017 gimple_set_uid (g, uid);
4018 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4020 ranges[i].exp = gimple_assign_lhs (g);
4021 oe->op = ranges[i].exp;
4022 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4023 ranges[i].high = ranges[i].low;
4025 ranges[i].strict_overflow_p = false;
4026 oe = (*ops)[ranges[*idx].idx];
4027 /* Now change all the other range test immediate uses, so that
4028 those tests will be optimized away. */
4029 if (opcode == ERROR_MARK)
4031 if (oe->op)
4032 oe->op = build_int_cst (TREE_TYPE (oe->op),
4033 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4034 else
4035 oe->op = (oe->rank == BIT_IOR_EXPR
4036 ? boolean_false_node : boolean_true_node);
4038 else
4039 oe->op = error_mark_node;
4040 ranges[*idx].exp = NULL_TREE;
4041 ranges[*idx].low = NULL_TREE;
4042 ranges[*idx].high = NULL_TREE;
4043 any_changes = true;
4046 delete map;
4047 return any_changes;
4050 /* Optimize range tests, similarly how fold_range_test optimizes
4051 it on trees. The tree code for the binary
4052 operation between all the operands is OPCODE.
4053 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4054 maybe_optimize_range_tests for inter-bb range optimization.
4055 In that case if oe->op is NULL, oe->id is bb->index whose
4056 GIMPLE_COND is && or ||ed into the test, and oe->rank says
4057 the actual opcode.
4058 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
4060 static bool
4061 optimize_range_tests (enum tree_code opcode,
4062 vec<operand_entry *> *ops, basic_block first_bb)
4064 unsigned int length = ops->length (), i, j, first;
4065 operand_entry *oe;
4066 struct range_entry *ranges;
4067 bool any_changes = false;
4069 if (length == 1)
4070 return false;
4072 ranges = XNEWVEC (struct range_entry, length);
4073 for (i = 0; i < length; i++)
4075 oe = (*ops)[i];
4076 ranges[i].idx = i;
4077 init_range_entry (ranges + i, oe->op,
4078 oe->op
4079 ? NULL
4080 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4081 /* For | invert it now, we will invert it again before emitting
4082 the optimized expression. */
4083 if (opcode == BIT_IOR_EXPR
4084 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4085 ranges[i].in_p = !ranges[i].in_p;
4088 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4089 for (i = 0; i < length; i++)
4090 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4091 break;
4093 /* Try to merge ranges. */
4094 for (first = i; i < length; i++)
4096 tree low = ranges[i].low;
4097 tree high = ranges[i].high;
4098 int in_p = ranges[i].in_p;
4099 bool strict_overflow_p = ranges[i].strict_overflow_p;
4100 int update_fail_count = 0;
4102 for (j = i + 1; j < length; j++)
4104 if (ranges[i].exp != ranges[j].exp)
4105 break;
4106 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4107 ranges[j].in_p, ranges[j].low, ranges[j].high))
4108 break;
4109 strict_overflow_p |= ranges[j].strict_overflow_p;
4112 if (j == i + 1)
4113 continue;
4115 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4116 opcode, ops, ranges[i].exp, NULL, in_p,
4117 low, high, strict_overflow_p))
4119 i = j - 1;
4120 any_changes = true;
4122 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4123 while update_range_test would fail. */
4124 else if (update_fail_count == 64)
4125 i = j - 1;
4126 else
4127 ++update_fail_count;
4130 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4131 ops, ranges);
4133 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4134 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4135 ops, ranges);
4136 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4137 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4138 ops, ranges);
4139 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4140 ranges, first_bb);
4141 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4142 ops, ranges);
4144 if (any_changes && opcode != ERROR_MARK)
4146 j = 0;
4147 FOR_EACH_VEC_ELT (*ops, i, oe)
4149 if (oe->op == error_mark_node)
4150 continue;
4151 else if (i != j)
4152 (*ops)[j] = oe;
4153 j++;
4155 ops->truncate (j);
4158 XDELETEVEC (ranges);
4159 return any_changes;
4162 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4163 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4164 otherwise the comparison code. TYPE is a return value that is set
4165 to type of comparison. */
4167 static tree_code
4168 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4169 tree *lhs, tree *rhs, gassign **vcond)
4171 if (TREE_CODE (var) != SSA_NAME)
4172 return ERROR_MARK;
4174 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4175 if (stmt == NULL)
4176 return ERROR_MARK;
4177 if (vcond)
4178 *vcond = stmt;
4180 /* ??? If we start creating more COND_EXPR, we could perform
4181 this same optimization with them. For now, simplify. */
4182 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4183 return ERROR_MARK;
4185 tree cond = gimple_assign_rhs1 (stmt);
4186 tree_code cmp = TREE_CODE (cond);
4187 if (cmp != SSA_NAME)
4188 return ERROR_MARK;
4190 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4191 if (assign == NULL
4192 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4193 return ERROR_MARK;
4195 cmp = gimple_assign_rhs_code (assign);
4196 if (lhs)
4197 *lhs = gimple_assign_rhs1 (assign);
4198 if (rhs)
4199 *rhs = gimple_assign_rhs2 (assign);
4201 /* ??? For now, allow only canonical true and false result vectors.
4202 We could expand this to other constants should the need arise,
4203 but at the moment we don't create them. */
4204 tree t = gimple_assign_rhs2 (stmt);
4205 tree f = gimple_assign_rhs3 (stmt);
4206 bool inv;
4207 if (integer_all_onesp (t))
4208 inv = false;
4209 else if (integer_all_onesp (f))
4211 cmp = invert_tree_comparison (cmp, false);
4212 inv = true;
4214 else
4215 return ERROR_MARK;
4216 if (!integer_zerop (f))
4217 return ERROR_MARK;
4219 /* Success! */
4220 if (rets)
4221 *rets = assign;
4222 if (reti)
4223 *reti = inv;
4224 if (type)
4225 *type = TREE_TYPE (cond);
4226 return cmp;
4229 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4230 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4232 static bool
4233 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4235 unsigned int length = ops->length (), i, j;
4236 bool any_changes = false;
4238 if (length == 1)
4239 return false;
4241 for (i = 0; i < length; ++i)
4243 tree elt0 = (*ops)[i]->op;
4245 gassign *stmt0, *vcond0;
4246 bool invert;
4247 tree type, lhs0, rhs0;
4248 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4249 &rhs0, &vcond0);
4250 if (cmp0 == ERROR_MARK)
4251 continue;
4253 for (j = i + 1; j < length; ++j)
4255 tree &elt1 = (*ops)[j]->op;
4257 gassign *stmt1, *vcond1;
4258 tree lhs1, rhs1;
4259 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4260 &rhs1, &vcond1);
4261 if (cmp1 == ERROR_MARK)
4262 continue;
4264 tree comb;
4265 if (opcode == BIT_AND_EXPR)
4266 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4267 cmp1, lhs1, rhs1);
4268 else if (opcode == BIT_IOR_EXPR)
4269 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4270 cmp1, lhs1, rhs1);
4271 else
4272 gcc_unreachable ();
4273 if (comb == NULL)
4274 continue;
4276 /* Success! */
4277 if (dump_file && (dump_flags & TDF_DETAILS))
4279 fprintf (dump_file, "Transforming ");
4280 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4281 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4282 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4283 fprintf (dump_file, " into ");
4284 print_generic_expr (dump_file, comb);
4285 fputc ('\n', dump_file);
4288 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4289 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4290 true, GSI_SAME_STMT);
4291 if (invert)
4292 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4293 gimple_assign_rhs3_ptr (vcond0));
4294 gimple_assign_set_rhs1 (vcond0, exp);
4295 update_stmt (vcond0);
4297 elt1 = error_mark_node;
4298 any_changes = true;
4302 if (any_changes)
4304 operand_entry *oe;
4305 j = 0;
4306 FOR_EACH_VEC_ELT (*ops, i, oe)
4308 if (oe->op == error_mark_node)
4309 continue;
4310 else if (i != j)
4311 (*ops)[j] = oe;
4312 j++;
4314 ops->truncate (j);
4317 return any_changes;
4320 /* Return true if STMT is a cast like:
4321 <bb N>:
4323 _123 = (int) _234;
4325 <bb M>:
4326 # _345 = PHI <_123(N), 1(...), 1(...)>
4327 where _234 has bool type, _123 has single use and
4328 bb N has a single successor M. This is commonly used in
4329 the last block of a range test.
4331 Also Return true if STMT is tcc_compare like:
4332 <bb N>:
4334 _234 = a_2(D) == 2;
4336 <bb M>:
4337 # _345 = PHI <_234(N), 1(...), 1(...)>
4338 _346 = (int) _345;
4339 where _234 has booltype, single use and
4340 bb N has a single successor M. This is commonly used in
4341 the last block of a range test. */
4343 static bool
4344 final_range_test_p (gimple *stmt)
4346 basic_block bb, rhs_bb, lhs_bb;
4347 edge e;
4348 tree lhs, rhs;
4349 use_operand_p use_p;
4350 gimple *use_stmt;
4352 if (!gimple_assign_cast_p (stmt)
4353 && (!is_gimple_assign (stmt)
4354 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4355 != tcc_comparison)))
4356 return false;
4357 bb = gimple_bb (stmt);
4358 if (!single_succ_p (bb))
4359 return false;
4360 e = single_succ_edge (bb);
4361 if (e->flags & EDGE_COMPLEX)
4362 return false;
4364 lhs = gimple_assign_lhs (stmt);
4365 rhs = gimple_assign_rhs1 (stmt);
4366 if (gimple_assign_cast_p (stmt)
4367 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4368 || TREE_CODE (rhs) != SSA_NAME
4369 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4370 return false;
4372 if (!gimple_assign_cast_p (stmt)
4373 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4374 return false;
4376 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4377 if (!single_imm_use (lhs, &use_p, &use_stmt))
4378 return false;
4380 if (gimple_code (use_stmt) != GIMPLE_PHI
4381 || gimple_bb (use_stmt) != e->dest)
4382 return false;
4384 /* And that the rhs is defined in the same loop. */
4385 if (gimple_assign_cast_p (stmt))
4387 if (TREE_CODE (rhs) != SSA_NAME
4388 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4389 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4390 return false;
4392 else
4394 if (TREE_CODE (lhs) != SSA_NAME
4395 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4396 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4397 return false;
4400 return true;
4403 /* Return true if BB is suitable basic block for inter-bb range test
4404 optimization. If BACKWARD is true, BB should be the only predecessor
4405 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4406 or compared with to find a common basic block to which all conditions
4407 branch to if true resp. false. If BACKWARD is false, TEST_BB should
4408 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4409 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4410 block points to an empty block that falls through into *OTHER_BB and
4411 the phi args match that path. */
4413 static bool
4414 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4415 bool *test_swapped_p, bool backward)
4417 edge_iterator ei, ei2;
4418 edge e, e2;
4419 gimple *stmt;
4420 gphi_iterator gsi;
4421 bool other_edge_seen = false;
4422 bool is_cond;
4424 if (test_bb == bb)
4425 return false;
4426 /* Check last stmt first. */
4427 stmt = last_stmt (bb);
4428 if (stmt == NULL
4429 || (gimple_code (stmt) != GIMPLE_COND
4430 && (backward || !final_range_test_p (stmt)))
4431 || gimple_visited_p (stmt)
4432 || stmt_could_throw_p (cfun, stmt)
4433 || *other_bb == bb)
4434 return false;
4435 is_cond = gimple_code (stmt) == GIMPLE_COND;
4436 if (is_cond)
4438 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4439 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4440 to *OTHER_BB (if not set yet, try to find it out). */
4441 if (EDGE_COUNT (bb->succs) != 2)
4442 return false;
4443 FOR_EACH_EDGE (e, ei, bb->succs)
4445 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4446 return false;
4447 if (e->dest == test_bb)
4449 if (backward)
4450 continue;
4451 else
4452 return false;
4454 if (e->dest == bb)
4455 return false;
4456 if (*other_bb == NULL)
4458 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4459 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4460 return false;
4461 else if (e->dest == e2->dest)
4462 *other_bb = e->dest;
4463 if (*other_bb == NULL)
4464 return false;
4466 if (e->dest == *other_bb)
4467 other_edge_seen = true;
4468 else if (backward)
4469 return false;
4471 if (*other_bb == NULL || !other_edge_seen)
4472 return false;
4474 else if (single_succ (bb) != *other_bb)
4475 return false;
4477 /* Now check all PHIs of *OTHER_BB. */
4478 e = find_edge (bb, *other_bb);
4479 e2 = find_edge (test_bb, *other_bb);
4480 retry:;
4481 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4483 gphi *phi = gsi.phi ();
4484 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4485 corresponding to BB and TEST_BB predecessor must be the same. */
4486 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4487 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4489 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4490 one of the PHIs should have the lhs of the last stmt in
4491 that block as PHI arg and that PHI should have 0 or 1
4492 corresponding to it in all other range test basic blocks
4493 considered. */
4494 if (!is_cond)
4496 if (gimple_phi_arg_def (phi, e->dest_idx)
4497 == gimple_assign_lhs (stmt)
4498 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4499 || integer_onep (gimple_phi_arg_def (phi,
4500 e2->dest_idx))))
4501 continue;
4503 else
4505 gimple *test_last = last_stmt (test_bb);
4506 if (gimple_code (test_last) == GIMPLE_COND)
4508 if (backward ? e2->src != test_bb : e->src != bb)
4509 return false;
4511 /* For last_bb, handle also:
4512 if (x_3(D) == 3)
4513 goto <bb 6>; [34.00%]
4514 else
4515 goto <bb 7>; [66.00%]
4517 <bb 6> [local count: 79512730]:
4519 <bb 7> [local count: 1073741824]:
4520 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4521 where bb 7 is *OTHER_BB, but the PHI values from the
4522 earlier bbs match the path through the empty bb
4523 in between. */
4524 edge e3;
4525 if (backward)
4526 e3 = EDGE_SUCC (test_bb,
4527 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4528 else
4529 e3 = EDGE_SUCC (bb,
4530 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4531 if (empty_block_p (e3->dest)
4532 && single_succ_p (e3->dest)
4533 && single_succ (e3->dest) == *other_bb
4534 && single_pred_p (e3->dest)
4535 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4537 if (backward)
4538 e2 = single_succ_edge (e3->dest);
4539 else
4540 e = single_succ_edge (e3->dest);
4541 if (test_swapped_p)
4542 *test_swapped_p = true;
4543 goto retry;
4546 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4547 == gimple_assign_lhs (test_last)
4548 && (integer_zerop (gimple_phi_arg_def (phi,
4549 e->dest_idx))
4550 || integer_onep (gimple_phi_arg_def (phi,
4551 e->dest_idx))))
4552 continue;
4555 return false;
4558 return true;
4561 /* Return true if BB doesn't have side-effects that would disallow
4562 range test optimization, all SSA_NAMEs set in the bb are consumed
4563 in the bb and there are no PHIs. */
4565 bool
4566 no_side_effect_bb (basic_block bb)
4568 gimple_stmt_iterator gsi;
4569 gimple *last;
4571 if (!gimple_seq_empty_p (phi_nodes (bb)))
4572 return false;
4573 last = last_stmt (bb);
4574 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4576 gimple *stmt = gsi_stmt (gsi);
4577 tree lhs;
4578 imm_use_iterator imm_iter;
4579 use_operand_p use_p;
4581 if (is_gimple_debug (stmt))
4582 continue;
4583 if (gimple_has_side_effects (stmt))
4584 return false;
4585 if (stmt == last)
4586 return true;
4587 if (!is_gimple_assign (stmt))
4588 return false;
4589 lhs = gimple_assign_lhs (stmt);
4590 if (TREE_CODE (lhs) != SSA_NAME)
4591 return false;
4592 if (gimple_assign_rhs_could_trap_p (stmt))
4593 return false;
4594 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4596 gimple *use_stmt = USE_STMT (use_p);
4597 if (is_gimple_debug (use_stmt))
4598 continue;
4599 if (gimple_bb (use_stmt) != bb)
4600 return false;
4603 return false;
4606 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4607 return true and fill in *OPS recursively. */
4609 static bool
4610 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4611 class loop *loop)
4613 gimple *stmt = SSA_NAME_DEF_STMT (var);
4614 tree rhs[2];
4615 int i;
4617 if (!is_reassociable_op (stmt, code, loop))
4618 return false;
4620 rhs[0] = gimple_assign_rhs1 (stmt);
4621 rhs[1] = gimple_assign_rhs2 (stmt);
4622 gimple_set_visited (stmt, true);
4623 for (i = 0; i < 2; i++)
4624 if (TREE_CODE (rhs[i]) == SSA_NAME
4625 && !get_ops (rhs[i], code, ops, loop)
4626 && has_single_use (rhs[i]))
4628 operand_entry *oe = operand_entry_pool.allocate ();
4630 oe->op = rhs[i];
4631 oe->rank = code;
4632 oe->id = 0;
4633 oe->count = 1;
4634 oe->stmt_to_insert = NULL;
4635 ops->safe_push (oe);
4637 return true;
4640 /* Find the ops that were added by get_ops starting from VAR, see if
4641 they were changed during update_range_test and if yes, create new
4642 stmts. */
4644 static tree
4645 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4646 unsigned int *pidx, class loop *loop)
4648 gimple *stmt = SSA_NAME_DEF_STMT (var);
4649 tree rhs[4];
4650 int i;
4652 if (!is_reassociable_op (stmt, code, loop))
4653 return NULL;
4655 rhs[0] = gimple_assign_rhs1 (stmt);
4656 rhs[1] = gimple_assign_rhs2 (stmt);
4657 rhs[2] = rhs[0];
4658 rhs[3] = rhs[1];
4659 for (i = 0; i < 2; i++)
4660 if (TREE_CODE (rhs[i]) == SSA_NAME)
4662 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4663 if (rhs[2 + i] == NULL_TREE)
4665 if (has_single_use (rhs[i]))
4666 rhs[2 + i] = ops[(*pidx)++]->op;
4667 else
4668 rhs[2 + i] = rhs[i];
4671 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4672 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4674 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4675 var = make_ssa_name (TREE_TYPE (var));
4676 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4677 rhs[2], rhs[3]);
4678 gimple_set_uid (g, gimple_uid (stmt));
4679 gimple_set_visited (g, true);
4680 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4682 return var;
4685 /* Structure to track the initial value passed to get_ops and
4686 the range in the ops vector for each basic block. */
4688 struct inter_bb_range_test_entry
4690 tree op;
4691 unsigned int first_idx, last_idx;
4694 /* Inter-bb range test optimization.
4696 Returns TRUE if a gimple conditional is optimized to a true/false,
4697 otherwise return FALSE.
4699 This indicates to the caller that it should run a CFG cleanup pass
4700 once reassociation is completed. */
4702 static bool
4703 maybe_optimize_range_tests (gimple *stmt)
4705 basic_block first_bb = gimple_bb (stmt);
4706 basic_block last_bb = first_bb;
4707 basic_block other_bb = NULL;
4708 basic_block bb;
4709 edge_iterator ei;
4710 edge e;
4711 auto_vec<operand_entry *> ops;
4712 auto_vec<inter_bb_range_test_entry> bbinfo;
4713 bool any_changes = false;
4714 bool cfg_cleanup_needed = false;
4716 /* Consider only basic blocks that end with GIMPLE_COND or
4717 a cast statement satisfying final_range_test_p. All
4718 but the last bb in the first_bb .. last_bb range
4719 should end with GIMPLE_COND. */
4720 if (gimple_code (stmt) == GIMPLE_COND)
4722 if (EDGE_COUNT (first_bb->succs) != 2)
4723 return cfg_cleanup_needed;
4725 else if (final_range_test_p (stmt))
4726 other_bb = single_succ (first_bb);
4727 else
4728 return cfg_cleanup_needed;
4730 if (stmt_could_throw_p (cfun, stmt))
4731 return cfg_cleanup_needed;
4733 /* As relative ordering of post-dominator sons isn't fixed,
4734 maybe_optimize_range_tests can be called first on any
4735 bb in the range we want to optimize. So, start searching
4736 backwards, if first_bb can be set to a predecessor. */
4737 while (single_pred_p (first_bb))
4739 basic_block pred_bb = single_pred (first_bb);
4740 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4741 break;
4742 if (!no_side_effect_bb (first_bb))
4743 break;
4744 first_bb = pred_bb;
4746 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4747 Before starting forward search in last_bb successors, find
4748 out the other_bb. */
4749 if (first_bb == last_bb)
4751 other_bb = NULL;
4752 /* As non-GIMPLE_COND last stmt always terminates the range,
4753 if forward search didn't discover anything, just give up. */
4754 if (gimple_code (stmt) != GIMPLE_COND)
4755 return cfg_cleanup_needed;
4756 /* Look at both successors. Either it ends with a GIMPLE_COND
4757 and satisfies suitable_cond_bb, or ends with a cast and
4758 other_bb is that cast's successor. */
4759 FOR_EACH_EDGE (e, ei, first_bb->succs)
4760 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4761 || e->dest == first_bb)
4762 return cfg_cleanup_needed;
4763 else if (single_pred_p (e->dest))
4765 stmt = last_stmt (e->dest);
4766 if (stmt
4767 && gimple_code (stmt) == GIMPLE_COND
4768 && EDGE_COUNT (e->dest->succs) == 2)
4770 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4771 NULL, true))
4772 break;
4773 else
4774 other_bb = NULL;
4776 else if (stmt
4777 && final_range_test_p (stmt)
4778 && find_edge (first_bb, single_succ (e->dest)))
4780 other_bb = single_succ (e->dest);
4781 if (other_bb == first_bb)
4782 other_bb = NULL;
4785 if (other_bb == NULL)
4786 return cfg_cleanup_needed;
4788 /* Now do the forward search, moving last_bb to successor bbs
4789 that aren't other_bb. */
4790 while (EDGE_COUNT (last_bb->succs) == 2)
4792 FOR_EACH_EDGE (e, ei, last_bb->succs)
4793 if (e->dest != other_bb)
4794 break;
4795 if (e == NULL)
4796 break;
4797 if (!single_pred_p (e->dest))
4798 break;
4799 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4800 break;
4801 if (!no_side_effect_bb (e->dest))
4802 break;
4803 last_bb = e->dest;
4805 if (first_bb == last_bb)
4806 return cfg_cleanup_needed;
4807 /* Here basic blocks first_bb through last_bb's predecessor
4808 end with GIMPLE_COND, all of them have one of the edges to
4809 other_bb and another to another block in the range,
4810 all blocks except first_bb don't have side-effects and
4811 last_bb ends with either GIMPLE_COND, or cast satisfying
4812 final_range_test_p. */
4813 for (bb = last_bb; ; bb = single_pred (bb))
4815 enum tree_code code;
4816 tree lhs, rhs;
4817 inter_bb_range_test_entry bb_ent;
4819 bb_ent.op = NULL_TREE;
4820 bb_ent.first_idx = ops.length ();
4821 bb_ent.last_idx = bb_ent.first_idx;
4822 e = find_edge (bb, other_bb);
4823 stmt = last_stmt (bb);
4824 gimple_set_visited (stmt, true);
4825 if (gimple_code (stmt) != GIMPLE_COND)
4827 use_operand_p use_p;
4828 gimple *phi;
4829 edge e2;
4830 unsigned int d;
4832 lhs = gimple_assign_lhs (stmt);
4833 rhs = gimple_assign_rhs1 (stmt);
4834 gcc_assert (bb == last_bb);
4836 /* stmt is
4837 _123 = (int) _234;
4839 _234 = a_2(D) == 2;
4841 followed by:
4842 <bb M>:
4843 # _345 = PHI <_123(N), 1(...), 1(...)>
4845 or 0 instead of 1. If it is 0, the _234
4846 range test is anded together with all the
4847 other range tests, if it is 1, it is ored with
4848 them. */
4849 single_imm_use (lhs, &use_p, &phi);
4850 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4851 e2 = find_edge (first_bb, other_bb);
4852 d = e2->dest_idx;
4853 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4854 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4855 code = BIT_AND_EXPR;
4856 else
4858 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4859 code = BIT_IOR_EXPR;
4862 /* If _234 SSA_NAME_DEF_STMT is
4863 _234 = _567 | _789;
4864 (or &, corresponding to 1/0 in the phi arguments,
4865 push into ops the individual range test arguments
4866 of the bitwise or resp. and, recursively. */
4867 if (TREE_CODE (rhs) == SSA_NAME
4868 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4869 != tcc_comparison)
4870 && !get_ops (rhs, code, &ops,
4871 loop_containing_stmt (stmt))
4872 && has_single_use (rhs))
4874 /* Otherwise, push the _234 range test itself. */
4875 operand_entry *oe = operand_entry_pool.allocate ();
4877 oe->op = rhs;
4878 oe->rank = code;
4879 oe->id = 0;
4880 oe->count = 1;
4881 oe->stmt_to_insert = NULL;
4882 ops.safe_push (oe);
4883 bb_ent.last_idx++;
4884 bb_ent.op = rhs;
4886 else if (is_gimple_assign (stmt)
4887 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4888 == tcc_comparison)
4889 && !get_ops (lhs, code, &ops,
4890 loop_containing_stmt (stmt))
4891 && has_single_use (lhs))
4893 operand_entry *oe = operand_entry_pool.allocate ();
4894 oe->op = lhs;
4895 oe->rank = code;
4896 oe->id = 0;
4897 oe->count = 1;
4898 ops.safe_push (oe);
4899 bb_ent.last_idx++;
4900 bb_ent.op = lhs;
4902 else
4904 bb_ent.last_idx = ops.length ();
4905 bb_ent.op = rhs;
4907 bbinfo.safe_push (bb_ent);
4908 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4909 ops[i]->id = bb->index;
4910 continue;
4912 else if (bb == last_bb)
4914 /* For last_bb, handle also:
4915 if (x_3(D) == 3)
4916 goto <bb 6>; [34.00%]
4917 else
4918 goto <bb 7>; [66.00%]
4920 <bb 6> [local count: 79512730]:
4922 <bb 7> [local count: 1073741824]:
4923 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4924 where bb 7 is OTHER_BB, but the PHI values from the
4925 earlier bbs match the path through the empty bb
4926 in between. */
4927 bool test_swapped_p = false;
4928 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4929 &other_bb, &test_swapped_p, true);
4930 gcc_assert (ok);
4931 if (test_swapped_p)
4932 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4934 /* Otherwise stmt is GIMPLE_COND. */
4935 code = gimple_cond_code (stmt);
4936 lhs = gimple_cond_lhs (stmt);
4937 rhs = gimple_cond_rhs (stmt);
4938 if (TREE_CODE (lhs) == SSA_NAME
4939 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4940 && ((code != EQ_EXPR && code != NE_EXPR)
4941 || rhs != boolean_false_node
4942 /* Either push into ops the individual bitwise
4943 or resp. and operands, depending on which
4944 edge is other_bb. */
4945 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4946 ^ (code == EQ_EXPR))
4947 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4948 loop_containing_stmt (stmt))))
4950 /* Or push the GIMPLE_COND stmt itself. */
4951 operand_entry *oe = operand_entry_pool.allocate ();
4953 oe->op = NULL;
4954 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4955 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4956 /* oe->op = NULL signs that there is no SSA_NAME
4957 for the range test, and oe->id instead is the
4958 basic block number, at which's end the GIMPLE_COND
4959 is. */
4960 oe->id = bb->index;
4961 oe->count = 1;
4962 oe->stmt_to_insert = NULL;
4963 ops.safe_push (oe);
4964 bb_ent.op = NULL;
4965 bb_ent.last_idx++;
4967 else if (ops.length () > bb_ent.first_idx)
4969 bb_ent.op = lhs;
4970 bb_ent.last_idx = ops.length ();
4972 bbinfo.safe_push (bb_ent);
4973 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4974 ops[i]->id = bb->index;
4975 if (bb == first_bb)
4976 break;
4978 if (ops.length () > 1)
4979 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4980 if (any_changes)
4982 unsigned int idx, max_idx = 0;
4983 /* update_ops relies on has_single_use predicates returning the
4984 same values as it did during get_ops earlier. Additionally it
4985 never removes statements, only adds new ones and it should walk
4986 from the single imm use and check the predicate already before
4987 making those changes.
4988 On the other side, the handling of GIMPLE_COND directly can turn
4989 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4990 it needs to be done in a separate loop afterwards. */
4991 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4993 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4994 && bbinfo[idx].op != NULL_TREE)
4996 tree new_op;
4998 max_idx = idx;
4999 stmt = last_stmt (bb);
5000 new_op = update_ops (bbinfo[idx].op,
5001 (enum tree_code)
5002 ops[bbinfo[idx].first_idx]->rank,
5003 ops, &bbinfo[idx].first_idx,
5004 loop_containing_stmt (stmt));
5005 if (new_op == NULL_TREE)
5007 gcc_assert (bb == last_bb);
5008 new_op = ops[bbinfo[idx].first_idx++]->op;
5010 if (bbinfo[idx].op != new_op)
5012 imm_use_iterator iter;
5013 use_operand_p use_p;
5014 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5016 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5017 if (is_gimple_debug (use_stmt))
5018 continue;
5019 else if (gimple_code (use_stmt) == GIMPLE_COND
5020 || gimple_code (use_stmt) == GIMPLE_PHI)
5021 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5022 SET_USE (use_p, new_op);
5023 else if ((is_gimple_assign (use_stmt)
5024 && (TREE_CODE_CLASS
5025 (gimple_assign_rhs_code (use_stmt))
5026 == tcc_comparison)))
5027 cast_or_tcc_cmp_stmt = use_stmt;
5028 else if (gimple_assign_cast_p (use_stmt))
5029 cast_or_tcc_cmp_stmt = use_stmt;
5030 else
5031 gcc_unreachable ();
5033 if (cast_or_tcc_cmp_stmt)
5035 gcc_assert (bb == last_bb);
5036 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5037 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5038 enum tree_code rhs_code
5039 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5040 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5041 : CONVERT_EXPR;
5042 gassign *g;
5043 if (is_gimple_min_invariant (new_op))
5045 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5046 g = gimple_build_assign (new_lhs, new_op);
5048 else
5049 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5050 gimple_stmt_iterator gsi
5051 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5052 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5053 gimple_set_visited (g, true);
5054 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5055 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5056 if (is_gimple_debug (use_stmt))
5057 continue;
5058 else if (gimple_code (use_stmt) == GIMPLE_COND
5059 || gimple_code (use_stmt) == GIMPLE_PHI)
5060 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5061 SET_USE (use_p, new_lhs);
5062 else
5063 gcc_unreachable ();
5067 if (bb == first_bb)
5068 break;
5070 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5072 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5073 && bbinfo[idx].op == NULL_TREE
5074 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5076 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
5078 if (idx > max_idx)
5079 max_idx = idx;
5081 /* If we collapse the conditional to a true/false
5082 condition, then bubble that knowledge up to our caller. */
5083 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5085 gimple_cond_make_false (cond_stmt);
5086 cfg_cleanup_needed = true;
5088 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5090 gimple_cond_make_true (cond_stmt);
5091 cfg_cleanup_needed = true;
5093 else
5095 gimple_cond_set_code (cond_stmt, NE_EXPR);
5096 gimple_cond_set_lhs (cond_stmt,
5097 ops[bbinfo[idx].first_idx]->op);
5098 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5100 update_stmt (cond_stmt);
5102 if (bb == first_bb)
5103 break;
5106 /* The above changes could result in basic blocks after the first
5107 modified one, up to and including last_bb, to be executed even if
5108 they would not be in the original program. If the value ranges of
5109 assignment lhs' in those bbs were dependent on the conditions
5110 guarding those basic blocks which now can change, the VRs might
5111 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5112 are only used within the same bb, it should be not a big deal if
5113 we just reset all the VRs in those bbs. See PR68671. */
5114 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5115 reset_flow_sensitive_info_in_bb (bb);
5117 return cfg_cleanup_needed;
5120 /* Return true if OPERAND is defined by a PHI node which uses the LHS
5121 of STMT in it's operands. This is also known as a "destructive
5122 update" operation. */
5124 static bool
5125 is_phi_for_stmt (gimple *stmt, tree operand)
5127 gimple *def_stmt;
5128 gphi *def_phi;
5129 tree lhs;
5130 use_operand_p arg_p;
5131 ssa_op_iter i;
5133 if (TREE_CODE (operand) != SSA_NAME)
5134 return false;
5136 lhs = gimple_assign_lhs (stmt);
5138 def_stmt = SSA_NAME_DEF_STMT (operand);
5139 def_phi = dyn_cast <gphi *> (def_stmt);
5140 if (!def_phi)
5141 return false;
5143 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
5144 if (lhs == USE_FROM_PTR (arg_p))
5145 return true;
5146 return false;
5149 /* Remove def stmt of VAR if VAR has zero uses and recurse
5150 on rhs1 operand if so. */
5152 static void
5153 remove_visited_stmt_chain (tree var)
5155 gimple *stmt;
5156 gimple_stmt_iterator gsi;
5158 while (1)
5160 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5161 return;
5162 stmt = SSA_NAME_DEF_STMT (var);
5163 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5165 var = gimple_assign_rhs1 (stmt);
5166 gsi = gsi_for_stmt (stmt);
5167 reassoc_remove_stmt (&gsi);
5168 release_defs (stmt);
5170 else
5171 return;
5175 /* This function checks three consequtive operands in
5176 passed operands vector OPS starting from OPINDEX and
5177 swaps two operands if it is profitable for binary operation
5178 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
5180 We pair ops with the same rank if possible.
5182 The alternative we try is to see if STMT is a destructive
5183 update style statement, which is like:
5184 b = phi (a, ...)
5185 a = c + b;
5186 In that case, we want to use the destructive update form to
5187 expose the possible vectorizer sum reduction opportunity.
5188 In that case, the third operand will be the phi node. This
5189 check is not performed if STMT is null.
5191 We could, of course, try to be better as noted above, and do a
5192 lot of work to try to find these opportunities in >3 operand
5193 cases, but it is unlikely to be worth it. */
5195 static void
5196 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5197 unsigned int opindex, gimple *stmt)
5199 operand_entry *oe1, *oe2, *oe3;
5201 oe1 = ops[opindex];
5202 oe2 = ops[opindex + 1];
5203 oe3 = ops[opindex + 2];
5205 if ((oe1->rank == oe2->rank
5206 && oe2->rank != oe3->rank)
5207 || (stmt && is_phi_for_stmt (stmt, oe3->op)
5208 && !is_phi_for_stmt (stmt, oe1->op)
5209 && !is_phi_for_stmt (stmt, oe2->op)))
5210 std::swap (*oe1, *oe3);
5211 else if ((oe1->rank == oe3->rank
5212 && oe2->rank != oe3->rank)
5213 || (stmt && is_phi_for_stmt (stmt, oe2->op)
5214 && !is_phi_for_stmt (stmt, oe1->op)
5215 && !is_phi_for_stmt (stmt, oe3->op)))
5216 std::swap (*oe1, *oe2);
5219 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5220 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5221 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5222 after the returned stmt. */
5224 static inline gimple *
5225 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5227 insert_before = true;
5228 if (TREE_CODE (rhs1) == SSA_NAME
5229 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5231 stmt = SSA_NAME_DEF_STMT (rhs1);
5232 insert_before = false;
5234 if (TREE_CODE (rhs2) == SSA_NAME
5235 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5237 stmt = SSA_NAME_DEF_STMT (rhs2);
5238 insert_before = false;
5240 return stmt;
5243 /* If the stmt that defines operand has to be inserted, insert it
5244 before the use. */
5245 static void
5246 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5248 gcc_assert (is_gimple_assign (stmt_to_insert));
5249 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5250 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5251 bool insert_before;
5252 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5253 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5254 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5256 /* If the insert point is not stmt, then insert_point would be
5257 the point where operand rhs1 or rhs2 is defined. In this case,
5258 stmt_to_insert has to be inserted afterwards. This would
5259 only happen when the stmt insertion point is flexible. */
5260 if (insert_before)
5261 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5262 else
5263 insert_stmt_after (stmt_to_insert, insert_point);
5267 /* Recursively rewrite our linearized statements so that the operators
5268 match those in OPS[OPINDEX], putting the computation in rank
5269 order. Return new lhs.
5270 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5271 the current stmt and during recursive invocations.
5272 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5273 recursive invocations. */
5275 static tree
5276 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5277 const vec<operand_entry *> &ops, bool changed,
5278 bool next_changed)
5280 tree rhs1 = gimple_assign_rhs1 (stmt);
5281 tree rhs2 = gimple_assign_rhs2 (stmt);
5282 tree lhs = gimple_assign_lhs (stmt);
5283 operand_entry *oe;
5285 /* The final recursion case for this function is that you have
5286 exactly two operations left.
5287 If we had exactly one op in the entire list to start with, we
5288 would have never called this function, and the tail recursion
5289 rewrites them one at a time. */
5290 if (opindex + 2 == ops.length ())
5292 operand_entry *oe1, *oe2;
5294 oe1 = ops[opindex];
5295 oe2 = ops[opindex + 1];
5297 if (rhs1 != oe1->op || rhs2 != oe2->op)
5299 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5300 unsigned int uid = gimple_uid (stmt);
5302 if (dump_file && (dump_flags & TDF_DETAILS))
5304 fprintf (dump_file, "Transforming ");
5305 print_gimple_stmt (dump_file, stmt, 0);
5308 /* If the stmt that defines operand has to be inserted, insert it
5309 before the use. */
5310 if (oe1->stmt_to_insert)
5311 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5312 if (oe2->stmt_to_insert)
5313 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5314 /* Even when changed is false, reassociation could have e.g. removed
5315 some redundant operations, so unless we are just swapping the
5316 arguments or unless there is no change at all (then we just
5317 return lhs), force creation of a new SSA_NAME. */
5318 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5320 bool insert_before;
5321 gimple *insert_point
5322 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5323 lhs = make_ssa_name (TREE_TYPE (lhs));
5324 stmt
5325 = gimple_build_assign (lhs, rhs_code,
5326 oe1->op, oe2->op);
5327 gimple_set_uid (stmt, uid);
5328 gimple_set_visited (stmt, true);
5329 if (insert_before)
5330 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5331 else
5332 insert_stmt_after (stmt, insert_point);
5334 else
5336 bool insert_before;
5337 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5338 insert_before)
5339 == stmt);
5340 gimple_assign_set_rhs1 (stmt, oe1->op);
5341 gimple_assign_set_rhs2 (stmt, oe2->op);
5342 update_stmt (stmt);
5345 if (rhs1 != oe1->op && rhs1 != oe2->op)
5346 remove_visited_stmt_chain (rhs1);
5348 if (dump_file && (dump_flags & TDF_DETAILS))
5350 fprintf (dump_file, " into ");
5351 print_gimple_stmt (dump_file, stmt, 0);
5354 return lhs;
5357 /* If we hit here, we should have 3 or more ops left. */
5358 gcc_assert (opindex + 2 < ops.length ());
5360 /* Rewrite the next operator. */
5361 oe = ops[opindex];
5363 /* If the stmt that defines operand has to be inserted, insert it
5364 before the use. */
5365 if (oe->stmt_to_insert)
5366 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5368 /* Recurse on the LHS of the binary operator, which is guaranteed to
5369 be the non-leaf side. */
5370 tree new_rhs1
5371 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5372 changed || oe->op != rhs2 || next_changed,
5373 false);
5375 if (oe->op != rhs2 || new_rhs1 != rhs1)
5377 if (dump_file && (dump_flags & TDF_DETAILS))
5379 fprintf (dump_file, "Transforming ");
5380 print_gimple_stmt (dump_file, stmt, 0);
5383 /* If changed is false, this is either opindex == 0
5384 or all outer rhs2's were equal to corresponding oe->op,
5385 and powi_result is NULL.
5386 That means lhs is equivalent before and after reassociation.
5387 Otherwise ensure the old lhs SSA_NAME is not reused and
5388 create a new stmt as well, so that any debug stmts will be
5389 properly adjusted. */
5390 if (changed)
5392 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5393 unsigned int uid = gimple_uid (stmt);
5394 bool insert_before;
5395 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5396 insert_before);
5398 lhs = make_ssa_name (TREE_TYPE (lhs));
5399 stmt = gimple_build_assign (lhs, rhs_code,
5400 new_rhs1, oe->op);
5401 gimple_set_uid (stmt, uid);
5402 gimple_set_visited (stmt, true);
5403 if (insert_before)
5404 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5405 else
5406 insert_stmt_after (stmt, insert_point);
5408 else
5410 bool insert_before;
5411 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5412 insert_before)
5413 == stmt);
5414 gimple_assign_set_rhs1 (stmt, new_rhs1);
5415 gimple_assign_set_rhs2 (stmt, oe->op);
5416 update_stmt (stmt);
5419 if (dump_file && (dump_flags & TDF_DETAILS))
5421 fprintf (dump_file, " into ");
5422 print_gimple_stmt (dump_file, stmt, 0);
5425 return lhs;
5428 /* Find out how many cycles we need to compute statements chain.
5429 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
5430 maximum number of independent statements we may execute per cycle. */
5432 static int
5433 get_required_cycles (int ops_num, int cpu_width)
5435 int res;
5436 int elog;
5437 unsigned int rest;
5439 /* While we have more than 2 * cpu_width operands
5440 we may reduce number of operands by cpu_width
5441 per cycle. */
5442 res = ops_num / (2 * cpu_width);
5444 /* Remained operands count may be reduced twice per cycle
5445 until we have only one operand. */
5446 rest = (unsigned)(ops_num - res * cpu_width);
5447 elog = exact_log2 (rest);
5448 if (elog >= 0)
5449 res += elog;
5450 else
5451 res += floor_log2 (rest) + 1;
5453 return res;
5456 /* Returns an optimal number of registers to use for computation of
5457 given statements. */
5459 static int
5460 get_reassociation_width (int ops_num, enum tree_code opc,
5461 machine_mode mode)
5463 int param_width = param_tree_reassoc_width;
5464 int width;
5465 int width_min;
5466 int cycles_best;
5468 if (param_width > 0)
5469 width = param_width;
5470 else
5471 width = targetm.sched.reassociation_width (opc, mode);
5473 if (width == 1)
5474 return width;
5476 /* Get the minimal time required for sequence computation. */
5477 cycles_best = get_required_cycles (ops_num, width);
5479 /* Check if we may use less width and still compute sequence for
5480 the same time. It will allow us to reduce registers usage.
5481 get_required_cycles is monotonically increasing with lower width
5482 so we can perform a binary search for the minimal width that still
5483 results in the optimal cycle count. */
5484 width_min = 1;
5485 while (width > width_min)
5487 int width_mid = (width + width_min) / 2;
5489 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5490 width = width_mid;
5491 else if (width_min < width_mid)
5492 width_min = width_mid;
5493 else
5494 break;
5497 return width;
5500 /* Recursively rewrite our linearized statements so that the operators
5501 match those in OPS[OPINDEX], putting the computation in rank
5502 order and trying to allow operations to be executed in
5503 parallel. */
5505 static void
5506 rewrite_expr_tree_parallel (gassign *stmt, int width,
5507 const vec<operand_entry *> &ops)
5509 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5510 int op_num = ops.length ();
5511 gcc_assert (op_num > 0);
5512 int stmt_num = op_num - 1;
5513 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5514 int op_index = op_num - 1;
5515 int stmt_index = 0;
5516 int ready_stmts_end = 0;
5517 int i = 0;
5518 gimple *stmt1 = NULL, *stmt2 = NULL;
5519 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5521 /* We start expression rewriting from the top statements.
5522 So, in this loop we create a full list of statements
5523 we will work with. */
5524 stmts[stmt_num - 1] = stmt;
5525 for (i = stmt_num - 2; i >= 0; i--)
5526 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5528 for (i = 0; i < stmt_num; i++)
5530 tree op1, op2;
5532 /* Determine whether we should use results of
5533 already handled statements or not. */
5534 if (ready_stmts_end == 0
5535 && (i - stmt_index >= width || op_index < 1))
5536 ready_stmts_end = i;
5538 /* Now we choose operands for the next statement. Non zero
5539 value in ready_stmts_end means here that we should use
5540 the result of already generated statements as new operand. */
5541 if (ready_stmts_end > 0)
5543 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5544 if (ready_stmts_end > stmt_index)
5545 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5546 else if (op_index >= 0)
5548 operand_entry *oe = ops[op_index--];
5549 stmt2 = oe->stmt_to_insert;
5550 op2 = oe->op;
5552 else
5554 gcc_assert (stmt_index < i);
5555 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5558 if (stmt_index >= ready_stmts_end)
5559 ready_stmts_end = 0;
5561 else
5563 if (op_index > 1)
5564 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5565 operand_entry *oe2 = ops[op_index--];
5566 operand_entry *oe1 = ops[op_index--];
5567 op2 = oe2->op;
5568 stmt2 = oe2->stmt_to_insert;
5569 op1 = oe1->op;
5570 stmt1 = oe1->stmt_to_insert;
5573 /* If we emit the last statement then we should put
5574 operands into the last statement. It will also
5575 break the loop. */
5576 if (op_index < 0 && stmt_index == i)
5577 i = stmt_num - 1;
5579 if (dump_file && (dump_flags & TDF_DETAILS))
5581 fprintf (dump_file, "Transforming ");
5582 print_gimple_stmt (dump_file, stmts[i], 0);
5585 /* If the stmt that defines operand has to be inserted, insert it
5586 before the use. */
5587 if (stmt1)
5588 insert_stmt_before_use (stmts[i], stmt1);
5589 if (stmt2)
5590 insert_stmt_before_use (stmts[i], stmt2);
5591 stmt1 = stmt2 = NULL;
5593 /* We keep original statement only for the last one. All
5594 others are recreated. */
5595 if (i == stmt_num - 1)
5597 gimple_assign_set_rhs1 (stmts[i], op1);
5598 gimple_assign_set_rhs2 (stmts[i], op2);
5599 update_stmt (stmts[i]);
5601 else
5603 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5604 gimple_set_visited (stmts[i], true);
5606 if (dump_file && (dump_flags & TDF_DETAILS))
5608 fprintf (dump_file, " into ");
5609 print_gimple_stmt (dump_file, stmts[i], 0);
5613 remove_visited_stmt_chain (last_rhs1);
5616 /* Transform STMT, which is really (A +B) + (C + D) into the left
5617 linear form, ((A+B)+C)+D.
5618 Recurse on D if necessary. */
5620 static void
5621 linearize_expr (gimple *stmt)
5623 gimple_stmt_iterator gsi;
5624 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5625 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5626 gimple *oldbinrhs = binrhs;
5627 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5628 gimple *newbinrhs = NULL;
5629 class loop *loop = loop_containing_stmt (stmt);
5630 tree lhs = gimple_assign_lhs (stmt);
5632 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5633 && is_reassociable_op (binrhs, rhscode, loop));
5635 gsi = gsi_for_stmt (stmt);
5637 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5638 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5639 gimple_assign_rhs_code (binrhs),
5640 gimple_assign_lhs (binlhs),
5641 gimple_assign_rhs2 (binrhs));
5642 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5643 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5644 gimple_set_uid (binrhs, gimple_uid (stmt));
5646 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5647 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5649 if (dump_file && (dump_flags & TDF_DETAILS))
5651 fprintf (dump_file, "Linearized: ");
5652 print_gimple_stmt (dump_file, stmt, 0);
5655 reassociate_stats.linearized++;
5656 update_stmt (stmt);
5658 gsi = gsi_for_stmt (oldbinrhs);
5659 reassoc_remove_stmt (&gsi);
5660 release_defs (oldbinrhs);
5662 gimple_set_visited (stmt, true);
5663 gimple_set_visited (binlhs, true);
5664 gimple_set_visited (binrhs, true);
5666 /* Tail recurse on the new rhs if it still needs reassociation. */
5667 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5668 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5669 want to change the algorithm while converting to tuples. */
5670 linearize_expr (stmt);
5673 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5674 it. Otherwise, return NULL. */
5676 static gimple *
5677 get_single_immediate_use (tree lhs)
5679 use_operand_p immuse;
5680 gimple *immusestmt;
5682 if (TREE_CODE (lhs) == SSA_NAME
5683 && single_imm_use (lhs, &immuse, &immusestmt)
5684 && is_gimple_assign (immusestmt))
5685 return immusestmt;
5687 return NULL;
5690 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5691 representing the negated value. Insertions of any necessary
5692 instructions go before GSI.
5693 This function is recursive in that, if you hand it "a_5" as the
5694 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5695 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5697 static tree
5698 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5700 gimple *negatedefstmt = NULL;
5701 tree resultofnegate;
5702 gimple_stmt_iterator gsi;
5703 unsigned int uid;
5705 /* If we are trying to negate a name, defined by an add, negate the
5706 add operands instead. */
5707 if (TREE_CODE (tonegate) == SSA_NAME)
5708 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5709 if (TREE_CODE (tonegate) == SSA_NAME
5710 && is_gimple_assign (negatedefstmt)
5711 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5712 && has_single_use (gimple_assign_lhs (negatedefstmt))
5713 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5715 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5716 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5717 tree lhs = gimple_assign_lhs (negatedefstmt);
5718 gimple *g;
5720 gsi = gsi_for_stmt (negatedefstmt);
5721 rhs1 = negate_value (rhs1, &gsi);
5723 gsi = gsi_for_stmt (negatedefstmt);
5724 rhs2 = negate_value (rhs2, &gsi);
5726 gsi = gsi_for_stmt (negatedefstmt);
5727 lhs = make_ssa_name (TREE_TYPE (lhs));
5728 gimple_set_visited (negatedefstmt, true);
5729 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5730 gimple_set_uid (g, gimple_uid (negatedefstmt));
5731 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5732 return lhs;
5735 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5736 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5737 NULL_TREE, true, GSI_SAME_STMT);
5738 gsi = *gsip;
5739 uid = gimple_uid (gsi_stmt (gsi));
5740 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5742 gimple *stmt = gsi_stmt (gsi);
5743 if (gimple_uid (stmt) != 0)
5744 break;
5745 gimple_set_uid (stmt, uid);
5747 return resultofnegate;
5750 /* Return true if we should break up the subtract in STMT into an add
5751 with negate. This is true when we the subtract operands are really
5752 adds, or the subtract itself is used in an add expression. In
5753 either case, breaking up the subtract into an add with negate
5754 exposes the adds to reassociation. */
5756 static bool
5757 should_break_up_subtract (gimple *stmt)
5759 tree lhs = gimple_assign_lhs (stmt);
5760 tree binlhs = gimple_assign_rhs1 (stmt);
5761 tree binrhs = gimple_assign_rhs2 (stmt);
5762 gimple *immusestmt;
5763 class loop *loop = loop_containing_stmt (stmt);
5765 if (TREE_CODE (binlhs) == SSA_NAME
5766 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5767 return true;
5769 if (TREE_CODE (binrhs) == SSA_NAME
5770 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5771 return true;
5773 if (TREE_CODE (lhs) == SSA_NAME
5774 && (immusestmt = get_single_immediate_use (lhs))
5775 && is_gimple_assign (immusestmt)
5776 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5777 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5778 && gimple_assign_rhs1 (immusestmt) == lhs)
5779 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5780 return true;
5781 return false;
5784 /* Transform STMT from A - B into A + -B. */
5786 static void
5787 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5789 tree rhs1 = gimple_assign_rhs1 (stmt);
5790 tree rhs2 = gimple_assign_rhs2 (stmt);
5792 if (dump_file && (dump_flags & TDF_DETAILS))
5794 fprintf (dump_file, "Breaking up subtract ");
5795 print_gimple_stmt (dump_file, stmt, 0);
5798 rhs2 = negate_value (rhs2, gsip);
5799 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5800 update_stmt (stmt);
5803 /* Determine whether STMT is a builtin call that raises an SSA name
5804 to an integer power and has only one use. If so, and this is early
5805 reassociation and unsafe math optimizations are permitted, place
5806 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5807 If any of these conditions does not hold, return FALSE. */
5809 static bool
5810 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5812 tree arg1;
5813 REAL_VALUE_TYPE c, cint;
5815 switch (gimple_call_combined_fn (stmt))
5817 CASE_CFN_POW:
5818 if (flag_errno_math)
5819 return false;
5821 *base = gimple_call_arg (stmt, 0);
5822 arg1 = gimple_call_arg (stmt, 1);
5824 if (TREE_CODE (arg1) != REAL_CST)
5825 return false;
5827 c = TREE_REAL_CST (arg1);
5829 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5830 return false;
5832 *exponent = real_to_integer (&c);
5833 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5834 if (!real_identical (&c, &cint))
5835 return false;
5837 break;
5839 CASE_CFN_POWI:
5840 *base = gimple_call_arg (stmt, 0);
5841 arg1 = gimple_call_arg (stmt, 1);
5843 if (!tree_fits_shwi_p (arg1))
5844 return false;
5846 *exponent = tree_to_shwi (arg1);
5847 break;
5849 default:
5850 return false;
5853 /* Expanding negative exponents is generally unproductive, so we don't
5854 complicate matters with those. Exponents of zero and one should
5855 have been handled by expression folding. */
5856 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5857 return false;
5859 return true;
5862 /* Try to derive and add operand entry for OP to *OPS. Return false if
5863 unsuccessful. */
5865 static bool
5866 try_special_add_to_ops (vec<operand_entry *> *ops,
5867 enum tree_code code,
5868 tree op, gimple* def_stmt)
5870 tree base = NULL_TREE;
5871 HOST_WIDE_INT exponent = 0;
5873 if (TREE_CODE (op) != SSA_NAME
5874 || ! has_single_use (op))
5875 return false;
5877 if (code == MULT_EXPR
5878 && reassoc_insert_powi_p
5879 && flag_unsafe_math_optimizations
5880 && is_gimple_call (def_stmt)
5881 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5883 add_repeat_to_ops_vec (ops, base, exponent);
5884 gimple_set_visited (def_stmt, true);
5885 return true;
5887 else if (code == MULT_EXPR
5888 && is_gimple_assign (def_stmt)
5889 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5890 && !HONOR_SNANS (TREE_TYPE (op))
5891 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5892 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
5893 && (!FLOAT_TYPE_P (TREE_TYPE (op))
5894 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
5896 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5897 tree cst = build_minus_one_cst (TREE_TYPE (op));
5898 add_to_ops_vec (ops, rhs1);
5899 add_to_ops_vec (ops, cst);
5900 gimple_set_visited (def_stmt, true);
5901 return true;
5904 return false;
5907 /* Recursively linearize a binary expression that is the RHS of STMT.
5908 Place the operands of the expression tree in the vector named OPS. */
5910 static void
5911 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5912 bool is_associative, bool set_visited)
5914 tree binlhs = gimple_assign_rhs1 (stmt);
5915 tree binrhs = gimple_assign_rhs2 (stmt);
5916 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5917 bool binlhsisreassoc = false;
5918 bool binrhsisreassoc = false;
5919 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5920 class loop *loop = loop_containing_stmt (stmt);
5922 if (set_visited)
5923 gimple_set_visited (stmt, true);
5925 if (TREE_CODE (binlhs) == SSA_NAME)
5927 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5928 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5929 && !stmt_could_throw_p (cfun, binlhsdef));
5932 if (TREE_CODE (binrhs) == SSA_NAME)
5934 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5935 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5936 && !stmt_could_throw_p (cfun, binrhsdef));
5939 /* If the LHS is not reassociable, but the RHS is, we need to swap
5940 them. If neither is reassociable, there is nothing we can do, so
5941 just put them in the ops vector. If the LHS is reassociable,
5942 linearize it. If both are reassociable, then linearize the RHS
5943 and the LHS. */
5945 if (!binlhsisreassoc)
5947 /* If this is not a associative operation like division, give up. */
5948 if (!is_associative)
5950 add_to_ops_vec (ops, binrhs);
5951 return;
5954 if (!binrhsisreassoc)
5956 bool swap = false;
5957 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5958 /* If we add ops for the rhs we expect to be able to recurse
5959 to it via the lhs during expression rewrite so swap
5960 operands. */
5961 swap = true;
5962 else
5963 add_to_ops_vec (ops, binrhs);
5965 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5966 add_to_ops_vec (ops, binlhs);
5968 if (!swap)
5969 return;
5972 if (dump_file && (dump_flags & TDF_DETAILS))
5974 fprintf (dump_file, "swapping operands of ");
5975 print_gimple_stmt (dump_file, stmt, 0);
5978 swap_ssa_operands (stmt,
5979 gimple_assign_rhs1_ptr (stmt),
5980 gimple_assign_rhs2_ptr (stmt));
5981 update_stmt (stmt);
5983 if (dump_file && (dump_flags & TDF_DETAILS))
5985 fprintf (dump_file, " is now ");
5986 print_gimple_stmt (dump_file, stmt, 0);
5988 if (!binrhsisreassoc)
5989 return;
5991 /* We want to make it so the lhs is always the reassociative op,
5992 so swap. */
5993 std::swap (binlhs, binrhs);
5995 else if (binrhsisreassoc)
5997 linearize_expr (stmt);
5998 binlhs = gimple_assign_rhs1 (stmt);
5999 binrhs = gimple_assign_rhs2 (stmt);
6002 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6003 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6004 rhscode, loop));
6005 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6006 is_associative, set_visited);
6008 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6009 add_to_ops_vec (ops, binrhs);
6012 /* Repropagate the negates back into subtracts, since no other pass
6013 currently does it. */
6015 static void
6016 repropagate_negates (void)
6018 unsigned int i = 0;
6019 tree negate;
6021 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6023 gimple *user = get_single_immediate_use (negate);
6024 if (!user || !is_gimple_assign (user))
6025 continue;
6027 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6028 if (TREE_CODE (negateop) == SSA_NAME
6029 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6030 continue;
6032 /* The negate operand can be either operand of a PLUS_EXPR
6033 (it can be the LHS if the RHS is a constant for example).
6035 Force the negate operand to the RHS of the PLUS_EXPR, then
6036 transform the PLUS_EXPR into a MINUS_EXPR. */
6037 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6039 /* If the negated operand appears on the LHS of the
6040 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6041 to force the negated operand to the RHS of the PLUS_EXPR. */
6042 if (gimple_assign_rhs1 (user) == negate)
6044 swap_ssa_operands (user,
6045 gimple_assign_rhs1_ptr (user),
6046 gimple_assign_rhs2_ptr (user));
6049 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6050 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6051 if (gimple_assign_rhs2 (user) == negate)
6053 tree rhs1 = gimple_assign_rhs1 (user);
6054 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6055 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6056 negateop);
6057 update_stmt (user);
6060 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6062 if (gimple_assign_rhs1 (user) == negate)
6064 /* We have
6065 x = -negateop
6066 y = x - b
6067 which we transform into
6068 x = negateop + b
6069 y = -x .
6070 This pushes down the negate which we possibly can merge
6071 into some other operation, hence insert it into the
6072 plus_negates vector. */
6073 gimple *feed = SSA_NAME_DEF_STMT (negate);
6074 tree b = gimple_assign_rhs2 (user);
6075 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6076 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6077 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6078 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6079 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6080 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6081 user = gsi_stmt (gsi2);
6082 update_stmt (user);
6083 reassoc_remove_stmt (&gsi);
6084 release_defs (feed);
6085 plus_negates.safe_push (gimple_assign_lhs (user));
6087 else
6089 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6090 getting rid of one operation. */
6091 tree rhs1 = gimple_assign_rhs1 (user);
6092 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6093 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6094 update_stmt (gsi_stmt (gsi));
6100 /* Break up subtract operations in block BB.
6102 We do this top down because we don't know whether the subtract is
6103 part of a possible chain of reassociation except at the top.
6105 IE given
6106 d = f + g
6107 c = a + e
6108 b = c - d
6109 q = b - r
6110 k = t - q
6112 we want to break up k = t - q, but we won't until we've transformed q
6113 = b - r, which won't be broken up until we transform b = c - d.
6115 En passant, clear the GIMPLE visited flag on every statement
6116 and set UIDs within each basic block. */
6118 static void
6119 break_up_subtract_bb (basic_block bb)
6121 gimple_stmt_iterator gsi;
6122 basic_block son;
6123 unsigned int uid = 1;
6125 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6127 gimple *stmt = gsi_stmt (gsi);
6128 gimple_set_visited (stmt, false);
6129 gimple_set_uid (stmt, uid++);
6131 if (!is_gimple_assign (stmt)
6132 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6133 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6134 continue;
6136 /* Look for simple gimple subtract operations. */
6137 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6139 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6140 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6141 continue;
6143 /* Check for a subtract used only in an addition. If this
6144 is the case, transform it into add of a negate for better
6145 reassociation. IE transform C = A-B into C = A + -B if C
6146 is only used in an addition. */
6147 if (should_break_up_subtract (stmt))
6148 break_up_subtract (stmt, &gsi);
6150 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6151 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6152 plus_negates.safe_push (gimple_assign_lhs (stmt));
6154 for (son = first_dom_son (CDI_DOMINATORS, bb);
6155 son;
6156 son = next_dom_son (CDI_DOMINATORS, son))
6157 break_up_subtract_bb (son);
6160 /* Used for repeated factor analysis. */
6161 struct repeat_factor
6163 /* An SSA name that occurs in a multiply chain. */
6164 tree factor;
6166 /* Cached rank of the factor. */
6167 unsigned rank;
6169 /* Number of occurrences of the factor in the chain. */
6170 HOST_WIDE_INT count;
6172 /* An SSA name representing the product of this factor and
6173 all factors appearing later in the repeated factor vector. */
6174 tree repr;
6178 static vec<repeat_factor> repeat_factor_vec;
6180 /* Used for sorting the repeat factor vector. Sort primarily by
6181 ascending occurrence count, secondarily by descending rank. */
6183 static int
6184 compare_repeat_factors (const void *x1, const void *x2)
6186 const repeat_factor *rf1 = (const repeat_factor *) x1;
6187 const repeat_factor *rf2 = (const repeat_factor *) x2;
6189 if (rf1->count != rf2->count)
6190 return rf1->count - rf2->count;
6192 return rf2->rank - rf1->rank;
6195 /* Look for repeated operands in OPS in the multiply tree rooted at
6196 STMT. Replace them with an optimal sequence of multiplies and powi
6197 builtin calls, and remove the used operands from OPS. Return an
6198 SSA name representing the value of the replacement sequence. */
6200 static tree
6201 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6203 unsigned i, j, vec_len;
6204 int ii;
6205 operand_entry *oe;
6206 repeat_factor *rf1, *rf2;
6207 repeat_factor rfnew;
6208 tree result = NULL_TREE;
6209 tree target_ssa, iter_result;
6210 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6211 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6212 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6213 gimple *mul_stmt, *pow_stmt;
6215 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6216 target, unless type is integral. */
6217 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6218 return NULL_TREE;
6220 /* Allocate the repeated factor vector. */
6221 repeat_factor_vec.create (10);
6223 /* Scan the OPS vector for all SSA names in the product and build
6224 up a vector of occurrence counts for each factor. */
6225 FOR_EACH_VEC_ELT (*ops, i, oe)
6227 if (TREE_CODE (oe->op) == SSA_NAME)
6229 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6231 if (rf1->factor == oe->op)
6233 rf1->count += oe->count;
6234 break;
6238 if (j >= repeat_factor_vec.length ())
6240 rfnew.factor = oe->op;
6241 rfnew.rank = oe->rank;
6242 rfnew.count = oe->count;
6243 rfnew.repr = NULL_TREE;
6244 repeat_factor_vec.safe_push (rfnew);
6249 /* Sort the repeated factor vector by (a) increasing occurrence count,
6250 and (b) decreasing rank. */
6251 repeat_factor_vec.qsort (compare_repeat_factors);
6253 /* It is generally best to combine as many base factors as possible
6254 into a product before applying __builtin_powi to the result.
6255 However, the sort order chosen for the repeated factor vector
6256 allows us to cache partial results for the product of the base
6257 factors for subsequent use. When we already have a cached partial
6258 result from a previous iteration, it is best to make use of it
6259 before looking for another __builtin_pow opportunity.
6261 As an example, consider x * x * y * y * y * z * z * z * z.
6262 We want to first compose the product x * y * z, raise it to the
6263 second power, then multiply this by y * z, and finally multiply
6264 by z. This can be done in 5 multiplies provided we cache y * z
6265 for use in both expressions:
6267 t1 = y * z
6268 t2 = t1 * x
6269 t3 = t2 * t2
6270 t4 = t1 * t3
6271 result = t4 * z
6273 If we instead ignored the cached y * z and first multiplied by
6274 the __builtin_pow opportunity z * z, we would get the inferior:
6276 t1 = y * z
6277 t2 = t1 * x
6278 t3 = t2 * t2
6279 t4 = z * z
6280 t5 = t3 * t4
6281 result = t5 * y */
6283 vec_len = repeat_factor_vec.length ();
6285 /* Repeatedly look for opportunities to create a builtin_powi call. */
6286 while (true)
6288 HOST_WIDE_INT power;
6290 /* First look for the largest cached product of factors from
6291 preceding iterations. If found, create a builtin_powi for
6292 it if the minimum occurrence count for its factors is at
6293 least 2, or just use this cached product as our next
6294 multiplicand if the minimum occurrence count is 1. */
6295 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6297 if (rf1->repr && rf1->count > 0)
6298 break;
6301 if (j < vec_len)
6303 power = rf1->count;
6305 if (power == 1)
6307 iter_result = rf1->repr;
6309 if (dump_file && (dump_flags & TDF_DETAILS))
6311 unsigned elt;
6312 repeat_factor *rf;
6313 fputs ("Multiplying by cached product ", dump_file);
6314 for (elt = j; elt < vec_len; elt++)
6316 rf = &repeat_factor_vec[elt];
6317 print_generic_expr (dump_file, rf->factor);
6318 if (elt < vec_len - 1)
6319 fputs (" * ", dump_file);
6321 fputs ("\n", dump_file);
6324 else
6326 if (INTEGRAL_TYPE_P (type))
6328 gcc_assert (power > 1);
6329 gimple_stmt_iterator gsip = gsi;
6330 gsi_prev (&gsip);
6331 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6332 rf1->repr, power);
6333 gimple_stmt_iterator gsic = gsi;
6334 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6336 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6337 gimple_set_visited (gsi_stmt (gsic), true);
6338 gsi_prev (&gsic);
6341 else
6343 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6344 pow_stmt
6345 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6346 build_int_cst (integer_type_node,
6347 power));
6348 gimple_call_set_lhs (pow_stmt, iter_result);
6349 gimple_set_location (pow_stmt, gimple_location (stmt));
6350 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6351 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6354 if (dump_file && (dump_flags & TDF_DETAILS))
6356 unsigned elt;
6357 repeat_factor *rf;
6358 fputs ("Building __builtin_pow call for cached product (",
6359 dump_file);
6360 for (elt = j; elt < vec_len; elt++)
6362 rf = &repeat_factor_vec[elt];
6363 print_generic_expr (dump_file, rf->factor);
6364 if (elt < vec_len - 1)
6365 fputs (" * ", dump_file);
6367 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6368 power);
6372 else
6374 /* Otherwise, find the first factor in the repeated factor
6375 vector whose occurrence count is at least 2. If no such
6376 factor exists, there are no builtin_powi opportunities
6377 remaining. */
6378 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6380 if (rf1->count >= 2)
6381 break;
6384 if (j >= vec_len)
6385 break;
6387 power = rf1->count;
6389 if (dump_file && (dump_flags & TDF_DETAILS))
6391 unsigned elt;
6392 repeat_factor *rf;
6393 fputs ("Building __builtin_pow call for (", dump_file);
6394 for (elt = j; elt < vec_len; elt++)
6396 rf = &repeat_factor_vec[elt];
6397 print_generic_expr (dump_file, rf->factor);
6398 if (elt < vec_len - 1)
6399 fputs (" * ", dump_file);
6401 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6404 reassociate_stats.pows_created++;
6406 /* Visit each element of the vector in reverse order (so that
6407 high-occurrence elements are visited first, and within the
6408 same occurrence count, lower-ranked elements are visited
6409 first). Form a linear product of all elements in this order
6410 whose occurrencce count is at least that of element J.
6411 Record the SSA name representing the product of each element
6412 with all subsequent elements in the vector. */
6413 if (j == vec_len - 1)
6414 rf1->repr = rf1->factor;
6415 else
6417 for (ii = vec_len - 2; ii >= (int)j; ii--)
6419 tree op1, op2;
6421 rf1 = &repeat_factor_vec[ii];
6422 rf2 = &repeat_factor_vec[ii + 1];
6424 /* Init the last factor's representative to be itself. */
6425 if (!rf2->repr)
6426 rf2->repr = rf2->factor;
6428 op1 = rf1->factor;
6429 op2 = rf2->repr;
6431 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6432 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6433 op1, op2);
6434 gimple_set_location (mul_stmt, gimple_location (stmt));
6435 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6436 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6437 rf1->repr = target_ssa;
6439 /* Don't reprocess the multiply we just introduced. */
6440 gimple_set_visited (mul_stmt, true);
6444 /* Form a call to __builtin_powi for the maximum product
6445 just formed, raised to the power obtained earlier. */
6446 rf1 = &repeat_factor_vec[j];
6447 if (INTEGRAL_TYPE_P (type))
6449 gcc_assert (power > 1);
6450 gimple_stmt_iterator gsip = gsi;
6451 gsi_prev (&gsip);
6452 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6453 rf1->repr, power);
6454 gimple_stmt_iterator gsic = gsi;
6455 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6457 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6458 gimple_set_visited (gsi_stmt (gsic), true);
6459 gsi_prev (&gsic);
6462 else
6464 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6465 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6466 build_int_cst (integer_type_node,
6467 power));
6468 gimple_call_set_lhs (pow_stmt, iter_result);
6469 gimple_set_location (pow_stmt, gimple_location (stmt));
6470 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6471 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6475 /* If we previously formed at least one other builtin_powi call,
6476 form the product of this one and those others. */
6477 if (result)
6479 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6480 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6481 result, iter_result);
6482 gimple_set_location (mul_stmt, gimple_location (stmt));
6483 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6484 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6485 gimple_set_visited (mul_stmt, true);
6486 result = new_result;
6488 else
6489 result = iter_result;
6491 /* Decrement the occurrence count of each element in the product
6492 by the count found above, and remove this many copies of each
6493 factor from OPS. */
6494 for (i = j; i < vec_len; i++)
6496 unsigned k = power;
6497 unsigned n;
6499 rf1 = &repeat_factor_vec[i];
6500 rf1->count -= power;
6502 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6504 if (oe->op == rf1->factor)
6506 if (oe->count <= k)
6508 ops->ordered_remove (n);
6509 k -= oe->count;
6511 if (k == 0)
6512 break;
6514 else
6516 oe->count -= k;
6517 break;
6524 /* At this point all elements in the repeated factor vector have a
6525 remaining occurrence count of 0 or 1, and those with a count of 1
6526 don't have cached representatives. Re-sort the ops vector and
6527 clean up. */
6528 ops->qsort (sort_by_operand_rank);
6529 repeat_factor_vec.release ();
6531 /* Return the final product computed herein. Note that there may
6532 still be some elements with single occurrence count left in OPS;
6533 those will be handled by the normal reassociation logic. */
6534 return result;
6537 /* Attempt to optimize
6538 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6539 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6541 static void
6542 attempt_builtin_copysign (vec<operand_entry *> *ops)
6544 operand_entry *oe;
6545 unsigned int i;
6546 unsigned int length = ops->length ();
6547 tree cst = ops->last ()->op;
6549 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6550 return;
6552 FOR_EACH_VEC_ELT (*ops, i, oe)
6554 if (TREE_CODE (oe->op) == SSA_NAME
6555 && has_single_use (oe->op))
6557 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6558 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6560 tree arg0, arg1;
6561 switch (gimple_call_combined_fn (old_call))
6563 CASE_CFN_COPYSIGN:
6564 CASE_CFN_COPYSIGN_FN:
6565 arg0 = gimple_call_arg (old_call, 0);
6566 arg1 = gimple_call_arg (old_call, 1);
6567 /* The first argument of copysign must be a constant,
6568 otherwise there's nothing to do. */
6569 if (TREE_CODE (arg0) == REAL_CST)
6571 tree type = TREE_TYPE (arg0);
6572 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6573 /* If we couldn't fold to a single constant, skip it.
6574 That happens e.g. for inexact multiplication when
6575 -frounding-math. */
6576 if (mul == NULL_TREE)
6577 break;
6578 /* Instead of adjusting OLD_CALL, let's build a new
6579 call to not leak the LHS and prevent keeping bogus
6580 debug statements. DCE will clean up the old call. */
6581 gcall *new_call;
6582 if (gimple_call_internal_p (old_call))
6583 new_call = gimple_build_call_internal
6584 (IFN_COPYSIGN, 2, mul, arg1);
6585 else
6586 new_call = gimple_build_call
6587 (gimple_call_fndecl (old_call), 2, mul, arg1);
6588 tree lhs = make_ssa_name (type);
6589 gimple_call_set_lhs (new_call, lhs);
6590 gimple_set_location (new_call,
6591 gimple_location (old_call));
6592 insert_stmt_after (new_call, old_call);
6593 /* We've used the constant, get rid of it. */
6594 ops->pop ();
6595 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6596 /* Handle the CST1 < 0 case by negating the result. */
6597 if (cst1_neg)
6599 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6600 gimple *negate_stmt
6601 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6602 insert_stmt_after (negate_stmt, new_call);
6603 oe->op = negrhs;
6605 else
6606 oe->op = lhs;
6607 if (dump_file && (dump_flags & TDF_DETAILS))
6609 fprintf (dump_file, "Optimizing copysign: ");
6610 print_generic_expr (dump_file, cst);
6611 fprintf (dump_file, " * COPYSIGN (");
6612 print_generic_expr (dump_file, arg0);
6613 fprintf (dump_file, ", ");
6614 print_generic_expr (dump_file, arg1);
6615 fprintf (dump_file, ") into %sCOPYSIGN (",
6616 cst1_neg ? "-" : "");
6617 print_generic_expr (dump_file, mul);
6618 fprintf (dump_file, ", ");
6619 print_generic_expr (dump_file, arg1);
6620 fprintf (dump_file, "\n");
6622 return;
6624 break;
6625 default:
6626 break;
6633 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6635 static void
6636 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6638 tree rhs1;
6640 if (dump_file && (dump_flags & TDF_DETAILS))
6642 fprintf (dump_file, "Transforming ");
6643 print_gimple_stmt (dump_file, stmt, 0);
6646 rhs1 = gimple_assign_rhs1 (stmt);
6647 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6648 update_stmt (stmt);
6649 remove_visited_stmt_chain (rhs1);
6651 if (dump_file && (dump_flags & TDF_DETAILS))
6653 fprintf (dump_file, " into ");
6654 print_gimple_stmt (dump_file, stmt, 0);
6658 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6660 static void
6661 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6662 tree rhs1, tree rhs2)
6664 if (dump_file && (dump_flags & TDF_DETAILS))
6666 fprintf (dump_file, "Transforming ");
6667 print_gimple_stmt (dump_file, stmt, 0);
6670 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6671 update_stmt (gsi_stmt (*gsi));
6672 remove_visited_stmt_chain (rhs1);
6674 if (dump_file && (dump_flags & TDF_DETAILS))
6676 fprintf (dump_file, " into ");
6677 print_gimple_stmt (dump_file, stmt, 0);
6681 /* Reassociate expressions in basic block BB and its post-dominator as
6682 children.
6684 Bubble up return status from maybe_optimize_range_tests. */
6686 static bool
6687 reassociate_bb (basic_block bb)
6689 gimple_stmt_iterator gsi;
6690 basic_block son;
6691 gimple *stmt = last_stmt (bb);
6692 bool cfg_cleanup_needed = false;
6694 if (stmt && !gimple_visited_p (stmt))
6695 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6697 bool do_prev = false;
6698 for (gsi = gsi_last_bb (bb);
6699 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6701 do_prev = true;
6702 stmt = gsi_stmt (gsi);
6704 if (is_gimple_assign (stmt)
6705 && !stmt_could_throw_p (cfun, stmt))
6707 tree lhs, rhs1, rhs2;
6708 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6710 /* If this was part of an already processed statement,
6711 we don't need to touch it again. */
6712 if (gimple_visited_p (stmt))
6714 /* This statement might have become dead because of previous
6715 reassociations. */
6716 if (has_zero_uses (gimple_get_lhs (stmt)))
6718 reassoc_remove_stmt (&gsi);
6719 release_defs (stmt);
6720 /* We might end up removing the last stmt above which
6721 places the iterator to the end of the sequence.
6722 Reset it to the last stmt in this case and make sure
6723 we don't do gsi_prev in that case. */
6724 if (gsi_end_p (gsi))
6726 gsi = gsi_last_bb (bb);
6727 do_prev = false;
6730 continue;
6733 /* If this is not a gimple binary expression, there is
6734 nothing for us to do with it. */
6735 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6736 continue;
6738 lhs = gimple_assign_lhs (stmt);
6739 rhs1 = gimple_assign_rhs1 (stmt);
6740 rhs2 = gimple_assign_rhs2 (stmt);
6742 /* For non-bit or min/max operations we can't associate
6743 all types. Verify that here. */
6744 if ((rhs_code != BIT_IOR_EXPR
6745 && rhs_code != BIT_AND_EXPR
6746 && rhs_code != BIT_XOR_EXPR
6747 && rhs_code != MIN_EXPR
6748 && rhs_code != MAX_EXPR
6749 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6750 || !can_reassociate_op_p (rhs1)
6751 || !can_reassociate_op_p (rhs2))
6752 continue;
6754 if (associative_tree_code (rhs_code))
6756 auto_vec<operand_entry *> ops;
6757 tree powi_result = NULL_TREE;
6758 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6760 /* There may be no immediate uses left by the time we
6761 get here because we may have eliminated them all. */
6762 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6763 continue;
6765 gimple_set_visited (stmt, true);
6766 linearize_expr_tree (&ops, stmt, true, true);
6767 ops.qsort (sort_by_operand_rank);
6768 int orig_len = ops.length ();
6769 optimize_ops_list (rhs_code, &ops);
6770 if (undistribute_ops_list (rhs_code, &ops,
6771 loop_containing_stmt (stmt)))
6773 ops.qsort (sort_by_operand_rank);
6774 optimize_ops_list (rhs_code, &ops);
6776 if (undistribute_bitref_for_vector (rhs_code, &ops,
6777 loop_containing_stmt (stmt)))
6779 ops.qsort (sort_by_operand_rank);
6780 optimize_ops_list (rhs_code, &ops);
6782 if (rhs_code == PLUS_EXPR
6783 && transform_add_to_multiply (&ops))
6784 ops.qsort (sort_by_operand_rank);
6786 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6788 if (is_vector)
6789 optimize_vec_cond_expr (rhs_code, &ops);
6790 else
6791 optimize_range_tests (rhs_code, &ops, NULL);
6794 if (rhs_code == MULT_EXPR && !is_vector)
6796 attempt_builtin_copysign (&ops);
6798 if (reassoc_insert_powi_p
6799 && (flag_unsafe_math_optimizations
6800 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6801 powi_result = attempt_builtin_powi (stmt, &ops);
6804 operand_entry *last;
6805 bool negate_result = false;
6806 if (ops.length () > 1
6807 && rhs_code == MULT_EXPR)
6809 last = ops.last ();
6810 if ((integer_minus_onep (last->op)
6811 || real_minus_onep (last->op))
6812 && !HONOR_SNANS (TREE_TYPE (lhs))
6813 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6814 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6816 ops.pop ();
6817 negate_result = true;
6821 tree new_lhs = lhs;
6822 /* If the operand vector is now empty, all operands were
6823 consumed by the __builtin_powi optimization. */
6824 if (ops.length () == 0)
6825 transform_stmt_to_copy (&gsi, stmt, powi_result);
6826 else if (ops.length () == 1)
6828 tree last_op = ops.last ()->op;
6830 /* If the stmt that defines operand has to be inserted, insert it
6831 before the use. */
6832 if (ops.last ()->stmt_to_insert)
6833 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6834 if (powi_result)
6835 transform_stmt_to_multiply (&gsi, stmt, last_op,
6836 powi_result);
6837 else
6838 transform_stmt_to_copy (&gsi, stmt, last_op);
6840 else
6842 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6843 int ops_num = ops.length ();
6844 int width;
6846 /* For binary bit operations, if there are at least 3
6847 operands and the last operand in OPS is a constant,
6848 move it to the front. This helps ensure that we generate
6849 (X & Y) & C rather than (X & C) & Y. The former will
6850 often match a canonical bit test when we get to RTL. */
6851 if (ops.length () > 2
6852 && (rhs_code == BIT_AND_EXPR
6853 || rhs_code == BIT_IOR_EXPR
6854 || rhs_code == BIT_XOR_EXPR)
6855 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6856 std::swap (*ops[0], *ops[ops_num - 1]);
6858 /* Only rewrite the expression tree to parallel in the
6859 last reassoc pass to avoid useless work back-and-forth
6860 with initial linearization. */
6861 if (!reassoc_insert_powi_p
6862 && ops.length () > 3
6863 && (width = get_reassociation_width (ops_num, rhs_code,
6864 mode)) > 1)
6866 if (dump_file && (dump_flags & TDF_DETAILS))
6867 fprintf (dump_file,
6868 "Width = %d was chosen for reassociation\n",
6869 width);
6870 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6871 width, ops);
6873 else
6875 /* When there are three operands left, we want
6876 to make sure the ones that get the double
6877 binary op are chosen wisely. */
6878 int len = ops.length ();
6879 if (len >= 3)
6880 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6882 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6883 powi_result != NULL
6884 || negate_result,
6885 len != orig_len);
6888 /* If we combined some repeated factors into a
6889 __builtin_powi call, multiply that result by the
6890 reassociated operands. */
6891 if (powi_result)
6893 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6894 tree type = TREE_TYPE (lhs);
6895 tree target_ssa = make_temp_ssa_name (type, NULL,
6896 "reassocpow");
6897 gimple_set_lhs (lhs_stmt, target_ssa);
6898 update_stmt (lhs_stmt);
6899 if (lhs != new_lhs)
6901 target_ssa = new_lhs;
6902 new_lhs = lhs;
6904 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6905 powi_result, target_ssa);
6906 gimple_set_location (mul_stmt, gimple_location (stmt));
6907 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6908 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6912 if (negate_result)
6914 stmt = SSA_NAME_DEF_STMT (lhs);
6915 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6916 gimple_set_lhs (stmt, tmp);
6917 if (lhs != new_lhs)
6918 tmp = new_lhs;
6919 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6920 tmp);
6921 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6922 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6923 update_stmt (stmt);
6928 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6929 son;
6930 son = next_dom_son (CDI_POST_DOMINATORS, son))
6931 cfg_cleanup_needed |= reassociate_bb (son);
6933 return cfg_cleanup_needed;
6936 /* Add jumps around shifts for range tests turned into bit tests.
6937 For each SSA_NAME VAR we have code like:
6938 VAR = ...; // final stmt of range comparison
6939 // bit test here...;
6940 OTHERVAR = ...; // final stmt of the bit test sequence
6941 RES = VAR | OTHERVAR;
6942 Turn the above into:
6943 VAR = ...;
6944 if (VAR != 0)
6945 goto <l3>;
6946 else
6947 goto <l2>;
6948 <l2>:
6949 // bit test here...;
6950 OTHERVAR = ...;
6951 <l3>:
6952 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6954 static void
6955 branch_fixup (void)
6957 tree var;
6958 unsigned int i;
6960 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6962 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6963 gimple *use_stmt;
6964 use_operand_p use;
6965 bool ok = single_imm_use (var, &use, &use_stmt);
6966 gcc_assert (ok
6967 && is_gimple_assign (use_stmt)
6968 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6969 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6971 basic_block cond_bb = gimple_bb (def_stmt);
6972 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6973 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6975 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6976 gimple *g = gimple_build_cond (NE_EXPR, var,
6977 build_zero_cst (TREE_TYPE (var)),
6978 NULL_TREE, NULL_TREE);
6979 location_t loc = gimple_location (use_stmt);
6980 gimple_set_location (g, loc);
6981 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6983 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6984 etrue->probability = profile_probability::even ();
6985 edge efalse = find_edge (cond_bb, then_bb);
6986 efalse->flags = EDGE_FALSE_VALUE;
6987 efalse->probability -= etrue->probability;
6988 then_bb->count -= etrue->count ();
6990 tree othervar = NULL_TREE;
6991 if (gimple_assign_rhs1 (use_stmt) == var)
6992 othervar = gimple_assign_rhs2 (use_stmt);
6993 else if (gimple_assign_rhs2 (use_stmt) == var)
6994 othervar = gimple_assign_rhs1 (use_stmt);
6995 else
6996 gcc_unreachable ();
6997 tree lhs = gimple_assign_lhs (use_stmt);
6998 gphi *phi = create_phi_node (lhs, merge_bb);
6999 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7000 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7001 gsi = gsi_for_stmt (use_stmt);
7002 gsi_remove (&gsi, true);
7004 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7005 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7007 reassoc_branch_fixups.release ();
7010 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7011 void debug_ops_vector (vec<operand_entry *> ops);
7013 /* Dump the operand entry vector OPS to FILE. */
7015 void
7016 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7018 operand_entry *oe;
7019 unsigned int i;
7021 FOR_EACH_VEC_ELT (ops, i, oe)
7023 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7024 print_generic_expr (file, oe->op);
7025 fprintf (file, "\n");
7029 /* Dump the operand entry vector OPS to STDERR. */
7031 DEBUG_FUNCTION void
7032 debug_ops_vector (vec<operand_entry *> ops)
7034 dump_ops_vector (stderr, ops);
7037 /* Bubble up return status from reassociate_bb. */
7039 static bool
7040 do_reassoc (void)
7042 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7043 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7046 /* Initialize the reassociation pass. */
7048 static void
7049 init_reassoc (void)
7051 int i;
7052 int64_t rank = 2;
7053 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7055 /* Find the loops, so that we can prevent moving calculations in
7056 them. */
7057 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7059 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7061 next_operand_entry_id = 0;
7063 /* Reverse RPO (Reverse Post Order) will give us something where
7064 deeper loops come later. */
7065 pre_and_rev_post_order_compute (NULL, bbs, false);
7066 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7067 operand_rank = new hash_map<tree, int64_t>;
7069 /* Give each default definition a distinct rank. This includes
7070 parameters and the static chain. Walk backwards over all
7071 SSA names so that we get proper rank ordering according
7072 to tree_swap_operands_p. */
7073 for (i = num_ssa_names - 1; i > 0; --i)
7075 tree name = ssa_name (i);
7076 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7077 insert_operand_rank (name, ++rank);
7080 /* Set up rank for each BB */
7081 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7082 bb_rank[bbs[i]] = ++rank << 16;
7084 free (bbs);
7085 calculate_dominance_info (CDI_POST_DOMINATORS);
7086 plus_negates = vNULL;
7089 /* Cleanup after the reassociation pass, and print stats if
7090 requested. */
7092 static void
7093 fini_reassoc (void)
7095 statistics_counter_event (cfun, "Linearized",
7096 reassociate_stats.linearized);
7097 statistics_counter_event (cfun, "Constants eliminated",
7098 reassociate_stats.constants_eliminated);
7099 statistics_counter_event (cfun, "Ops eliminated",
7100 reassociate_stats.ops_eliminated);
7101 statistics_counter_event (cfun, "Statements rewritten",
7102 reassociate_stats.rewritten);
7103 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7104 reassociate_stats.pows_encountered);
7105 statistics_counter_event (cfun, "Built-in powi calls created",
7106 reassociate_stats.pows_created);
7108 delete operand_rank;
7109 bitmap_clear (biased_names);
7110 operand_entry_pool.release ();
7111 free (bb_rank);
7112 plus_negates.release ();
7113 free_dominance_info (CDI_POST_DOMINATORS);
7114 loop_optimizer_finalize ();
7117 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7118 insertion of __builtin_powi calls.
7120 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7121 optimization of a gimple conditional. Otherwise returns zero. */
7123 static unsigned int
7124 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7126 reassoc_insert_powi_p = insert_powi_p;
7127 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7129 init_reassoc ();
7131 bool cfg_cleanup_needed;
7132 cfg_cleanup_needed = do_reassoc ();
7133 repropagate_negates ();
7134 branch_fixup ();
7136 fini_reassoc ();
7137 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7140 namespace {
7142 const pass_data pass_data_reassoc =
7144 GIMPLE_PASS, /* type */
7145 "reassoc", /* name */
7146 OPTGROUP_NONE, /* optinfo_flags */
7147 TV_TREE_REASSOC, /* tv_id */
7148 ( PROP_cfg | PROP_ssa ), /* properties_required */
7149 0, /* properties_provided */
7150 0, /* properties_destroyed */
7151 0, /* todo_flags_start */
7152 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7155 class pass_reassoc : public gimple_opt_pass
7157 public:
7158 pass_reassoc (gcc::context *ctxt)
7159 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7162 /* opt_pass methods: */
7163 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7164 void set_pass_param (unsigned int n, bool param) final override
7166 gcc_assert (n == 0);
7167 insert_powi_p = param;
7168 bias_loop_carried_phi_ranks_p = !param;
7170 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7171 unsigned int execute (function *) final override
7173 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7176 private:
7177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7178 point 3a in the pass header comment. */
7179 bool insert_powi_p;
7180 bool bias_loop_carried_phi_ranks_p;
7181 }; // class pass_reassoc
7183 } // anon namespace
7185 gimple_opt_pass *
7186 make_pass_reassoc (gcc::context *ctxt)
7188 return new pass_reassoc (ctxt);