1 /* Reassociation for trees.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
62 It consists of five steps:
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
72 3. Optimization of the operand lists, eliminating things like a +
73 -a, a & a, etc.
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
82 5. Repropagate negates, as nothing else will clean it up ATM.
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
103 a (1), b (1), c (1), d (2), e (2)
106 We start with our merge worklist empty, and the ops list with all of
107 those on it.
109 You want to first merge all leaves of the same rank, as much as
110 possible.
112 So first build a binary op of
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
124 Then build a binary op of d + e
125 mergetmp2 = d + e
127 and put mergetmp2 on the merge worklist.
129 so merge worklist = {mergetmp, c, mergetmp2}
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
134 So we have
136 build binary op
137 mergetmp3 = mergetmp + c
139 worklist = {mergetmp2, mergetmp3}
141 mergetmp4 = mergetmp2 + mergetmp3
143 worklist = {mergetmp4}
145 Because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
154 So why don't we do this?
156 Because it's expensive, and rarely will help. Most trees we are
157 reassociating have 3 or fewer ops. If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
162 mergetmp = op1 + op2
163 newstmt = mergetmp + op3
165 instead of
166 mergetmp = op2 + op3
167 newstmt = mergetmp + op1
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
171 can do.
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
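/* As a purely illustrative, source-level companion to the steps above
   (not taken from this file; the function and variable names below are
   hypothetical, and an unsigned type is used so the additions may
   legally be reassociated):  */
#if 0
static unsigned int
reassoc_example (unsigned int a, unsigned int b, unsigned int c)
{
  /* Step 1 turns the subtraction into an addition of a negate,
     step 2 linearizes the tree into the operand list {a, 1, -b, c, 2},
     and the operand-list optimization plus the rank-ordered rewrite
     (steps 3-4) put the constants next to each other so that 1 and 2
     can be folded into a single 3.  */
  return ((a + 1) - b) + (c + 2);
}
#endif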
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p;
183 /* Enable biasing ranks of loop accumulators. We don't want this before
184 vectorization, since it interferes with reduction chains. */
185 static bool reassoc_bias_loop_carried_phi_ranks_p;
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
199 static object_allocator<operand_entry> operand_entry_pool
200 ("operand entry pool");
202 /* This is used to assign a unique ID to each struct operand_entry
203 so that qsort results are identical on different hosts. */
204 static unsigned int next_operand_entry_id;
206 /* Starting rank number for a given basic block, so that we can rank
207 operations using unmovable instructions in that BB based on the bb
208 depth. */
209 static int64_t *bb_rank;
211 /* Operand->rank hashtable. */
212 static hash_map<tree, int64_t> *operand_rank;
214 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
215 biased. */
216 static auto_bitmap biased_names;
218 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
219 all basic blocks, the CFG should be adjusted - the basic block is
220 split right after that SSA_NAME's definition statement and before
221 its only use, which must be a bit ior. */
222 static vec<tree> reassoc_branch_fixups;
224 /* Forward decls. */
225 static int64_t get_rank (tree);
226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
229 possibly added by gsi_remove. */
231 static bool
232 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 gimple *stmt = gsi_stmt (*gsi);
236 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
237 return gsi_remove (gsi, true);
239 gimple_stmt_iterator prev = *gsi;
240 gsi_prev (&prev);
241 unsigned uid = gimple_uid (stmt);
242 basic_block bb = gimple_bb (stmt);
243 bool ret = gsi_remove (gsi, true);
244 if (!gsi_end_p (prev))
245 gsi_next (&prev);
246 else
247 prev = gsi_start_bb (bb);
248 gimple *end_stmt = gsi_stmt (*gsi);
249 while ((stmt = gsi_stmt (prev)) != end_stmt)
251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
252 gimple_set_uid (stmt, uid);
253 gsi_next (&prev);
255 return ret;
258 /* Bias amount for loop-carried phis. We want this to be larger than
259 the depth of any reassociation tree we can see, but not larger than
260 the rank difference between two blocks. */
261 #define PHI_LOOP_BIAS (1 << 15)
263 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
264 operands to the STMT's left-hand side. The goal is to preserve bias in code
265 like this:
267 x_1 = phi(x_0, x_2)
268 a = x_1 | 1
269 b = a ^ 2
270 .MEM = b
271 c = b + d
272 x_2 = c + e
274 That is, we need to preserve bias along single-use chains originating from
275 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
276 uses, because only they participate in rank propagation. */
277 static bool
278 propagate_bias_p (gimple *stmt)
280 use_operand_p use;
281 imm_use_iterator use_iter;
282 gimple *single_use_stmt = NULL;
284 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
285 return false;
287 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 gimple *current_use_stmt = USE_STMT (use);
291 if (is_gimple_assign (current_use_stmt)
292 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
295 return false;
296 single_use_stmt = current_use_stmt;
300 if (single_use_stmt == NULL)
301 return false;
303 if (gimple_bb (stmt)->loop_father
304 != gimple_bb (single_use_stmt)->loop_father)
305 return false;
307 return true;
310 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
311 an innermost loop, and the phi has only a single use which is inside
312 the loop, then the rank is the block rank of the loop latch plus an
313 extra bias for the loop-carried dependence. This causes expressions
314 calculated into an accumulator variable to be independent for each
315 iteration of the loop. If STMT is some other phi, the rank is the
316 block rank of its containing block. */
317 static int64_t
318 phi_rank (gimple *stmt)
320 basic_block bb = gimple_bb (stmt);
321 class loop *father = bb->loop_father;
322 tree res;
323 unsigned i;
324 use_operand_p use;
325 gimple *use_stmt;
327 if (!reassoc_bias_loop_carried_phi_ranks_p)
328 return bb_rank[bb->index];
330 /* We only care about real loops (those with a latch). */
331 if (!father->latch)
332 return bb_rank[bb->index];
334 /* Interesting phis must be in headers of innermost loops. */
335 if (bb != father->header
336 || father->inner)
337 return bb_rank[bb->index];
339 /* Ignore virtual SSA_NAMEs. */
340 res = gimple_phi_result (stmt);
341 if (virtual_operand_p (res))
342 return bb_rank[bb->index];
344 /* The phi definition must have a single use, and that use must be
345 within the loop. Otherwise this isn't an accumulator pattern. */
346 if (!single_imm_use (res, &use, &use_stmt)
347 || gimple_bb (use_stmt)->loop_father != father)
348 return bb_rank[bb->index];
350 /* Look for phi arguments from within the loop. If found, bias this phi. */
351 for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 tree arg = gimple_phi_arg_def (stmt, i);
354 if (TREE_CODE (arg) == SSA_NAME
355 && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
358 if (gimple_bb (def_stmt)->loop_father == father)
359 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
363 /* Must be an uninteresting phi. */
364 return bb_rank[bb->index];
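/* A purely illustrative source-level example (hypothetical names, not
   from this file) of the accumulator pattern that phi_rank biases: the
   loop-carried `sum' below is the x_1 = phi (x_0, x_2) of the comments
   above, and biasing its rank makes the rewritten chain add `sum' last,
   keeping the a[i] + 1 work independent across unrolled iterations.  */
#if 0
static unsigned int
accumulator_example (const unsigned int *a, int n)
{
  unsigned int sum = 0;
  for (int i = 0; i < n; i++)
    sum = sum + a[i] + 1;
  return sum;
}
#endif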
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static int64_t
372 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 int64_t op_rank;
376 op_rank = get_rank (op);
378 /* Check whether op is biased after the get_rank () call, since it might have
379 updated biased_names. */
380 if (TREE_CODE (op) == SSA_NAME
381 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 if (maybe_biased_p == NULL)
384 return rank;
385 *maybe_biased_p = true;
388 return MAX (rank, op_rank);
391 /* Look up the operand rank structure for expression E. */
393 static inline int64_t
394 find_operand_rank (tree e)
396 int64_t *slot = operand_rank->get (e);
397 return slot ? *slot : -1;
400 /* Insert {E,RANK} into the operand rank hashtable. */
402 static inline void
403 insert_operand_rank (tree e, int64_t rank)
405 gcc_assert (rank > 0);
406 gcc_assert (!operand_rank->put (e, rank));
409 /* Given an expression E, return the rank of the expression. */
411 static int64_t
412 get_rank (tree e)
414 /* SSA_NAME's have the rank of the expression they are the result of.
416 For globals and uninitialized values, the rank is 0.
417 For function arguments, use the pre-setup rank.
418 For PHI nodes, stores, asm statements, etc, we use the rank of
419 the BB.
420 For simple operations, the rank is the maximum rank of any of
421 its operands, or the bb_rank, whichever is less.
422 I make no claims that this is optimal, however, it gives good
423 results. */
425 /* We make an exception to the normal ranking system to break
426 dependences of accumulator variables in loops. Suppose we
427 have a simple one-block loop containing:
429 x_1 = phi(x_0, x_2)
430 b = a + x_1
431 c = b + d
432 x_2 = c + e
434 As shown, each iteration of the calculation into x is fully
435 dependent upon the iteration before it. We would prefer to
436 see this in the form:
438 x_1 = phi(x_0, x_2)
439 b = a + d
440 c = b + e
441 x_2 = c + x_1
443 If the loop is unrolled, the calculations of b and c from
444 different iterations can be interleaved.
446 To obtain this result during reassociation, we bias the rank
447 of the phi definition x_1 upward, when it is recognized as an
448 accumulator pattern. The artificial rank causes it to be
449 added last, providing the desired independence. */
451 if (TREE_CODE (e) == SSA_NAME)
453 ssa_op_iter iter;
454 gimple *stmt;
455 int64_t rank;
456 tree op;
458 /* If we already have a rank for this expression, use that. */
459 rank = find_operand_rank (e);
460 if (rank != -1)
461 return rank;
463 stmt = SSA_NAME_DEF_STMT (e);
464 if (gimple_code (stmt) == GIMPLE_PHI)
466 rank = phi_rank (stmt);
467 if (rank != bb_rank[gimple_bb (stmt)->index])
468 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
471 else if (!is_gimple_assign (stmt))
472 rank = bb_rank[gimple_bb (stmt)->index];
474 else
476 bool biased_p = false;
477 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
479 /* Otherwise, find the maximum rank for the operands. As an
480 exception, remove the bias from loop-carried phis when propagating
481 the rank so that dependent operations are not also biased. */
482 /* Simply walk over all SSA uses - this takes advantage of the
483 fact that non-SSA operands are is_gimple_min_invariant and
484 thus have rank 0. */
485 rank = 0;
486 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
487 rank = propagate_rank (rank, op, maybe_biased_p);
489 rank += 1;
490 if (biased_p)
491 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
494 if (dump_file && (dump_flags & TDF_DETAILS))
496 fprintf (dump_file, "Rank for ");
497 print_generic_expr (dump_file, e);
498 fprintf (dump_file, " is %" PRId64 "\n", rank);
501 /* Note the rank in the hashtable so we don't recompute it. */
502 insert_operand_rank (e, rank);
503 return rank;
506 /* Constants, globals, etc., are rank 0 */
507 return 0;
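/* A small worked example of the ranking (illustrative only): within a
   single basic block,
       t1 = a + 1;      rank (t1) = MAX (rank (a), 0) + 1
       t2 = t1 + b;     rank (t2) = MAX (rank (t1), rank (b)) + 1
   so later-computed temporaries receive strictly larger ranks, and the
   qsort comparator below orders a linearized operand list by decreasing
   rank.  */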
511 /* We want integer ones to end up last no matter what, since they are
512 the ones we can do the most with. */
513 #define INTEGER_CONST_TYPE 1 << 4
514 #define FLOAT_ONE_CONST_TYPE 1 << 3
515 #define FLOAT_CONST_TYPE 1 << 2
516 #define OTHER_CONST_TYPE 1 << 1
518 /* Classify an invariant tree into integer, float, or other, so that
519 we can sort them to be near other constants of the same type. */
520 static inline int
521 constant_type (tree t)
523 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
524 return INTEGER_CONST_TYPE;
525 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
527 /* Sort -1.0 and 1.0 constants last: while in some cases
528 const_binop can't optimize some inexact operations, multiplication
529 by -1.0 or 1.0 can always be merged with others. */
530 if (real_onep (t) || real_minus_onep (t))
531 return FLOAT_ONE_CONST_TYPE;
532 return FLOAT_CONST_TYPE;
534 else
535 return OTHER_CONST_TYPE;
538 /* qsort comparison function to sort operand entries PA and PB by rank
539 so that the sorted array is ordered by rank in decreasing order. */
540 static int
541 sort_by_operand_rank (const void *pa, const void *pb)
543 const operand_entry *oea = *(const operand_entry *const *)pa;
544 const operand_entry *oeb = *(const operand_entry *const *)pb;
546 if (oeb->rank != oea->rank)
547 return oeb->rank > oea->rank ? 1 : -1;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oea->rank == 0)
554 if (constant_type (oeb->op) != constant_type (oea->op))
555 return constant_type (oea->op) - constant_type (oeb->op);
556 else
557 /* To make sorting result stable, we use unique IDs to determine
558 order. */
559 return oeb->id > oea->id ? 1 : -1;
562 if (TREE_CODE (oea->op) != SSA_NAME)
564 if (TREE_CODE (oeb->op) != SSA_NAME)
565 return oeb->id > oea->id ? 1 : -1;
566 else
567 return 1;
569 else if (TREE_CODE (oeb->op) != SSA_NAME)
570 return -1;
572 /* Lastly, make sure the versions that are the same go next to each
573 other. */
574 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
576 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
577 versions of removed SSA_NAMEs, so if possible, prefer to sort
578 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
579 See PR60418. */
580 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
581 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
582 basic_block bba = gimple_bb (stmta);
583 basic_block bbb = gimple_bb (stmtb);
584 if (bbb != bba)
586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
587 but the other might not. */
588 if (!bba)
589 return 1;
590 if (!bbb)
591 return -1;
592 /* If neither is, compare bb_rank. */
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
597 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
598 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
599 if (da != db)
600 return da ? 1 : -1;
602 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
605 return oeb->id > oea->id ? 1 : -1;
608 /* Add an operand entry to *OPS for the tree operand OP. */
610 static void
611 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
613 operand_entry *oe = operand_entry_pool.allocate ();
615 oe->op = op;
616 oe->rank = get_rank (op);
617 oe->id = next_operand_entry_id++;
618 oe->count = 1;
619 oe->stmt_to_insert = stmt_to_insert;
620 ops->safe_push (oe);
623 /* Add an operand entry to *OPS for the tree operand OP with repeat
624 count REPEAT. */
626 static void
627 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
628 HOST_WIDE_INT repeat)
630 operand_entry *oe = operand_entry_pool.allocate ();
632 oe->op = op;
633 oe->rank = get_rank (op);
634 oe->id = next_operand_entry_id++;
635 oe->count = repeat;
636 oe->stmt_to_insert = NULL;
637 ops->safe_push (oe);
639 reassociate_stats.pows_encountered++;
642 /* Returns true if we can associate the SSA def OP. */
644 static bool
645 can_reassociate_op_p (tree op)
647 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
648 return false;
649 /* Make sure asm goto outputs do not participate in reassociation since
650 we have no way to find an insertion place after asm goto. */
651 if (TREE_CODE (op) == SSA_NAME
652 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
653 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
654 return false;
655 return true;
658 /* Returns true if we can reassociate operations of TYPE.
659 That is, for integral or non-saturating fixed-point types, and for
660 floating-point types when associative-math is enabled. */
662 static bool
663 can_reassociate_type_p (tree type)
665 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
666 || NON_SAT_FIXED_POINT_TYPE_P (type)
667 || (flag_associative_math && FLOAT_TYPE_P (type)))
668 return true;
669 return false;
672 /* Return true if STMT is a reassociable operation containing a binary
673 operation with tree code CODE, and is inside LOOP. */
675 static bool
676 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
678 basic_block bb = gimple_bb (stmt);
680 if (gimple_bb (stmt) == NULL)
681 return false;
683 if (!flow_bb_inside_loop_p (loop, bb))
684 return false;
686 if (is_gimple_assign (stmt)
687 && gimple_assign_rhs_code (stmt) == code
688 && has_single_use (gimple_assign_lhs (stmt)))
690 tree rhs1 = gimple_assign_rhs1 (stmt);
691 tree rhs2 = gimple_assign_rhs2 (stmt);
692 if (!can_reassociate_op_p (rhs1)
693 || (rhs2 && !can_reassociate_op_p (rhs2)))
694 return false;
695 return true;
698 return false;
702 /* Return true if STMT is a nop-conversion. */
704 static bool
705 gimple_nop_conversion_p (gimple *stmt)
707 if (gassign *ass = dyn_cast <gassign *> (stmt))
709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
711 TREE_TYPE (gimple_assign_rhs1 (ass))))
712 return true;
714 return false;
717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
718 operand of that operation. Otherwise, return NULL_TREE. */
720 static tree
721 get_unary_op (tree name, enum tree_code opcode)
723 gimple *stmt = SSA_NAME_DEF_STMT (name);
725 /* Look through nop conversions (sign changes). */
726 if (gimple_nop_conversion_p (stmt)
727 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
728 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
730 if (!is_gimple_assign (stmt))
731 return NULL_TREE;
733 if (gimple_assign_rhs_code (stmt) == opcode)
734 return gimple_assign_rhs1 (stmt);
735 return NULL_TREE;
738 /* Return true if OP1 and OP2 have the same value if cast to either type. */
740 static bool
741 ops_equal_values_p (tree op1, tree op2)
743 if (op1 == op2)
744 return true;
746 tree orig_op1 = op1;
747 if (TREE_CODE (op1) == SSA_NAME)
749 gimple *stmt = SSA_NAME_DEF_STMT (op1);
750 if (gimple_nop_conversion_p (stmt))
752 op1 = gimple_assign_rhs1 (stmt);
753 if (op1 == op2)
754 return true;
758 if (TREE_CODE (op2) == SSA_NAME)
760 gimple *stmt = SSA_NAME_DEF_STMT (op2);
761 if (gimple_nop_conversion_p (stmt))
763 op2 = gimple_assign_rhs1 (stmt);
764 if (op1 == op2
765 || orig_op1 == op2)
766 return true;
770 return false;
774 /* If CURR and LAST are a pair of ops that OPCODE allows us to
775 eliminate through equivalences, do so, remove them from OPS, and
776 return true. Otherwise, return false. */
778 static bool
779 eliminate_duplicate_pair (enum tree_code opcode,
780 vec<operand_entry *> *ops,
781 bool *all_done,
782 unsigned int i,
783 operand_entry *curr,
784 operand_entry *last)
787 /* If we have two of the same op, and the opcode is & |, min, or max,
788 we can eliminate one of them.
789 If we have two of the same op, and the opcode is ^, we can
790 eliminate both of them. */
792 if (last && last->op == curr->op)
794 switch (opcode)
796 case MAX_EXPR:
797 case MIN_EXPR:
798 case BIT_IOR_EXPR:
799 case BIT_AND_EXPR:
800 if (dump_file && (dump_flags & TDF_DETAILS))
802 fprintf (dump_file, "Equivalence: ");
803 print_generic_expr (dump_file, curr->op);
804 fprintf (dump_file, " [&|minmax] ");
805 print_generic_expr (dump_file, last->op);
806 fprintf (dump_file, " -> ");
807 print_generic_stmt (dump_file, last->op);
810 ops->ordered_remove (i);
811 reassociate_stats.ops_eliminated ++;
813 return true;
815 case BIT_XOR_EXPR:
816 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "Equivalence: ");
819 print_generic_expr (dump_file, curr->op);
820 fprintf (dump_file, " ^ ");
821 print_generic_expr (dump_file, last->op);
822 fprintf (dump_file, " -> nothing\n");
825 reassociate_stats.ops_eliminated += 2;
827 if (ops->length () == 2)
829 ops->truncate (0);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
831 *all_done = true;
833 else
835 ops->ordered_remove (i-1);
836 ops->ordered_remove (i-1);
839 return true;
841 default:
842 break;
845 return false;
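/* Purely illustrative source-level forms of the equivalences that the
   preceding and following eliminate_* helpers apply to a linearized
   operand list; the names are hypothetical and the type is unsigned so
   the additions may be reassociated.  */
#if 0
static unsigned int
eliminate_example (unsigned int a, unsigned int b)
{
  unsigned int t1 = (a & b) & a;    /* duplicate operand -> a & b */
  unsigned int t2 = (a ^ b) ^ b;    /* b ^ b cancels     -> a     */
  unsigned int t3 = (a + b) - a;    /* a + -a cancels    -> b     */
  unsigned int t4 = (a & b) & ~a;   /* a & ~a            -> 0     */
  return t1 + t2 + t3 + t4;
}
#endif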
848 static vec<tree> plus_negates;
850 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
851 expression, look in OPS for a corresponding positive operation to cancel
852 it out. If we find one, remove the other from OPS, replace
853 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
854 return false. */
856 static bool
857 eliminate_plus_minus_pair (enum tree_code opcode,
858 vec<operand_entry *> *ops,
859 unsigned int currindex,
860 operand_entry *curr)
862 tree negateop;
863 tree notop;
864 unsigned int i;
865 operand_entry *oe;
867 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
868 return false;
870 negateop = get_unary_op (curr->op, NEGATE_EXPR);
871 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
872 if (negateop == NULL_TREE && notop == NULL_TREE)
873 return false;
875 /* Any non-negated version will have a rank that is one less than
876 the current rank. So once we hit those ranks, if we don't find
877 one, we can stop. */
879 for (i = currindex + 1;
880 ops->iterate (i, &oe)
881 && oe->rank >= curr->rank - 1 ;
882 i++)
884 if (negateop
885 && ops_equal_values_p (oe->op, negateop))
887 if (dump_file && (dump_flags & TDF_DETAILS))
889 fprintf (dump_file, "Equivalence: ");
890 print_generic_expr (dump_file, negateop);
891 fprintf (dump_file, " + -");
892 print_generic_expr (dump_file, oe->op);
893 fprintf (dump_file, " -> 0\n");
896 ops->ordered_remove (i);
897 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
898 ops->ordered_remove (currindex);
899 reassociate_stats.ops_eliminated ++;
901 return true;
903 else if (notop
904 && ops_equal_values_p (oe->op, notop))
906 tree op_type = TREE_TYPE (oe->op);
908 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Equivalence: ");
911 print_generic_expr (dump_file, notop);
912 fprintf (dump_file, " + ~");
913 print_generic_expr (dump_file, oe->op);
914 fprintf (dump_file, " -> -1\n");
917 ops->ordered_remove (i);
918 add_to_ops_vec (ops, build_all_ones_cst (op_type));
919 ops->ordered_remove (currindex);
920 reassociate_stats.ops_eliminated ++;
922 return true;
926 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
927 save it for later inspection in repropagate_negates(). */
928 if (negateop != NULL_TREE
929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
930 plus_negates.safe_push (curr->op);
932 return false;
935 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
936 bitwise not expression, look in OPS for a corresponding operand to
937 cancel it out. If we find one, remove the other from OPS, replace
938 OPS[CURRINDEX] with 0, and return true. Otherwise, return
939 false. */
941 static bool
942 eliminate_not_pairs (enum tree_code opcode,
943 vec<operand_entry *> *ops,
944 unsigned int currindex,
945 operand_entry *curr)
947 tree notop;
948 unsigned int i;
949 operand_entry *oe;
951 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
952 || TREE_CODE (curr->op) != SSA_NAME)
953 return false;
955 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
956 if (notop == NULL_TREE)
957 return false;
959 /* Any non-not version will have a rank that is one less than
960 the current rank. So once we hit those ranks, if we don't find
961 one, we can stop. */
963 for (i = currindex + 1;
964 ops->iterate (i, &oe)
965 && oe->rank >= curr->rank - 1;
966 i++)
968 if (oe->op == notop)
970 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Equivalence: ");
973 print_generic_expr (dump_file, notop);
974 if (opcode == BIT_AND_EXPR)
975 fprintf (dump_file, " & ~");
976 else if (opcode == BIT_IOR_EXPR)
977 fprintf (dump_file, " | ~");
978 print_generic_expr (dump_file, oe->op);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " -> 0\n");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " -> -1\n");
985 if (opcode == BIT_AND_EXPR)
986 oe->op = build_zero_cst (TREE_TYPE (oe->op));
987 else if (opcode == BIT_IOR_EXPR)
988 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
990 reassociate_stats.ops_eliminated += ops->length () - 1;
991 ops->truncate (0);
992 ops->quick_push (oe);
993 return true;
997 return false;
1000 /* Use constant value that may be present in OPS to try to eliminate
1001 operands. Note that this function is only really used when we've
1002 eliminated ops for other reasons, or merged constants. Across
1003 single statements, fold already does all of this, plus more. There
1004 is little point in duplicating logic, so I've only included the
1005 identities that I could ever construct testcases to trigger. */
1007 static void
1008 eliminate_using_constants (enum tree_code opcode,
1009 vec<operand_entry *> *ops)
1011 operand_entry *oelast = ops->last ();
1012 tree type = TREE_TYPE (oelast->op);
1014 if (oelast->rank == 0
1015 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1017 switch (opcode)
1019 case BIT_AND_EXPR:
1020 if (integer_zerop (oelast->op))
1022 if (ops->length () != 1)
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Found & 0, removing all other ops\n");
1027 reassociate_stats.ops_eliminated += ops->length () - 1;
1029 ops->truncate (0);
1030 ops->quick_push (oelast);
1031 return;
1034 else if (integer_all_onesp (oelast->op))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found & -1, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1044 break;
1045 case BIT_IOR_EXPR:
1046 if (integer_all_onesp (oelast->op))
1048 if (ops->length () != 1)
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 fprintf (dump_file, "Found | -1, removing all other ops\n");
1053 reassociate_stats.ops_eliminated += ops->length () - 1;
1055 ops->truncate (0);
1056 ops->quick_push (oelast);
1057 return;
1060 else if (integer_zerop (oelast->op))
1062 if (ops->length () != 1)
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Found | 0, removing\n");
1066 ops->pop ();
1067 reassociate_stats.ops_eliminated++;
1070 break;
1071 case MULT_EXPR:
1072 if (integer_zerop (oelast->op)
1073 || (FLOAT_TYPE_P (type)
1074 && !HONOR_NANS (type)
1075 && !HONOR_SIGNED_ZEROS (type)
1076 && real_zerop (oelast->op)))
1078 if (ops->length () != 1)
1080 if (dump_file && (dump_flags & TDF_DETAILS))
1081 fprintf (dump_file, "Found * 0, removing all other ops\n");
1083 reassociate_stats.ops_eliminated += ops->length () - 1;
1084 ops->truncate (0);
1085 ops->quick_push (oelast);
1086 return;
1089 else if (integer_onep (oelast->op)
1090 || (FLOAT_TYPE_P (type)
1091 && !HONOR_SNANS (type)
1092 && real_onep (oelast->op)))
1094 if (ops->length () != 1)
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, "Found * 1, removing\n");
1098 ops->pop ();
1099 reassociate_stats.ops_eliminated++;
1100 return;
1103 break;
1104 case BIT_XOR_EXPR:
1105 case PLUS_EXPR:
1106 case MINUS_EXPR:
1107 if (integer_zerop (oelast->op)
1108 || (FLOAT_TYPE_P (type)
1109 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1110 && fold_real_zero_addition_p (type, 0, oelast->op,
1111 opcode == MINUS_EXPR)))
1113 if (ops->length () != 1)
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 fprintf (dump_file, "Found [|^+] 0, removing\n");
1117 ops->pop ();
1118 reassociate_stats.ops_eliminated++;
1119 return;
1122 break;
1123 default:
1124 break;
1130 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1131 bool, bool);
1133 /* Structure for tracking and counting operands. */
1134 struct oecount {
1135 unsigned int cnt;
1136 unsigned int id;
1137 enum tree_code oecode;
1138 tree op;
1142 /* The heap for the oecount hashtable and the sorted list of operands. */
1143 static vec<oecount> cvec;
1146 /* Oecount hashtable helpers. */
1148 struct oecount_hasher : int_hash <int, 0, 1>
1150 static inline hashval_t hash (int);
1151 static inline bool equal (int, int);
1154 /* Hash function for oecount. */
1156 inline hashval_t
1157 oecount_hasher::hash (int p)
1159 const oecount *c = &cvec[p - 42];
1160 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1163 /* Comparison function for oecount. */
1165 inline bool
1166 oecount_hasher::equal (int p1, int p2)
1168 const oecount *c1 = &cvec[p1 - 42];
1169 const oecount *c2 = &cvec[p2 - 42];
1170 return c1->oecode == c2->oecode && c1->op == c2->op;
1173 /* Comparison function for qsort sorting oecount elements by count. */
1175 static int
1176 oecount_cmp (const void *p1, const void *p2)
1178 const oecount *c1 = (const oecount *)p1;
1179 const oecount *c2 = (const oecount *)p2;
1180 if (c1->cnt != c2->cnt)
1181 return c1->cnt > c2->cnt ? 1 : -1;
1182 else
1183 /* If counts are identical, use unique IDs to stabilize qsort. */
1184 return c1->id > c2->id ? 1 : -1;
1187 /* Return TRUE iff STMT represents a builtin call that raises OP
1188 to some exponent. */
1190 static bool
1191 stmt_is_power_of_op (gimple *stmt, tree op)
1193 if (!is_gimple_call (stmt))
1194 return false;
1196 switch (gimple_call_combined_fn (stmt))
1198 CASE_CFN_POW:
1199 CASE_CFN_POWI:
1200 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1202 default:
1203 return false;
1207 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1208 in place and return the result. Assumes that stmt_is_power_of_op
1209 was previously called for STMT and returned TRUE. */
1211 static HOST_WIDE_INT
1212 decrement_power (gimple *stmt)
1214 REAL_VALUE_TYPE c, cint;
1215 HOST_WIDE_INT power;
1216 tree arg1;
1218 switch (gimple_call_combined_fn (stmt))
1220 CASE_CFN_POW:
1221 arg1 = gimple_call_arg (stmt, 1);
1222 c = TREE_REAL_CST (arg1);
1223 power = real_to_integer (&c) - 1;
1224 real_from_integer (&cint, VOIDmode, power, SIGNED);
1225 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1226 return power;
1228 CASE_CFN_POWI:
1229 arg1 = gimple_call_arg (stmt, 1);
1230 power = TREE_INT_CST_LOW (arg1) - 1;
1231 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1232 return power;
1234 default:
1235 gcc_unreachable ();
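/* For example (illustrative only): decrement_power on a call
   __builtin_powi (x, 5) rewrites it in place to __builtin_powi (x, 4)
   and returns 4; when the returned value is 1, the caller knows the
   remaining call computes just x and can propagate x directly.  */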
1239 /* Replace the SSA name defined by STMT with a new SSA name, and replace
1240 all uses of the old one with the new one. Return the new SSA name. */
1242 static tree
1243 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1245 gimple *use_stmt;
1246 use_operand_p use;
1247 imm_use_iterator iter;
1248 tree new_lhs, new_debug_lhs = NULL_TREE;
1249 tree lhs = gimple_get_lhs (stmt);
1251 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1252 gimple_set_lhs (stmt, new_lhs);
1254 /* Also need to update GIMPLE_DEBUGs. */
1255 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1257 tree repl = new_lhs;
1258 if (is_gimple_debug (use_stmt))
1260 if (new_debug_lhs == NULL_TREE)
1262 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1263 gdebug *def_temp
1264 = gimple_build_debug_bind (new_debug_lhs,
1265 build2 (opcode, TREE_TYPE (lhs),
1266 new_lhs, op),
1267 stmt);
1268 gimple_set_uid (def_temp, gimple_uid (stmt));
1269 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1270 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1272 repl = new_debug_lhs;
1274 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1275 SET_USE (use, repl);
1276 update_stmt (use_stmt);
1278 return new_lhs;
1281 /* Replace each SSA name defined in STMTS_TO_FIX with a new SSA name and
1282 replace its uses with the new one. Also do this for the stmt that defines DEF
1283 if *DEF is not OP. */
1285 static void
1286 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1287 vec<gimple *> &stmts_to_fix)
1289 unsigned i;
1290 gimple *stmt;
1292 if (*def != op
1293 && TREE_CODE (*def) == SSA_NAME
1294 && (stmt = SSA_NAME_DEF_STMT (*def))
1295 && gimple_code (stmt) != GIMPLE_NOP)
1296 *def = make_new_ssa_for_def (stmt, opcode, op);
1298 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1299 make_new_ssa_for_def (stmt, opcode, op);
1302 /* Find the single immediate use of STMT's LHS, and replace it
1303 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1304 replace *DEF with OP as well. */
1306 static void
1307 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1309 tree lhs;
1310 gimple *use_stmt;
1311 use_operand_p use;
1312 gimple_stmt_iterator gsi;
1314 if (is_gimple_call (stmt))
1315 lhs = gimple_call_lhs (stmt);
1316 else
1317 lhs = gimple_assign_lhs (stmt);
1319 gcc_assert (has_single_use (lhs));
1320 single_imm_use (lhs, &use, &use_stmt);
1321 if (lhs == *def)
1322 *def = op;
1323 SET_USE (use, op);
1324 if (TREE_CODE (op) != SSA_NAME)
1325 update_stmt (use_stmt);
1326 gsi = gsi_for_stmt (stmt);
1327 unlink_stmt_vdef (stmt);
1328 reassoc_remove_stmt (&gsi);
1329 release_defs (stmt);
1332 /* Walks the linear chain with result *DEF searching for an operation
1333 with operand OP and code OPCODE removing that from the chain. *DEF
1334 is updated if there is only one operand but no operation left. */
1336 static void
1337 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1339 tree orig_def = *def;
1340 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1341 /* PR72835 - Record the stmt chain that has to be updated such that
1342 we don't use the same LHS when the values computed are different. */
1343 auto_vec<gimple *, 64> stmts_to_fix;
1347 tree name;
1349 if (opcode == MULT_EXPR)
1351 if (stmt_is_power_of_op (stmt, op))
1353 if (decrement_power (stmt) == 1)
1355 if (stmts_to_fix.length () > 0)
1356 stmts_to_fix.pop ();
1357 propagate_op_to_single_use (op, stmt, def);
1359 break;
1361 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1363 if (gimple_assign_rhs1 (stmt) == op)
1365 tree cst = build_minus_one_cst (TREE_TYPE (op));
1366 if (stmts_to_fix.length () > 0)
1367 stmts_to_fix.pop ();
1368 propagate_op_to_single_use (cst, stmt, def);
1369 break;
1371 else if (integer_minus_onep (op)
1372 || real_minus_onep (op))
1374 gimple_assign_set_rhs_code
1375 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1376 break;
1381 name = gimple_assign_rhs1 (stmt);
1383 /* If this is the operation we look for and one of the operands
1384 is ours, simply propagate the other operand into the stmt's
1385 single use. */
1386 if (gimple_assign_rhs_code (stmt) == opcode
1387 && (name == op
1388 || gimple_assign_rhs2 (stmt) == op))
1390 if (name == op)
1391 name = gimple_assign_rhs2 (stmt);
1392 if (stmts_to_fix.length () > 0)
1393 stmts_to_fix.pop ();
1394 propagate_op_to_single_use (name, stmt, def);
1395 break;
1398 /* We might have a multiply of two __builtin_pow* calls, and
1399 the operand might be hiding in the rightmost one. Likewise
1400 this can happen for a negate. */
1401 if (opcode == MULT_EXPR
1402 && gimple_assign_rhs_code (stmt) == opcode
1403 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1404 && has_single_use (gimple_assign_rhs2 (stmt)))
1406 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1407 if (stmt_is_power_of_op (stmt2, op))
1409 if (decrement_power (stmt2) == 1)
1410 propagate_op_to_single_use (op, stmt2, def);
1411 else
1412 stmts_to_fix.safe_push (stmt2);
1413 break;
1415 else if (is_gimple_assign (stmt2)
1416 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1418 if (gimple_assign_rhs1 (stmt2) == op)
1420 tree cst = build_minus_one_cst (TREE_TYPE (op));
1421 propagate_op_to_single_use (cst, stmt2, def);
1422 break;
1424 else if (integer_minus_onep (op)
1425 || real_minus_onep (op))
1427 stmts_to_fix.safe_push (stmt2);
1428 gimple_assign_set_rhs_code
1429 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1430 break;
1435 /* Continue walking the chain. */
1436 gcc_assert (name != op
1437 && TREE_CODE (name) == SSA_NAME);
1438 stmt = SSA_NAME_DEF_STMT (name);
1439 stmts_to_fix.safe_push (stmt);
1441 while (1);
1443 if (stmts_to_fix.length () > 0 || *def == orig_def)
1444 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
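/* For example (illustrative only): if *DEF is t3 in the chain
       t1 = x * a;  t2 = t1 * b;  t3 = t2 * c;
   and OP is b with OPCODE MULT_EXPR, the multiplication by b is removed
   so that *DEF ends up computing x * a * c, and the statements whose
   values changed are given fresh SSA names (see PR72835 above).  */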
1447 /* Returns true if statement S1 dominates statement S2. Like
1448 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1450 static bool
1451 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1453 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1455 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1456 SSA_NAME, i.e. a default definition. Assume it lives at the beginning
1457 of the function and thus dominates everything. */
1458 if (!bb1 || s1 == s2)
1459 return true;
1461 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1462 if (!bb2)
1463 return false;
1465 if (bb1 == bb2)
1467 /* PHIs in the same basic block are assumed to be
1468 executed all in parallel; if only one stmt is a PHI,
1469 it dominates the other stmt in the same basic block. */
1470 if (gimple_code (s1) == GIMPLE_PHI)
1471 return true;
1473 if (gimple_code (s2) == GIMPLE_PHI)
1474 return false;
1476 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1478 if (gimple_uid (s1) < gimple_uid (s2))
1479 return true;
1481 if (gimple_uid (s1) > gimple_uid (s2))
1482 return false;
1484 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1485 unsigned int uid = gimple_uid (s1);
1486 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1488 gimple *s = gsi_stmt (gsi);
1489 if (gimple_uid (s) != uid)
1490 break;
1491 if (s == s2)
1492 return true;
1495 return false;
1498 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1501 /* Insert STMT after INSERT_POINT. */
1503 static void
1504 insert_stmt_after (gimple *stmt, gimple *insert_point)
1506 gimple_stmt_iterator gsi;
1507 basic_block bb;
1509 if (gimple_code (insert_point) == GIMPLE_PHI)
1510 bb = gimple_bb (insert_point);
1511 else if (!stmt_ends_bb_p (insert_point))
1513 gsi = gsi_for_stmt (insert_point);
1514 gimple_set_uid (stmt, gimple_uid (insert_point));
1515 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1516 return;
1518 else if (gimple_code (insert_point) == GIMPLE_ASM
1519 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1520 /* We have no idea where to insert - it depends on where the
1521 uses will be placed. */
1522 gcc_unreachable ();
1523 else
1524 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1525 thus if it must end a basic block, it should be a call that can
1526 throw, or some assignment that can throw. If it throws, the LHS
1527 of it will not be initialized though, so only valid places using
1528 the SSA_NAME should be dominated by the fallthru edge. */
1529 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1530 gsi = gsi_after_labels (bb);
1531 if (gsi_end_p (gsi))
1533 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1534 gimple_set_uid (stmt,
1535 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1537 else
1538 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1539 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1542 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA name
1543 of type TYPE for the result. Places the statement after the definition of either
1544 OP1 or OP2. Returns the new statement. */
1546 static gimple *
1547 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1549 gimple *op1def = NULL, *op2def = NULL;
1550 gimple_stmt_iterator gsi;
1551 tree op;
1552 gassign *sum;
1554 /* Create the addition statement. */
1555 op = make_ssa_name (type);
1556 sum = gimple_build_assign (op, opcode, op1, op2);
1558 /* Find an insertion place and insert. */
1559 if (TREE_CODE (op1) == SSA_NAME)
1560 op1def = SSA_NAME_DEF_STMT (op1);
1561 if (TREE_CODE (op2) == SSA_NAME)
1562 op2def = SSA_NAME_DEF_STMT (op2);
1563 if ((!op1def || gimple_nop_p (op1def))
1564 && (!op2def || gimple_nop_p (op2def)))
1566 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1567 if (gsi_end_p (gsi))
1569 gimple_stmt_iterator gsi2
1570 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1571 gimple_set_uid (sum,
1572 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1574 else
1575 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1576 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1578 else
1580 gimple *insert_point;
1581 if ((!op1def || gimple_nop_p (op1def))
1582 || (op2def && !gimple_nop_p (op2def)
1583 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1584 insert_point = op2def;
1585 else
1586 insert_point = op1def;
1587 insert_stmt_after (sum, insert_point);
1589 update_stmt (sum);
1591 return sum;
1594 /* Perform un-distribution of divisions and multiplications.
1595 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1596 to (A + B) / X for real X.
1598 The algorithm is organized as follows.
1600 - First we walk the addition chain *OPS looking for summands that
1601 are defined by a multiplication or a real division. This results
1602 in the candidates bitmap with relevant indices into *OPS.
1604 - Second we build the chains of multiplications or divisions for
1605 these candidates, counting the number of occurrences of (operand, code)
1606 pairs in all of the candidates chains.
1608 - Third we sort the (operand, code) pairs by number of occurrence and
1609 process them starting with the pair with the most uses.
1611 * For each such pair we walk the candidates again to build a
1612 second candidate bitmap noting all multiplication/division chains
1613 that have at least one occurrence of (operand, code).
1615 * We build an alternate addition chain only covering these
1616 candidates with one (operand, code) operation removed from their
1617 multiplication/division chain.
1619 * The first candidate gets replaced by the alternate addition chain
1620 multiplied/divided by the operand.
1622 * All candidate chains get disabled for further processing and
1623 processing of (operand, code) pairs continues.
1625 The alternate addition chains built are re-processed by the main
1626 reassociation algorithm which allows optimizing a * x * y + b * y * x
1627 to (a + b) * x * y in one invocation of the reassociation pass. */
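/* A purely illustrative source-level example of the transform (names are
   hypothetical; reassociating the floating-point additions and divisions
   requires -fassociative-math, see can_reassociate_type_p):

     a * x + b * x + c * x   becomes   (a + b + c) * x
     a / x + b / x           becomes   (a + b) / x  */
#if 0
static double
undistribute_example (double a, double b, double c, double x)
{
  return a * x + b * x + c * x + a / x + b / x;
}
#endif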
1629 static bool
1630 undistribute_ops_list (enum tree_code opcode,
1631 vec<operand_entry *> *ops, class loop *loop)
1633 unsigned int length = ops->length ();
1634 operand_entry *oe1;
1635 unsigned i, j;
1636 unsigned nr_candidates, nr_candidates2;
1637 sbitmap_iterator sbi0;
1638 vec<operand_entry *> *subops;
1639 bool changed = false;
1640 unsigned int next_oecount_id = 0;
1642 if (length <= 1
1643 || opcode != PLUS_EXPR)
1644 return false;
1646 /* Build a list of candidates to process. */
1647 auto_sbitmap candidates (length);
1648 bitmap_clear (candidates);
1649 nr_candidates = 0;
1650 FOR_EACH_VEC_ELT (*ops, i, oe1)
1652 enum tree_code dcode;
1653 gimple *oe1def;
1655 if (TREE_CODE (oe1->op) != SSA_NAME)
1656 continue;
1657 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1658 if (!is_gimple_assign (oe1def))
1659 continue;
1660 dcode = gimple_assign_rhs_code (oe1def);
1661 if ((dcode != MULT_EXPR
1662 && dcode != RDIV_EXPR)
1663 || !is_reassociable_op (oe1def, dcode, loop))
1664 continue;
1666 bitmap_set_bit (candidates, i);
1667 nr_candidates++;
1670 if (nr_candidates < 2)
1671 return false;
1673 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, "searching for un-distribute opportunities ");
1676 print_generic_expr (dump_file,
1677 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1678 fprintf (dump_file, " %d\n", nr_candidates);
1681 /* Build linearized sub-operand lists and the counting table. */
1682 cvec.create (0);
1684 hash_table<oecount_hasher> ctable (15);
1686 /* ??? Macro arguments cannot have multi-argument template types in
1687 them. This typedef is needed to work around that limitation. */
1688 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1689 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1690 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1692 gimple *oedef;
1693 enum tree_code oecode;
1694 unsigned j;
1696 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1697 oecode = gimple_assign_rhs_code (oedef);
1698 linearize_expr_tree (&subops[i], oedef,
1699 associative_tree_code (oecode), false);
1701 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1703 oecount c;
1704 int *slot;
1705 int idx;
1706 c.oecode = oecode;
1707 c.cnt = 1;
1708 c.id = next_oecount_id++;
1709 c.op = oe1->op;
1710 cvec.safe_push (c);
1711 idx = cvec.length () + 41;
1712 slot = ctable.find_slot (idx, INSERT);
1713 if (!*slot)
1715 *slot = idx;
1717 else
1719 cvec.pop ();
1720 cvec[*slot - 42].cnt++;
1725 /* Sort the counting table. */
1726 cvec.qsort (oecount_cmp);
1728 if (dump_file && (dump_flags & TDF_DETAILS))
1730 oecount *c;
1731 fprintf (dump_file, "Candidates:\n");
1732 FOR_EACH_VEC_ELT (cvec, j, c)
1734 fprintf (dump_file, " %u %s: ", c->cnt,
1735 c->oecode == MULT_EXPR
1736 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1737 print_generic_expr (dump_file, c->op);
1738 fprintf (dump_file, "\n");
1742 /* Process the (operand, code) pairs in order of decreasing occurrence count. */
1743 auto_sbitmap candidates2 (length);
1744 while (!cvec.is_empty ())
1746 oecount *c = &cvec.last ();
1747 if (c->cnt < 2)
1748 break;
1750 /* Now collect the operands in the outer chain that contain
1751 the common operand in their inner chain. */
1752 bitmap_clear (candidates2);
1753 nr_candidates2 = 0;
1754 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1756 gimple *oedef;
1757 enum tree_code oecode;
1758 unsigned j;
1759 tree op = (*ops)[i]->op;
1761 /* If we undistributed in this chain already, this may be
1762 a constant. */
1763 if (TREE_CODE (op) != SSA_NAME)
1764 continue;
1766 oedef = SSA_NAME_DEF_STMT (op);
1767 oecode = gimple_assign_rhs_code (oedef);
1768 if (oecode != c->oecode)
1769 continue;
1771 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1773 if (oe1->op == c->op)
1775 bitmap_set_bit (candidates2, i);
1776 ++nr_candidates2;
1777 break;
1782 if (nr_candidates2 >= 2)
1784 operand_entry *oe1, *oe2;
1785 gimple *prod;
1786 int first = bitmap_first_set_bit (candidates2);
1788 /* Build the new addition chain. */
1789 oe1 = (*ops)[first];
1790 if (dump_file && (dump_flags & TDF_DETAILS))
1792 fprintf (dump_file, "Building (");
1793 print_generic_expr (dump_file, oe1->op);
1795 zero_one_operation (&oe1->op, c->oecode, c->op);
1796 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1798 gimple *sum;
1799 oe2 = (*ops)[i];
1800 if (dump_file && (dump_flags & TDF_DETAILS))
1802 fprintf (dump_file, " + ");
1803 print_generic_expr (dump_file, oe2->op);
1805 zero_one_operation (&oe2->op, c->oecode, c->op);
1806 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1807 oe1->op, oe2->op, opcode);
1808 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1809 oe2->rank = 0;
1810 oe1->op = gimple_get_lhs (sum);
1813 /* Apply the multiplication/division. */
1814 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1815 oe1->op, c->op, c->oecode);
1816 if (dump_file && (dump_flags & TDF_DETAILS))
1818 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1819 print_generic_expr (dump_file, c->op);
1820 fprintf (dump_file, "\n");
1823 /* Record it in the addition chain and disable further
1824 undistribution with this op. */
1825 oe1->op = gimple_assign_lhs (prod);
1826 oe1->rank = get_rank (oe1->op);
1827 subops[first].release ();
1829 changed = true;
1832 cvec.pop ();
1835 for (i = 0; i < ops->length (); ++i)
1836 subops[i].release ();
1837 free (subops);
1838 cvec.release ();
1840 return changed;
1843 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1844 first: element index for each relevant BIT_FIELD_REF.
1845 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1846 typedef std::pair<unsigned, unsigned> v_info_elem;
1847 struct v_info {
1848 tree vec_type;
1849 auto_vec<v_info_elem, 32> vec;
1851 typedef v_info *v_info_ptr;
1853 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1854 static int
1855 sort_by_mach_mode (const void *p_i, const void *p_j)
1857 const tree tr1 = *((const tree *) p_i);
1858 const tree tr2 = *((const tree *) p_j);
1859 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1860 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1861 if (mode1 > mode2)
1862 return 1;
1863 else if (mode1 < mode2)
1864 return -1;
1865 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1866 return -1;
1867 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1868 return 1;
1869 return 0;
1872 /* Clean up the hash map for VECTOR information. */
1873 static void
1874 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1876 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1877 it != info_map.end (); ++it)
1879 v_info_ptr info = (*it).second;
1880 delete info;
1881 (*it).second = NULL;
1885 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1886 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1887 is transformed to
1888 Vs = (V1 + V2 + ... + Vn)
1889 Vs[0] + Vs[1] + ... + Vs[k]
1891 The basic steps are listed below:
1893 1) Check the addition chain *OPS, looking for summands that come from a
1894 VECTOR bit_field_ref on a VECTOR type. Put the information into
1895 v_info_map for each such summand, using the VECTOR SSA_NAME as key.
1897 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1898 contiguous and cover the whole VECTOR perfectly, without any holes.
1899 Obtain one VECTOR list which contains the candidates to be transformed.
1901 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1902 group of candidates with the same mode, build the addition statements
1903 for them and generate BIT_FIELD_REFs accordingly.
1905 TODO:
1906 The current implementation requires that each whole VECTOR be fully
1907 covered. It could be extended to support partial coverage (adjacent
1908 elements that do not fill the whole vector), but that would need some
1909 cost model to decide when it is worthwhile. */
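/* A concrete source-level form of the above (illustrative only), using
   the GNU C vector extension; reassociating the float additions also
   requires -fassociative-math:

       typedef float v4sf __attribute__ ((vector_size (16)));
       float f (v4sf v1, v4sf v2)
       {
	 return v1[0] + v1[1] + v1[2] + v1[3]
		+ v2[0] + v2[1] + v2[2] + v2[3];
       }

   is rewritten as if it were

       v4sf vs = v1 + v2;
       return vs[0] + vs[1] + vs[2] + vs[3];  */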
1911 static bool
1912 undistribute_bitref_for_vector (enum tree_code opcode,
1913 vec<operand_entry *> *ops, struct loop *loop)
1915 if (ops->length () <= 1)
1916 return false;
1918 if (opcode != PLUS_EXPR
1919 && opcode != MULT_EXPR
1920 && opcode != BIT_XOR_EXPR
1921 && opcode != BIT_IOR_EXPR
1922 && opcode != BIT_AND_EXPR)
1923 return false;
1925 hash_map<tree, v_info_ptr> v_info_map;
1926 operand_entry *oe1;
1927 unsigned i;
1929 /* Find those summands coming from a VECTOR BIT_FIELD_REF in the addition
1930 chain and put the information into the map. */
1931 FOR_EACH_VEC_ELT (*ops, i, oe1)
1933 enum tree_code dcode;
1934 gimple *oe1def;
1936 if (TREE_CODE (oe1->op) != SSA_NAME)
1937 continue;
1938 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1939 if (!is_gimple_assign (oe1def))
1940 continue;
1941 dcode = gimple_assign_rhs_code (oe1def);
1942 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1943 continue;
1945 tree rhs = gimple_assign_rhs1 (oe1def);
1946 tree vec = TREE_OPERAND (rhs, 0);
1947 tree vec_type = TREE_TYPE (vec);
1949 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1950 continue;
1952 /* Ignore it if target machine can't support this VECTOR type. */
1953 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1954 continue;
1956 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1957 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1958 continue;
1960 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1961 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1962 continue;
1964 /* The type of BIT_FIELD_REF might not be equal to the element type of
1965 the vector. We want to use a vector type with element type the
1966 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1967 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1969 machine_mode simd_mode;
1970 unsigned HOST_WIDE_INT size, nunits;
1971 unsigned HOST_WIDE_INT elem_size
1972 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1973 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1974 continue;
1975 if (size <= elem_size || (size % elem_size) != 0)
1976 continue;
1977 nunits = size / elem_size;
1978 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1979 nunits).exists (&simd_mode))
1980 continue;
1981 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1983 /* Ignore it if target machine can't support this VECTOR type. */
1984 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1985 continue;
1987 /* Check const vector type, constrain BIT_FIELD_REF offset and
1988 size. */
1989 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1990 continue;
1992 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1993 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1994 continue;
1997 tree elem_type = TREE_TYPE (vec_type);
1998 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1999 if (maybe_ne (bit_field_size (rhs), elem_size))
2000 continue;
2002 unsigned idx;
2003 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2004 continue;
2006 /* Ignore it if target machine can't support this type of VECTOR
2007 operation. */
2008 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2009 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2010 continue;
2012 bool existed;
2013 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2014 if (!existed)
2016 info = new v_info;
2017 info->vec_type = vec_type;
2019 else if (!types_compatible_p (vec_type, info->vec_type))
2020 continue;
2021 info->vec.safe_push (std::make_pair (idx, i));
2024 /* Need at least two VECTORs to combine. */
2025 if (v_info_map.elements () <= 1)
2027 cleanup_vinfo_map (v_info_map);
2028 return false;
2031 /* Verify all VECTOR candidates by checking two conditions:
2032 1) sorted offsets are adjacent, no holes.
2033 2) can fill the whole VECTOR perfectly.
2034 And add the valid candidates to a vector for further handling. */
2035 auto_vec<tree> valid_vecs (v_info_map.elements ());
2036 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2037 it != v_info_map.end (); ++it)
2039 tree cand_vec = (*it).first;
2040 v_info_ptr cand_info = (*it).second;
2041 unsigned int num_elems
2042 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2043 if (cand_info->vec.length () != num_elems)
2044 continue;
2045 sbitmap holes = sbitmap_alloc (num_elems);
2046 bitmap_ones (holes);
2047 bool valid = true;
2048 v_info_elem *curr;
2049 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2051 if (!bitmap_bit_p (holes, curr->first))
2053 valid = false;
2054 break;
2056 else
2057 bitmap_clear_bit (holes, curr->first);
2059 if (valid && bitmap_empty_p (holes))
2060 valid_vecs.quick_push (cand_vec);
2061 sbitmap_free (holes);
2064 /* Need at least two VECTORs to combine. */
2065 if (valid_vecs.length () <= 1)
2067 cleanup_vinfo_map (v_info_map);
2068 return false;
2071 valid_vecs.qsort (sort_by_mach_mode);
2072 /* Go through all candidates in machine mode order; handle each group of
2073 candidates with the same mode together and skip modes with a single one. */
2074 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2076 tree tvec = valid_vecs[i];
2077 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2079 /* Skip modes with only a single candidate. */
2080 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2081 continue;
2083 unsigned int idx, j;
2084 gimple *sum = NULL;
2085 tree sum_vec = tvec;
2086 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2087 v_info_elem *elem;
2088 tree vec_type = info_ptr->vec_type;
2090 /* Build the sum for all candidates with same mode. */
2093 sum = build_and_add_sum (vec_type, sum_vec,
2094 valid_vecs[i + 1], opcode);
2095 if (!useless_type_conversion_p (vec_type,
2096 TREE_TYPE (valid_vecs[i + 1])))
2098 /* Update the operands only after build_and_add_sum,
2099 so that we don't have to repeat the placement algorithm
2100 of build_and_add_sum. */
2101 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2102 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2103 valid_vecs[i + 1]);
2104 tree lhs = make_ssa_name (vec_type);
2105 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2106 gimple_set_uid (g, gimple_uid (sum));
2107 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2108 gimple_assign_set_rhs2 (sum, lhs);
2109 if (sum_vec == tvec)
2111 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2112 lhs = make_ssa_name (vec_type);
2113 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2114 gimple_set_uid (g, gimple_uid (sum));
2115 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2116 gimple_assign_set_rhs1 (sum, lhs);
2118 update_stmt (sum);
2120 sum_vec = gimple_get_lhs (sum);
2121 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2122 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2123 /* Update the ops related to the current candidate VECTOR. */
2124 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2126 idx = elem->second;
2127 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2128 /* Set this so the op definition will get DCEd later. */
2129 gimple_set_visited (def, true);
2130 if (opcode == PLUS_EXPR
2131 || opcode == BIT_XOR_EXPR
2132 || opcode == BIT_IOR_EXPR)
2133 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2134 else if (opcode == MULT_EXPR)
2135 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2136 else
2138 gcc_assert (opcode == BIT_AND_EXPR);
2139 (*ops)[idx]->op
2140 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2142 (*ops)[idx]->rank = 0;
2144 if (dump_file && (dump_flags & TDF_DETAILS))
2146 fprintf (dump_file, "Generating addition -> ");
2147 print_gimple_stmt (dump_file, sum, 0);
2149 i++;
2151 while ((i < valid_vecs.length () - 1)
2152 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2154 /* Referring to first valid VECTOR with this mode, generate the
2155 BIT_FIELD_REF statements accordingly. */
2156 info_ptr = *(v_info_map.get (tvec));
2157 gcc_assert (sum);
2158 tree elem_type = TREE_TYPE (vec_type);
2159 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2161 idx = elem->second;
2162 tree dst = make_ssa_name (elem_type);
2163 tree pos = bitsize_int (elem->first
2164 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2165 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2166 TYPE_SIZE (elem_type), pos);
2167 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2168 insert_stmt_after (gs, sum);
2169 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2170 /* Set this so the op definition will get DCEd later. */
2171 gimple_set_visited (def, true);
2172 (*ops)[idx]->op = gimple_assign_lhs (gs);
2173 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2174 if (dump_file && (dump_flags & TDF_DETAILS))
2176 fprintf (dump_file, "Generating bit_field_ref -> ");
2177 print_gimple_stmt (dump_file, gs, 0);
2182 if (dump_file && (dump_flags & TDF_DETAILS))
2183 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2185 cleanup_vinfo_map (v_info_map);
2187 return true;
2190 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2191 expression, examine the other OPS to see if any of them are comparisons
2192 of the same values, which we may be able to combine or eliminate.
2193 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2195 static bool
2196 eliminate_redundant_comparison (enum tree_code opcode,
2197 vec<operand_entry *> *ops,
2198 unsigned int currindex,
2199 operand_entry *curr)
2201 tree op1, op2;
2202 enum tree_code lcode, rcode;
2203 gimple *def1, *def2;
2204 int i;
2205 operand_entry *oe;
2207 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2208 return false;
2210 /* Check that CURR is a comparison. */
2211 if (TREE_CODE (curr->op) != SSA_NAME)
2212 return false;
2213 def1 = SSA_NAME_DEF_STMT (curr->op);
2214 if (!is_gimple_assign (def1))
2215 return false;
2216 lcode = gimple_assign_rhs_code (def1);
2217 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2218 return false;
2219 op1 = gimple_assign_rhs1 (def1);
2220 op2 = gimple_assign_rhs2 (def1);
2222 /* Now look for a similar comparison in the remaining OPS. */
2223 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2225 tree t;
2227 if (TREE_CODE (oe->op) != SSA_NAME)
2228 continue;
2229 def2 = SSA_NAME_DEF_STMT (oe->op);
2230 if (!is_gimple_assign (def2))
2231 continue;
2232 rcode = gimple_assign_rhs_code (def2);
2233 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2234 continue;
2236 /* If we got here, we have a match. See if we can combine the
2237 two comparisons. */
2238 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2239 if (opcode == BIT_IOR_EXPR)
2240 t = maybe_fold_or_comparisons (type,
2241 lcode, op1, op2,
2242 rcode, gimple_assign_rhs1 (def2),
2243 gimple_assign_rhs2 (def2));
2244 else
2245 t = maybe_fold_and_comparisons (type,
2246 lcode, op1, op2,
2247 rcode, gimple_assign_rhs1 (def2),
2248 gimple_assign_rhs2 (def2));
2249 if (!t)
2250 continue;
2252 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2253 always give us a boolean_type_node value back. If the original
2254 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2255 we need to convert. */
2256 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2257 t = fold_convert (TREE_TYPE (curr->op), t);
2259 if (TREE_CODE (t) != INTEGER_CST
2260 && !operand_equal_p (t, curr->op, 0))
2262 enum tree_code subcode;
2263 tree newop1, newop2;
2264 if (!COMPARISON_CLASS_P (t))
2265 continue;
2266 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2267 STRIP_USELESS_TYPE_CONVERSION (newop1);
2268 STRIP_USELESS_TYPE_CONVERSION (newop2);
2269 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2270 continue;
2273 if (dump_file && (dump_flags & TDF_DETAILS))
2275 fprintf (dump_file, "Equivalence: ");
2276 print_generic_expr (dump_file, curr->op);
2277 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2278 print_generic_expr (dump_file, oe->op);
2279 fprintf (dump_file, " -> ");
2280 print_generic_expr (dump_file, t);
2281 fprintf (dump_file, "\n");
2284 /* Now we can delete oe, as it has been subsumed by the new combined
2285 expression t. */
2286 ops->ordered_remove (i);
2287 reassociate_stats.ops_eliminated ++;
2289 /* If t is the same as curr->op, we're done. Otherwise we must
2290 replace curr->op with t. Special case is if we got a constant
2291 back, in which case we add it to the end instead of in place of
2292 the current entry. */
2293 if (TREE_CODE (t) == INTEGER_CST)
2295 ops->ordered_remove (currindex);
2296 add_to_ops_vec (ops, t);
2298 else if (!operand_equal_p (t, curr->op, 0))
2300 gimple *sum;
2301 enum tree_code subcode;
2302 tree newop1;
2303 tree newop2;
2304 gcc_assert (COMPARISON_CLASS_P (t));
2305 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2306 STRIP_USELESS_TYPE_CONVERSION (newop1);
2307 STRIP_USELESS_TYPE_CONVERSION (newop2);
2308 gcc_checking_assert (is_gimple_val (newop1)
2309 && is_gimple_val (newop2));
2310 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2311 curr->op = gimple_get_lhs (sum);
2313 return true;
2316 return false;
2320 /* Transform repeated addition of the same value into a multiplication
2321 by a constant. */
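/* An illustrative sketch (guarded out, not part of the pass); the function
   name is made up.  */
#if 0
int
repeated_add (int x, int y)
{
  /* The three copies of X in the operand list are collapsed into a single
     entry X * 3, so the sum becomes x * 3 + y.  */
  return x + x + x + y;
}
#endif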
2322 static bool
2323 transform_add_to_multiply (vec<operand_entry *> *ops)
2325 operand_entry *oe;
2326 tree op = NULL_TREE;
2327 int j;
2328 int i, start = -1, end = 0, count = 0;
2329 auto_vec<std::pair <int, int> > indxs;
2330 bool changed = false;
2332 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2333 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2334 || !flag_unsafe_math_optimizations))
2335 return false;
2337 /* Look for repeated operands. */
2338 FOR_EACH_VEC_ELT (*ops, i, oe)
2340 if (start == -1)
2342 count = 1;
2343 op = oe->op;
2344 start = i;
2346 else if (operand_equal_p (oe->op, op, 0))
2348 count++;
2349 end = i;
2351 else
2353 if (count > 1)
2354 indxs.safe_push (std::make_pair (start, end));
2355 count = 1;
2356 op = oe->op;
2357 start = i;
2361 if (count > 1)
2362 indxs.safe_push (std::make_pair (start, end));
2364 for (j = indxs.length () - 1; j >= 0; --j)
2366 /* Convert repeated operand addition to multiplication. */
2367 start = indxs[j].first;
2368 end = indxs[j].second;
2369 op = (*ops)[start]->op;
2370 count = end - start + 1;
2371 for (i = end; i >= start; --i)
2372 ops->unordered_remove (i);
2373 tree tmp = make_ssa_name (TREE_TYPE (op));
2374 tree cst = build_int_cst (integer_type_node, count);
2375 gassign *mul_stmt
2376 = gimple_build_assign (tmp, MULT_EXPR,
2377 op, fold_convert (TREE_TYPE (op), cst));
2378 gimple_set_visited (mul_stmt, true);
2379 add_to_ops_vec (ops, tmp, mul_stmt);
2380 changed = true;
2383 return changed;
2387 /* Perform various identities and other optimizations on the list of
2388 operand entries, stored in OPS. The tree code for the binary
2389 operation between all the operands is OPCODE. */
2391 static void
2392 optimize_ops_list (enum tree_code opcode,
2393 vec<operand_entry *> *ops)
2395 unsigned int length = ops->length ();
2396 unsigned int i;
2397 operand_entry *oe;
2398 operand_entry *oelast = NULL;
2399 bool iterate = false;
2401 if (length == 1)
2402 return;
2404 oelast = ops->last ();
2406 /* If the last two are constants, pop the constants off, merge them
2407 and try the next two. */
2408 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2410 operand_entry *oelm1 = (*ops)[length - 2];
2412 if (oelm1->rank == 0
2413 && is_gimple_min_invariant (oelm1->op)
2414 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2415 TREE_TYPE (oelast->op)))
2417 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2418 oelm1->op, oelast->op);
2420 if (folded && is_gimple_min_invariant (folded))
2422 if (dump_file && (dump_flags & TDF_DETAILS))
2423 fprintf (dump_file, "Merging constants\n");
2425 ops->pop ();
2426 ops->pop ();
2428 add_to_ops_vec (ops, folded);
2429 reassociate_stats.constants_eliminated++;
2431 optimize_ops_list (opcode, ops);
2432 return;
2437 eliminate_using_constants (opcode, ops);
2438 oelast = NULL;
2440 for (i = 0; ops->iterate (i, &oe);)
2442 bool done = false;
2444 if (eliminate_not_pairs (opcode, ops, i, oe))
2445 return;
2446 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2447 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2448 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2450 if (done)
2451 return;
2452 iterate = true;
2453 oelast = NULL;
2454 continue;
2456 oelast = oe;
2457 i++;
2460 if (iterate)
2461 optimize_ops_list (opcode, ops);
2464 /* The following functions are subroutines to optimize_range_tests and allow
2465 it to try to change a logical combination of comparisons into a range
2466 test.
2468 For example, both
2469 X == 2 || X == 5 || X == 3 || X == 4
2470 and
2471 X >= 2 && X <= 5
2472 are converted to
2473 (unsigned) (X - 2) <= 3
2475 For more information see the comments above fold_range_test in fold-const.c;
2476 this implementation is for GIMPLE. */
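/* An illustrative, compilable sketch (guarded out, not part of the
   implementation) of the rewrite above; the function names are made up and
   the subtraction is done in unsigned arithmetic to keep the example free
   of signed overflow.  */
#if 0
int
range_test_before (int x)
{
  return x >= 2 && x <= 5;
}

int
range_test_after (int x)
{
  /* Values below 2 wrap around to huge unsigned numbers, so one unsigned
     comparison covers the whole [2, 5] range.  */
  return (unsigned) x - 2u <= 3u;
}
#endif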
2480 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2482 void
2483 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2485 if (!skip_exp)
2486 print_generic_expr (file, r->exp);
2487 fprintf (file, " %c[", r->in_p ? '+' : '-');
2488 print_generic_expr (file, r->low);
2489 fputs (", ", file);
2490 print_generic_expr (file, r->high);
2491 fputc (']', file);
2494 /* Dump the range entry R to STDERR. */
2496 DEBUG_FUNCTION void
2497 debug_range_entry (struct range_entry *r)
2499 dump_range_entry (stderr, r, false);
2500 fputc ('\n', stderr);
2503 /* This is similar to make_range in fold-const.c, but on top of
2504 GIMPLE instead of trees. If EXP is non-NULL, it should be
2505 an SSA_NAME and STMT argument is ignored, otherwise STMT
2506 argument should be a GIMPLE_COND. */
2508 void
2509 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2511 int in_p;
2512 tree low, high;
2513 bool is_bool, strict_overflow_p;
2515 r->exp = NULL_TREE;
2516 r->in_p = false;
2517 r->strict_overflow_p = false;
2518 r->low = NULL_TREE;
2519 r->high = NULL_TREE;
2520 if (exp != NULL_TREE
2521 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2522 return;
2524 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2525 and see if we can refine the range. Some of the cases below may not
2526 happen, but it doesn't seem worth worrying about this. We "continue"
2527 the outer loop when we've changed something; otherwise we "break"
2528 the switch, which will "break" the while. */
2529 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2530 high = low;
2531 in_p = 0;
2532 strict_overflow_p = false;
2533 is_bool = false;
2534 if (exp == NULL_TREE)
2535 is_bool = true;
2536 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2538 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2539 is_bool = true;
2540 else
2541 return;
2543 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2544 is_bool = true;
2546 while (1)
2548 enum tree_code code;
2549 tree arg0, arg1, exp_type;
2550 tree nexp;
2551 location_t loc;
2553 if (exp != NULL_TREE)
2555 if (TREE_CODE (exp) != SSA_NAME
2556 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2557 break;
2559 stmt = SSA_NAME_DEF_STMT (exp);
2560 if (!is_gimple_assign (stmt))
2561 break;
2563 code = gimple_assign_rhs_code (stmt);
2564 arg0 = gimple_assign_rhs1 (stmt);
2565 arg1 = gimple_assign_rhs2 (stmt);
2566 exp_type = TREE_TYPE (exp);
2568 else
2570 code = gimple_cond_code (stmt);
2571 arg0 = gimple_cond_lhs (stmt);
2572 arg1 = gimple_cond_rhs (stmt);
2573 exp_type = boolean_type_node;
2576 if (TREE_CODE (arg0) != SSA_NAME
2577 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2578 break;
2579 loc = gimple_location (stmt);
2580 switch (code)
2582 case BIT_NOT_EXPR:
2583 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2584 /* Ensure the range is either +[-,0], +[0,0],
2585 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2586 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2587 or similar expression of unconditional true or
2588 false, it should not be negated. */
2589 && ((high && integer_zerop (high))
2590 || (low && integer_onep (low))))
2592 in_p = !in_p;
2593 exp = arg0;
2594 continue;
2596 break;
2597 case SSA_NAME:
2598 exp = arg0;
2599 continue;
2600 CASE_CONVERT:
2601 if (is_bool)
2603 if ((TYPE_PRECISION (exp_type) == 1
2604 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2605 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2606 return;
2608 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2610 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2611 is_bool = true;
2612 else
2613 return;
2615 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2616 is_bool = true;
2617 goto do_default;
2618 case EQ_EXPR:
2619 case NE_EXPR:
2620 case LT_EXPR:
2621 case LE_EXPR:
2622 case GE_EXPR:
2623 case GT_EXPR:
2624 is_bool = true;
2625 /* FALLTHRU */
2626 default:
2627 if (!is_bool)
2628 return;
2629 do_default:
2630 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2631 &low, &high, &in_p,
2632 &strict_overflow_p);
2633 if (nexp != NULL_TREE)
2635 exp = nexp;
2636 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2637 continue;
2639 break;
2641 break;
2643 if (is_bool)
2645 r->exp = exp;
2646 r->in_p = in_p;
2647 r->low = low;
2648 r->high = high;
2649 r->strict_overflow_p = strict_overflow_p;
2653 /* Comparison function for qsort. Sort entries
2654 without SSA_NAME exp first, then with SSA_NAMEs sorted
2655 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2656 by increasing ->low and if ->low is the same, by increasing
2657 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2658 maximum. */
2660 static int
2661 range_entry_cmp (const void *a, const void *b)
2663 const struct range_entry *p = (const struct range_entry *) a;
2664 const struct range_entry *q = (const struct range_entry *) b;
2666 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2668 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2670 /* Group range_entries for the same SSA_NAME together. */
2671 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2672 return -1;
2673 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2674 return 1;
2675 /* If ->low is different, NULL low goes first, then by
2676 ascending low. */
2677 if (p->low != NULL_TREE)
2679 if (q->low != NULL_TREE)
2681 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2682 p->low, q->low);
2683 if (tem && integer_onep (tem))
2684 return -1;
2685 tem = fold_binary (GT_EXPR, boolean_type_node,
2686 p->low, q->low);
2687 if (tem && integer_onep (tem))
2688 return 1;
2690 else
2691 return 1;
2693 else if (q->low != NULL_TREE)
2694 return -1;
2695 /* If ->high is different, NULL high goes last, before that by
2696 ascending high. */
2697 if (p->high != NULL_TREE)
2699 if (q->high != NULL_TREE)
2701 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2702 p->high, q->high);
2703 if (tem && integer_onep (tem))
2704 return -1;
2705 tem = fold_binary (GT_EXPR, boolean_type_node,
2706 p->high, q->high);
2707 if (tem && integer_onep (tem))
2708 return 1;
2710 else
2711 return -1;
2713 else if (q->high != NULL_TREE)
2714 return 1;
2715 /* If both ranges are the same, sort below by ascending idx. */
2717 else
2718 return 1;
2720 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2721 return -1;
2723 if (p->idx < q->idx)
2724 return -1;
2725 else
2727 gcc_checking_assert (p->idx > q->idx);
2728 return 1;
2732 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2733 insert needed statements BEFORE or after GSI. */
2735 static tree
2736 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2738 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2739 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2740 if (TREE_CODE (ret) != SSA_NAME)
2742 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2743 if (before)
2744 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2745 else
2746 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2747 ret = gimple_assign_lhs (g);
2749 return ret;
2752 /* Helper routine of optimize_range_test.
2753 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2754 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2755 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2756 is NULL, OTHERRANGEP should not be NULL; it then points to
2757 an array of COUNT pointers to other ranges. Return
2758 true if the range merge has been successful.
2759 If OPCODE is ERROR_MARK, this is called from within
2760 maybe_optimize_range_tests and is performing inter-bb range optimization.
2761 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2762 oe->rank. */
2764 static bool
2765 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2766 struct range_entry **otherrangep,
2767 unsigned int count, enum tree_code opcode,
2768 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2769 bool in_p, tree low, tree high, bool strict_overflow_p)
2771 operand_entry *oe = (*ops)[range->idx];
2772 tree op = oe->op;
2773 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2774 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2775 location_t loc = gimple_location (stmt);
2776 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2777 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2778 in_p, low, high);
2779 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2780 gimple_stmt_iterator gsi;
2781 unsigned int i, uid;
2783 if (tem == NULL_TREE)
2784 return false;
2786 /* If op is default def SSA_NAME, there is no place to insert the
2787 new comparison. Give up, unless we can use OP itself as the
2788 range test. */
2789 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2791 if (op == range->exp
2792 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2793 || TREE_CODE (optype) == BOOLEAN_TYPE)
2794 && (op == tem
2795 || (TREE_CODE (tem) == EQ_EXPR
2796 && TREE_OPERAND (tem, 0) == op
2797 && integer_onep (TREE_OPERAND (tem, 1))))
2798 && opcode != BIT_IOR_EXPR
2799 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2801 stmt = NULL;
2802 tem = op;
2804 else
2805 return false;
2808 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2809 warning_at (loc, OPT_Wstrict_overflow,
2810 "assuming signed overflow does not occur "
2811 "when simplifying range test");
2813 if (dump_file && (dump_flags & TDF_DETAILS))
2815 struct range_entry *r;
2816 fprintf (dump_file, "Optimizing range tests ");
2817 dump_range_entry (dump_file, range, false);
2818 for (i = 0; i < count; i++)
2820 if (otherrange)
2821 r = otherrange + i;
2822 else
2823 r = otherrangep[i];
2824 if (r->exp
2825 && r->exp != range->exp
2826 && TREE_CODE (r->exp) == SSA_NAME)
2828 fprintf (dump_file, " and ");
2829 dump_range_entry (dump_file, r, false);
2831 else
2833 fprintf (dump_file, " and");
2834 dump_range_entry (dump_file, r, true);
2837 fprintf (dump_file, "\n into ");
2838 print_generic_expr (dump_file, tem);
2839 fprintf (dump_file, "\n");
2842 if (opcode == BIT_IOR_EXPR
2843 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2844 tem = invert_truthvalue_loc (loc, tem);
2846 tem = fold_convert_loc (loc, optype, tem);
2847 if (stmt)
2849 gsi = gsi_for_stmt (stmt);
2850 uid = gimple_uid (stmt);
2852 else
2854 gsi = gsi_none ();
2855 uid = 0;
2857 if (stmt == NULL)
2858 gcc_checking_assert (tem == op);
2859 /* In rare cases range->exp can be equal to lhs of stmt.
2860 In that case we have to insert after the stmt rather than before
2861 it. If stmt is a PHI, insert it at the start of the basic block. */
2862 else if (op != range->exp)
2864 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2865 tem = force_into_ssa_name (&gsi, tem, true);
2866 gsi_prev (&gsi);
2868 else if (gimple_code (stmt) != GIMPLE_PHI)
2870 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2871 tem = force_into_ssa_name (&gsi, tem, false);
2873 else
2875 gsi = gsi_after_labels (gimple_bb (stmt));
2876 if (!gsi_end_p (gsi))
2877 uid = gimple_uid (gsi_stmt (gsi));
2878 else
2880 gsi = gsi_start_bb (gimple_bb (stmt));
2881 uid = 1;
2882 while (!gsi_end_p (gsi))
2884 uid = gimple_uid (gsi_stmt (gsi));
2885 gsi_next (&gsi);
2888 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2889 tem = force_into_ssa_name (&gsi, tem, true);
2890 if (gsi_end_p (gsi))
2891 gsi = gsi_last_bb (gimple_bb (stmt));
2892 else
2893 gsi_prev (&gsi);
2895 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2896 if (gimple_uid (gsi_stmt (gsi)))
2897 break;
2898 else
2899 gimple_set_uid (gsi_stmt (gsi), uid);
2901 oe->op = tem;
2902 range->exp = exp;
2903 range->low = low;
2904 range->high = high;
2905 range->in_p = in_p;
2906 range->strict_overflow_p = false;
2908 for (i = 0; i < count; i++)
2910 if (otherrange)
2911 range = otherrange + i;
2912 else
2913 range = otherrangep[i];
2914 oe = (*ops)[range->idx];
2915 /* Now change all the other range test immediate uses, so that
2916 those tests will be optimized away. */
2917 if (opcode == ERROR_MARK)
2919 if (oe->op)
2920 oe->op = build_int_cst (TREE_TYPE (oe->op),
2921 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2922 else
2923 oe->op = (oe->rank == BIT_IOR_EXPR
2924 ? boolean_false_node : boolean_true_node);
2926 else
2927 oe->op = error_mark_node;
2928 range->exp = NULL_TREE;
2929 range->low = NULL_TREE;
2930 range->high = NULL_TREE;
2932 return true;
2935 /* Optimize X == CST1 || X == CST2
2936 if popcount (CST1 ^ CST2) == 1 into
2937 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2938 Similarly for ranges. E.g.
2939 X != 2 && X != 3 && X != 10 && X != 11
2940 will be transformed by the previous optimization into
2941 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2942 and this loop can transform that into
2943 !(((X & ~8) - 2U) <= 1U). */
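/* An illustrative sketch (guarded out, not part of the pass) of the identity
   above with concrete constants CST1 = 4 and CST2 = 6, whose xor 2 has a
   single bit set; the function names are made up.  */
#if 0
int
xor_test_before (int x)
{
  return x == 4 || x == 6;
}

int
xor_test_after (int x)
{
  /* Clearing the one differing bit maps both 4 and 6 to 4.  */
  return (x & ~2) == 4;
}
#endif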
2945 static bool
2946 optimize_range_tests_xor (enum tree_code opcode, tree type,
2947 tree lowi, tree lowj, tree highi, tree highj,
2948 vec<operand_entry *> *ops,
2949 struct range_entry *rangei,
2950 struct range_entry *rangej)
2952 tree lowxor, highxor, tem, exp;
2953 /* Check lowi ^ lowj == highi ^ highj and
2954 popcount (lowi ^ lowj) == 1. */
2955 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2956 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2957 return false;
2958 if (!integer_pow2p (lowxor))
2959 return false;
2960 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2961 if (!tree_int_cst_equal (lowxor, highxor))
2962 return false;
2964 exp = rangei->exp;
2965 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2966 int prec = GET_MODE_PRECISION (mode);
2967 if (TYPE_PRECISION (type) < prec
2968 || (wi::to_wide (TYPE_MIN_VALUE (type))
2969 != wi::min_value (prec, TYPE_SIGN (type)))
2970 || (wi::to_wide (TYPE_MAX_VALUE (type))
2971 != wi::max_value (prec, TYPE_SIGN (type))))
2973 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2974 exp = fold_convert (type, exp);
2975 lowxor = fold_convert (type, lowxor);
2976 lowi = fold_convert (type, lowi);
2977 highi = fold_convert (type, highi);
2979 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2980 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2981 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2982 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2983 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2984 NULL, rangei->in_p, lowj, highj,
2985 rangei->strict_overflow_p
2986 || rangej->strict_overflow_p))
2987 return true;
2988 return false;
2991 /* Optimize X == CST1 || X == CST2
2992 if popcount (CST2 - CST1) == 1 into
2993 ((X - CST1) & ~(CST2 - CST1)) == 0.
2994 Similarly for ranges. E.g.
2995 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2996 || X == 75 || X == 45
2997 will be transformed by the previous optimization into
2998 (X - 43U) <= 3U || (X - 75U) <= 3U
2999 and this loop can transform that into
3000 ((X - 43U) & ~(75U - 43U)) <= 3U. */
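/* An illustrative sketch (guarded out, not part of the pass) of the
   single-comparison form for X == 43 || X == 75, where 75 - 43 == 32 has a
   single bit set; the function names are made up.  */
#if 0
int
diff_test_before (unsigned x)
{
  return x == 43 || x == 75;
}

int
diff_test_after (unsigned x)
{
  /* Subtracting the lower constant and masking away the differing bit
     leaves zero exactly for 43 and 75.  */
  return ((x - 43u) & ~32u) == 0;
}
#endif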
3001 static bool
3002 optimize_range_tests_diff (enum tree_code opcode, tree type,
3003 tree lowi, tree lowj, tree highi, tree highj,
3004 vec<operand_entry *> *ops,
3005 struct range_entry *rangei,
3006 struct range_entry *rangej)
3008 tree tem1, tem2, mask;
3009 /* Check highi - lowi == highj - lowj. */
3010 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3011 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3012 return false;
3013 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3014 if (!tree_int_cst_equal (tem1, tem2))
3015 return false;
3016 /* Check popcount (lowj - lowi) == 1. */
3017 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3018 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3019 return false;
3020 if (!integer_pow2p (tem1))
3021 return false;
3023 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3024 int prec = GET_MODE_PRECISION (mode);
3025 if (TYPE_PRECISION (type) < prec
3026 || (wi::to_wide (TYPE_MIN_VALUE (type))
3027 != wi::min_value (prec, TYPE_SIGN (type)))
3028 || (wi::to_wide (TYPE_MAX_VALUE (type))
3029 != wi::max_value (prec, TYPE_SIGN (type))))
3030 type = build_nonstandard_integer_type (prec, 1);
3031 else
3032 type = unsigned_type_for (type);
3033 tem1 = fold_convert (type, tem1);
3034 tem2 = fold_convert (type, tem2);
3035 lowi = fold_convert (type, lowi);
3036 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3037 tem1 = fold_build2 (MINUS_EXPR, type,
3038 fold_convert (type, rangei->exp), lowi);
3039 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3040 lowj = build_int_cst (type, 0);
3041 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3042 NULL, rangei->in_p, lowj, tem2,
3043 rangei->strict_overflow_p
3044 || rangej->strict_overflow_p))
3045 return true;
3046 return false;
3049 /* Perform the checks common to optimize_range_tests_xor and
3050 optimize_range_tests_diff.
3051 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3052 Else it calls optimize_range_tests_diff. */
3054 static bool
3055 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3056 bool optimize_xor, vec<operand_entry *> *ops,
3057 struct range_entry *ranges)
3059 int i, j;
3060 bool any_changes = false;
3061 for (i = first; i < length; i++)
3063 tree lowi, highi, lowj, highj, type, tem;
3065 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3066 continue;
3067 type = TREE_TYPE (ranges[i].exp);
3068 if (!INTEGRAL_TYPE_P (type))
3069 continue;
3070 lowi = ranges[i].low;
3071 if (lowi == NULL_TREE)
3072 lowi = TYPE_MIN_VALUE (type);
3073 highi = ranges[i].high;
3074 if (highi == NULL_TREE)
3075 continue;
3076 for (j = i + 1; j < length && j < i + 64; j++)
3078 bool changes;
3079 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3080 continue;
3081 lowj = ranges[j].low;
3082 if (lowj == NULL_TREE)
3083 continue;
3084 highj = ranges[j].high;
3085 if (highj == NULL_TREE)
3086 highj = TYPE_MAX_VALUE (type);
3087 /* Check lowj > highi. */
3088 tem = fold_binary (GT_EXPR, boolean_type_node,
3089 lowj, highi);
3090 if (tem == NULL_TREE || !integer_onep (tem))
3091 continue;
3092 if (optimize_xor)
3093 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3094 highi, highj, ops,
3095 ranges + i, ranges + j);
3096 else
3097 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3098 highi, highj, ops,
3099 ranges + i, ranges + j);
3100 if (changes)
3102 any_changes = true;
3103 break;
3107 return any_changes;
3110 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3111 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3112 EXP on success, NULL otherwise. */
3114 static tree
3115 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3116 wide_int *mask, tree *totallowp)
3118 tree tem = int_const_binop (MINUS_EXPR, high, low);
3119 if (tem == NULL_TREE
3120 || TREE_CODE (tem) != INTEGER_CST
3121 || TREE_OVERFLOW (tem)
3122 || tree_int_cst_sgn (tem) == -1
3123 || compare_tree_int (tem, prec) != -1)
3124 return NULL_TREE;
3126 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3127 *mask = wi::shifted_mask (0, max, false, prec);
3128 if (TREE_CODE (exp) == BIT_AND_EXPR
3129 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3131 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3132 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3133 if (wi::popcount (msk) == 1
3134 && wi::ltu_p (msk, prec - max))
3136 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3137 max += msk.to_uhwi ();
3138 exp = TREE_OPERAND (exp, 0);
3139 if (integer_zerop (low)
3140 && TREE_CODE (exp) == PLUS_EXPR
3141 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3143 tree ret = TREE_OPERAND (exp, 0);
3144 STRIP_NOPS (ret);
3145 widest_int bias
3146 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3147 TYPE_PRECISION (TREE_TYPE (low))));
3148 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3149 if (totallowp)
3151 *totallowp = tbias;
3152 return ret;
3154 else if (!tree_int_cst_lt (totallow, tbias))
3155 return NULL_TREE;
3156 bias = wi::to_widest (tbias);
3157 bias -= wi::to_widest (totallow);
3158 if (bias >= 0 && bias < prec - max)
3160 *mask = wi::lshift (*mask, bias);
3161 return ret;
3166 if (totallowp)
3167 return exp;
3168 if (!tree_int_cst_lt (totallow, low))
3169 return exp;
3170 tem = int_const_binop (MINUS_EXPR, low, totallow);
3171 if (tem == NULL_TREE
3172 || TREE_CODE (tem) != INTEGER_CST
3173 || TREE_OVERFLOW (tem)
3174 || compare_tree_int (tem, prec - max) == 1)
3175 return NULL_TREE;
3177 *mask = wi::lshift (*mask, wi::to_widest (tem));
3178 return exp;
3181 /* Attempt to optimize small range tests using bit test.
3182 E.g.
3183 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3184 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3185 has been optimized by earlier passes into:
3186 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3187 As the whole 43 through 82 range spans fewer than 64 values,
3188 for 64-bit word targets optimize that into:
3189 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
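/* An illustrative, compilable sketch (guarded out, not part of the pass) of
   the example above; the function names are made up and the short-circuit
   || plays the role of the entry test that keeps the shift amount in
   range.  */
#if 0
int
bit_test_before (unsigned x)
{
  return x != 43 && x != 76 && x != 44 && x != 78 && x != 49
	 && x != 77 && x != 46 && x != 75 && x != 45 && x != 82;
}

int
bit_test_after (unsigned x)
{
  /* All ten excluded values lie in [43, 82], fewer than 64 apart, so one
     64-bit mask indexed by X - 43 tests them all at once.  */
  return (x - 43u) > 39u
	 || ((1ULL << (x - 43u)) & 0x8F0000004FULL) == 0;
}
#endif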
3191 static bool
3192 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3193 vec<operand_entry *> *ops,
3194 struct range_entry *ranges)
3196 int i, j;
3197 bool any_changes = false;
3198 int prec = GET_MODE_BITSIZE (word_mode);
3199 auto_vec<struct range_entry *, 64> candidates;
3201 for (i = first; i < length - 1; i++)
3203 tree lowi, highi, lowj, highj, type;
3205 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3206 continue;
3207 type = TREE_TYPE (ranges[i].exp);
3208 if (!INTEGRAL_TYPE_P (type))
3209 continue;
3210 lowi = ranges[i].low;
3211 if (lowi == NULL_TREE)
3212 lowi = TYPE_MIN_VALUE (type);
3213 highi = ranges[i].high;
3214 if (highi == NULL_TREE)
3215 continue;
3216 wide_int mask;
3217 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3218 highi, &mask, &lowi);
3219 if (exp == NULL_TREE)
3220 continue;
3221 bool strict_overflow_p = ranges[i].strict_overflow_p;
3222 candidates.truncate (0);
3223 int end = MIN (i + 64, length);
3224 for (j = i + 1; j < end; j++)
3226 tree exp2;
3227 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3228 continue;
3229 if (ranges[j].exp == exp)
3231 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3233 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3234 if (exp2 == exp)
3236 else if (TREE_CODE (exp2) == PLUS_EXPR)
3238 exp2 = TREE_OPERAND (exp2, 0);
3239 STRIP_NOPS (exp2);
3240 if (exp2 != exp)
3241 continue;
3243 else
3244 continue;
3246 else
3247 continue;
3248 lowj = ranges[j].low;
3249 if (lowj == NULL_TREE)
3250 continue;
3251 highj = ranges[j].high;
3252 if (highj == NULL_TREE)
3253 highj = TYPE_MAX_VALUE (type);
3254 wide_int mask2;
3255 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3256 highj, &mask2, NULL);
3257 if (exp2 != exp)
3258 continue;
3259 mask |= mask2;
3260 strict_overflow_p |= ranges[j].strict_overflow_p;
3261 candidates.safe_push (&ranges[j]);
3264 /* If every possible relative value of the expression is a valid shift
3265 amount, then we can merge the entry test in the bit test. In this
3266 case, if we would need otherwise 2 or more comparisons, then use
3267 the bit test; in the other cases, the threshold is 3 comparisons. */
3268 bool entry_test_needed;
3269 value_range r;
3270 if (TREE_CODE (exp) == SSA_NAME
3271 && get_range_query (cfun)->range_of_expr (r, exp)
3272 && r.kind () == VR_RANGE
3273 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3275 wide_int min = r.lower_bound ();
3276 wide_int ilowi = wi::to_wide (lowi);
3277 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3279 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3280 mask = wi::lshift (mask, ilowi - min);
3282 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3284 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3285 mask = wi::lrshift (mask, min - ilowi);
3287 entry_test_needed = false;
3289 else
3290 entry_test_needed = true;
3291 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3293 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3294 wi::to_widest (lowi)
3295 + prec - 1 - wi::clz (mask));
3296 operand_entry *oe = (*ops)[ranges[i].idx];
3297 tree op = oe->op;
3298 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3299 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3300 location_t loc = gimple_location (stmt);
3301 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3303 /* See if it isn't cheaper to pretend the minimum value of the
3304 range is 0, if the maximum value is small enough.
3305 We can then avoid the subtraction of the minimum value, but the
3306 mask constant could perhaps be more expensive. */
3307 if (compare_tree_int (lowi, 0) > 0
3308 && compare_tree_int (high, prec) < 0)
3310 int cost_diff;
3311 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3312 rtx reg = gen_raw_REG (word_mode, 10000);
3313 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3314 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3315 GEN_INT (-m)),
3316 word_mode, speed_p);
3317 rtx r = immed_wide_int_const (mask, word_mode);
3318 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3319 word_mode, speed_p);
3320 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3321 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3322 word_mode, speed_p);
3323 if (cost_diff > 0)
3325 mask = wi::lshift (mask, m);
3326 lowi = build_zero_cst (TREE_TYPE (lowi));
3330 tree tem;
3331 if (entry_test_needed)
3333 tem = build_range_check (loc, optype, unshare_expr (exp),
3334 false, lowi, high);
3335 if (tem == NULL_TREE || is_gimple_val (tem))
3336 continue;
3338 else
3339 tem = NULL_TREE;
3340 tree etype = unsigned_type_for (TREE_TYPE (exp));
3341 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3342 fold_convert_loc (loc, etype, exp),
3343 fold_convert_loc (loc, etype, lowi));
3344 exp = fold_convert_loc (loc, integer_type_node, exp);
3345 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3346 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3347 build_int_cst (word_type, 1), exp);
3348 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3349 wide_int_to_tree (word_type, mask));
3350 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3351 build_zero_cst (word_type));
3352 if (is_gimple_val (exp))
3353 continue;
3355 /* The shift might have undefined behavior if TEM is true,
3356 but reassociate_bb isn't prepared to have basic blocks
3357 split when it is running. So, temporarily emit a code
3358 with BIT_IOR_EXPR instead of &&, and fix it up in
3359 branch_fixup. */
3360 gimple_seq seq = NULL;
3361 if (tem)
3363 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3364 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3365 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3367 gimple_seq seq2;
3368 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3369 gimple_seq_add_seq_without_update (&seq, seq2);
3370 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3371 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3372 if (tem)
3374 gimple *g = gimple_build_assign (make_ssa_name (optype),
3375 BIT_IOR_EXPR, tem, exp);
3376 gimple_set_location (g, loc);
3377 gimple_seq_add_stmt_without_update (&seq, g);
3378 exp = gimple_assign_lhs (g);
3380 tree val = build_zero_cst (optype);
3381 if (update_range_test (&ranges[i], NULL, candidates.address (),
3382 candidates.length (), opcode, ops, exp,
3383 seq, false, val, val, strict_overflow_p))
3385 any_changes = true;
3386 if (tem)
3387 reassoc_branch_fixups.safe_push (tem);
3389 else
3390 gimple_seq_discard (seq);
3393 return any_changes;
3396 /* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0
3397 and similarly x != -1 || y != -1 || z != -1 into (x & y & z) != -1.
3398 Also, handle x < C && y < C && z < C where C is power of two as
3399 (x | y | z) < C. And also handle signed x < 0 || y < 0 || z < 0
3400 as (x | y | z) < 0. */
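/* An illustrative sketch (guarded out, not part of the pass) of the first
   rewrite above; the function names are made up.  */
#if 0
int
cmp_bitwise_before (int x, int y, int z)
{
  return x != 0 || y != 0 || z != 0;
}

int
cmp_bitwise_after (int x, int y, int z)
{
  /* The bitwise OR is nonzero exactly when at least one operand is.  */
  return (x | y | z) != 0;
}
#endif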
3402 static bool
3403 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3404 vec<operand_entry *> *ops,
3405 struct range_entry *ranges)
3407 int i;
3408 unsigned int b;
3409 bool any_changes = false;
3410 auto_vec<int, 128> buckets;
3411 auto_vec<int, 32> chains;
3412 auto_vec<struct range_entry *, 32> candidates;
3414 for (i = first; i < length; i++)
3416 int idx;
3418 if (ranges[i].exp == NULL_TREE
3419 || TREE_CODE (ranges[i].exp) != SSA_NAME
3420 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3421 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3422 continue;
3424 if (ranges[i].low != NULL_TREE
3425 && ranges[i].high != NULL_TREE
3426 && ranges[i].in_p
3427 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3429 idx = !integer_zerop (ranges[i].low);
3430 if (idx && !integer_all_onesp (ranges[i].low))
3431 continue;
3433 else if (ranges[i].high != NULL_TREE
3434 && TREE_CODE (ranges[i].high) == INTEGER_CST
3435 && ranges[i].in_p)
3437 wide_int w = wi::to_wide (ranges[i].high);
3438 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3439 int l = wi::clz (w);
3440 idx = 2;
3441 if (l <= 0
3442 || l >= prec
3443 || w != wi::mask (prec - l, false, prec))
3444 continue;
3445 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3446 && ranges[i].low == NULL_TREE)
3447 || (ranges[i].low
3448 && integer_zerop (ranges[i].low))))
3449 continue;
3451 else if (ranges[i].high == NULL_TREE
3452 && ranges[i].low != NULL_TREE
3453 /* Perform this optimization only in the last
3454 reassoc pass, as it interferes with the reassociation
3455 itself and could also confuse VRP etc., which might not
3456 be able to virtually undo the optimization. */
3457 && !reassoc_insert_powi_p
3458 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3459 && integer_zerop (ranges[i].low))
3460 idx = 3;
3461 else
3462 continue;
3464 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3465 if (buckets.length () <= b)
3466 buckets.safe_grow_cleared (b + 1, true);
3467 if (chains.length () <= (unsigned) i)
3468 chains.safe_grow (i + 1, true);
3469 chains[i] = buckets[b];
3470 buckets[b] = i + 1;
3473 FOR_EACH_VEC_ELT (buckets, b, i)
3474 if (i && chains[i - 1])
3476 int j, k = i;
3477 if ((b % 4) == 2)
3479 /* When ranges[X - 1].high + 1 is a power of two,
3480 we need to process the same bucket up to
3481 precision - 1 times, each time split the entries
3482 with the same high bound into one chain and the
3483 rest into another one to be processed later. */
3484 int this_prev = i;
3485 int other_prev = 0;
3486 for (j = chains[i - 1]; j; j = chains[j - 1])
3488 if (tree_int_cst_equal (ranges[i - 1].high,
3489 ranges[j - 1].high))
3491 chains[this_prev - 1] = j;
3492 this_prev = j;
3494 else if (other_prev == 0)
3496 buckets[b] = j;
3497 other_prev = j;
3499 else
3501 chains[other_prev - 1] = j;
3502 other_prev = j;
3505 chains[this_prev - 1] = 0;
3506 if (other_prev)
3507 chains[other_prev - 1] = 0;
3508 if (chains[i - 1] == 0)
3510 if (other_prev)
3511 b--;
3512 continue;
3515 for (j = chains[i - 1]; j; j = chains[j - 1])
3517 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3518 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3519 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3520 k = j;
3522 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3523 tree type2 = NULL_TREE;
3524 bool strict_overflow_p = false;
3525 candidates.truncate (0);
3526 for (j = i; j; j = chains[j - 1])
3528 tree type = TREE_TYPE (ranges[j - 1].exp);
3529 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3530 if ((b % 4) == 3)
3532 /* For the signed < 0 cases, the types should be
3533 really compatible (all signed with the same precision);
3534 instead of using TYPE2, put ranges that have a different
3535 in_p from k first. */
3536 if (!useless_type_conversion_p (type1, type))
3537 continue;
3538 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3539 candidates.safe_push (&ranges[j - 1]);
3540 type2 = type1;
3541 continue;
3543 if (j == k
3544 || useless_type_conversion_p (type1, type))
3546 else if (type2 == NULL_TREE
3547 || useless_type_conversion_p (type2, type))
3549 if (type2 == NULL_TREE)
3550 type2 = type;
3551 candidates.safe_push (&ranges[j - 1]);
3554 unsigned l = candidates.length ();
3555 for (j = i; j; j = chains[j - 1])
3557 tree type = TREE_TYPE (ranges[j - 1].exp);
3558 if (j == k)
3559 continue;
3560 if ((b % 4) == 3)
3562 if (!useless_type_conversion_p (type1, type))
3563 continue;
3564 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3565 candidates.safe_push (&ranges[j - 1]);
3566 continue;
3568 if (useless_type_conversion_p (type1, type))
3570 else if (type2 == NULL_TREE
3571 || useless_type_conversion_p (type2, type))
3572 continue;
3573 candidates.safe_push (&ranges[j - 1]);
3575 gimple_seq seq = NULL;
3576 tree op = NULL_TREE;
3577 unsigned int id;
3578 struct range_entry *r;
3579 candidates.safe_push (&ranges[k - 1]);
3580 FOR_EACH_VEC_ELT (candidates, id, r)
3582 gimple *g;
3583 enum tree_code code;
3584 if (id == 0)
3586 op = r->exp;
3587 continue;
3589 if (id == l)
3591 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3592 g = gimple_build_assign (make_ssa_name (type1), code, op);
3593 gimple_seq_add_stmt_without_update (&seq, g);
3594 op = gimple_assign_lhs (g);
3596 tree type = TREE_TYPE (r->exp);
3597 tree exp = r->exp;
3598 if (id >= l && !useless_type_conversion_p (type1, type))
3600 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3601 gimple_seq_add_stmt_without_update (&seq, g);
3602 exp = gimple_assign_lhs (g);
3604 if ((b % 4) == 3)
3605 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3606 else
3607 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3608 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3609 code, op, exp);
3610 gimple_seq_add_stmt_without_update (&seq, g);
3611 op = gimple_assign_lhs (g);
3613 candidates.pop ();
3614 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3615 candidates.length (), opcode, ops, op,
3616 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3617 ranges[k - 1].high, strict_overflow_p))
3618 any_changes = true;
3619 else
3620 gimple_seq_discard (seq);
3621 if ((b % 4) == 2 && buckets[b] != i)
3622 /* There is more work to do for this bucket. */
3623 b--;
3626 return any_changes;
3629 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3630 a >= 0 && a < b into (unsigned) a < (unsigned) b
3631 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
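/* An illustrative sketch (guarded out, not part of the pass); the function
   names are made up, and B is assumed to be known non-negative, e.g. from
   its defining statement.  */
#if 0
int
var_bound_before (int a, int b)
{
  return a >= 0 && a < b;
}

int
var_bound_after (int a, int b)
{
  /* A negative A becomes a huge unsigned value, so the single unsigned
     comparison subsumes both tests when B >= 0.  */
  return (unsigned) a < (unsigned) b;
}
#endif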
3633 static bool
3634 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3635 vec<operand_entry *> *ops,
3636 struct range_entry *ranges,
3637 basic_block first_bb)
3639 int i;
3640 bool any_changes = false;
3641 hash_map<tree, int> *map = NULL;
3643 for (i = first; i < length; i++)
3645 if (ranges[i].exp == NULL_TREE
3646 || TREE_CODE (ranges[i].exp) != SSA_NAME
3647 || !ranges[i].in_p)
3648 continue;
3650 tree type = TREE_TYPE (ranges[i].exp);
3651 if (!INTEGRAL_TYPE_P (type)
3652 || TYPE_UNSIGNED (type)
3653 || ranges[i].low == NULL_TREE
3654 || !integer_zerop (ranges[i].low)
3655 || ranges[i].high != NULL_TREE)
3656 continue;
3657 /* EXP >= 0 here. */
3658 if (map == NULL)
3659 map = new hash_map <tree, int>;
3660 map->put (ranges[i].exp, i);
3663 if (map == NULL)
3664 return false;
3666 for (i = 0; i < length; i++)
3668 bool in_p = ranges[i].in_p;
3669 if (ranges[i].low == NULL_TREE
3670 || ranges[i].high == NULL_TREE)
3671 continue;
3672 if (!integer_zerop (ranges[i].low)
3673 || !integer_zerop (ranges[i].high))
3675 if (ranges[i].exp
3676 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3677 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3678 && integer_onep (ranges[i].low)
3679 && integer_onep (ranges[i].high))
3680 in_p = !in_p;
3681 else
3682 continue;
3685 gimple *stmt;
3686 tree_code ccode;
3687 tree rhs1, rhs2;
3688 if (ranges[i].exp)
3690 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3691 continue;
3692 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3693 if (!is_gimple_assign (stmt))
3694 continue;
3695 ccode = gimple_assign_rhs_code (stmt);
3696 rhs1 = gimple_assign_rhs1 (stmt);
3697 rhs2 = gimple_assign_rhs2 (stmt);
3699 else
3701 operand_entry *oe = (*ops)[ranges[i].idx];
3702 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3703 if (gimple_code (stmt) != GIMPLE_COND)
3704 continue;
3705 ccode = gimple_cond_code (stmt);
3706 rhs1 = gimple_cond_lhs (stmt);
3707 rhs2 = gimple_cond_rhs (stmt);
3710 if (TREE_CODE (rhs1) != SSA_NAME
3711 || rhs2 == NULL_TREE
3712 || TREE_CODE (rhs2) != SSA_NAME)
3713 continue;
3715 switch (ccode)
3717 case GT_EXPR:
3718 case GE_EXPR:
3719 case LT_EXPR:
3720 case LE_EXPR:
3721 break;
3722 default:
3723 continue;
3725 if (in_p)
3726 ccode = invert_tree_comparison (ccode, false);
3727 switch (ccode)
3729 case GT_EXPR:
3730 case GE_EXPR:
3731 std::swap (rhs1, rhs2);
3732 ccode = swap_tree_comparison (ccode);
3733 break;
3734 case LT_EXPR:
3735 case LE_EXPR:
3736 break;
3737 default:
3738 gcc_unreachable ();
3741 int *idx = map->get (rhs1);
3742 if (idx == NULL)
3743 continue;
3745 /* maybe_optimize_range_tests allows statements without side-effects
3746 in the basic blocks as long as they are consumed in the same bb.
3747 Make sure rhs2's def stmt is not among them, otherwise we can't
3748 safely use get_nonzero_bits on it. E.g. in:
3749 # RANGE [-83, 1] NONZERO 173
3750 # k_32 = PHI <k_47(13), k_12(9)>
3752 if (k_32 >= 0)
3753 goto <bb 5>; [26.46%]
3754 else
3755 goto <bb 9>; [73.54%]
3757 <bb 5> [local count: 140323371]:
3758 # RANGE [0, 1] NONZERO 1
3759 _5 = (int) k_32;
3760 # RANGE [0, 4] NONZERO 4
3761 _21 = _5 << 2;
3762 # RANGE [0, 4] NONZERO 4
3763 iftmp.0_44 = (char) _21;
3764 if (k_32 < iftmp.0_44)
3765 goto <bb 6>; [84.48%]
3766 else
3767 goto <bb 9>; [15.52%]
3768 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3769 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3770 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3771 those stmts even for negative k_32, the value ranges would no longer
3772 be guaranteed, and so the optimization would be invalid. */
3773 while (opcode == ERROR_MARK)
3775 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3776 basic_block bb2 = gimple_bb (g);
3777 if (bb2
3778 && bb2 != first_bb
3779 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3781 /* As an exception, handle a few common cases. */
3782 if (gimple_assign_cast_p (g)
3783 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3785 tree op0 = gimple_assign_rhs1 (g);
3786 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3787 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3788 > TYPE_PRECISION (TREE_TYPE (op0))))
3789 /* Zero-extension is always ok. */
3790 break;
3791 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3792 == TYPE_PRECISION (TREE_TYPE (op0))
3793 && TREE_CODE (op0) == SSA_NAME)
3795 /* Cast from signed to unsigned or vice versa. Retry
3796 with the op0 as new rhs2. */
3797 rhs2 = op0;
3798 continue;
3801 else if (is_gimple_assign (g)
3802 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3803 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3804 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3805 /* Masking with INTEGER_CST with MSB clear is always ok
3806 too. */
3807 break;
3808 rhs2 = NULL_TREE;
3810 break;
3812 if (rhs2 == NULL_TREE)
3813 continue;
3815 wide_int nz = get_nonzero_bits (rhs2);
3816 if (wi::neg_p (nz))
3817 continue;
3819 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3820 and RHS2 is known to be RHS2 >= 0. */
3821 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3823 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3824 if ((ranges[*idx].strict_overflow_p
3825 || ranges[i].strict_overflow_p)
3826 && issue_strict_overflow_warning (wc))
3827 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3828 "assuming signed overflow does not occur "
3829 "when simplifying range test");
3831 if (dump_file && (dump_flags & TDF_DETAILS))
3833 struct range_entry *r = &ranges[*idx];
3834 fprintf (dump_file, "Optimizing range test ");
3835 print_generic_expr (dump_file, r->exp);
3836 fprintf (dump_file, " +[");
3837 print_generic_expr (dump_file, r->low);
3838 fprintf (dump_file, ", ");
3839 print_generic_expr (dump_file, r->high);
3840 fprintf (dump_file, "] and comparison ");
3841 print_generic_expr (dump_file, rhs1);
3842 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3843 print_generic_expr (dump_file, rhs2);
3844 fprintf (dump_file, "\n into (");
3845 print_generic_expr (dump_file, utype);
3846 fprintf (dump_file, ") ");
3847 print_generic_expr (dump_file, rhs1);
3848 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3849 print_generic_expr (dump_file, utype);
3850 fprintf (dump_file, ") ");
3851 print_generic_expr (dump_file, rhs2);
3852 fprintf (dump_file, "\n");
3855 operand_entry *oe = (*ops)[ranges[i].idx];
3856 ranges[i].in_p = 0;
3857 if (opcode == BIT_IOR_EXPR
3858 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3860 ranges[i].in_p = 1;
3861 ccode = invert_tree_comparison (ccode, false);
3864 unsigned int uid = gimple_uid (stmt);
3865 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3866 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3867 gimple_set_uid (g, uid);
3868 rhs1 = gimple_assign_lhs (g);
3869 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3870 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3872 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3873 gimple_set_uid (g, uid);
3874 rhs2 = gimple_assign_lhs (g);
3875 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3877 if (tree_swap_operands_p (rhs1, rhs2))
3879 std::swap (rhs1, rhs2);
3880 ccode = swap_tree_comparison (ccode);
3882 if (gimple_code (stmt) == GIMPLE_COND)
3884 gcond *c = as_a <gcond *> (stmt);
3885 gimple_cond_set_code (c, ccode);
3886 gimple_cond_set_lhs (c, rhs1);
3887 gimple_cond_set_rhs (c, rhs2);
3888 update_stmt (stmt);
3890 else
3892 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3893 if (!INTEGRAL_TYPE_P (ctype)
3894 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3895 && TYPE_PRECISION (ctype) != 1))
3896 ctype = boolean_type_node;
3897 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3898 gimple_set_uid (g, uid);
3899 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3900 if (oe->op && ctype != TREE_TYPE (oe->op))
3902 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3903 NOP_EXPR, gimple_assign_lhs (g));
3904 gimple_set_uid (g, uid);
3905 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3907 ranges[i].exp = gimple_assign_lhs (g);
3908 oe->op = ranges[i].exp;
3909 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3910 ranges[i].high = ranges[i].low;
3912 ranges[i].strict_overflow_p = false;
3913 oe = (*ops)[ranges[*idx].idx];
3914 /* Now change all the other range test immediate uses, so that
3915 those tests will be optimized away. */
3916 if (opcode == ERROR_MARK)
3918 if (oe->op)
3919 oe->op = build_int_cst (TREE_TYPE (oe->op),
3920 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3921 else
3922 oe->op = (oe->rank == BIT_IOR_EXPR
3923 ? boolean_false_node : boolean_true_node);
3925 else
3926 oe->op = error_mark_node;
3927 ranges[*idx].exp = NULL_TREE;
3928 ranges[*idx].low = NULL_TREE;
3929 ranges[*idx].high = NULL_TREE;
3930 any_changes = true;
3933 delete map;
3934 return any_changes;
3937 /* Optimize range tests, similarly to how fold_range_test optimizes
3938 them on trees. The tree code for the binary
3939 operation between all the operands is OPCODE.
3940 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3941 maybe_optimize_range_tests for inter-bb range optimization.
3942 In that case, if oe->op is NULL, oe->id is the index of the bb whose
3943 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3944 the actual opcode.
3945 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3947 static bool
3948 optimize_range_tests (enum tree_code opcode,
3949 vec<operand_entry *> *ops, basic_block first_bb)
3951 unsigned int length = ops->length (), i, j, first;
3952 operand_entry *oe;
3953 struct range_entry *ranges;
3954 bool any_changes = false;
3956 if (length == 1)
3957 return false;
3959 ranges = XNEWVEC (struct range_entry, length);
3960 for (i = 0; i < length; i++)
3962 oe = (*ops)[i];
3963 ranges[i].idx = i;
3964 init_range_entry (ranges + i, oe->op,
3965 oe->op
3966 ? NULL
3967 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3968 /* For | invert it now, we will invert it again before emitting
3969 the optimized expression. */
3970 if (opcode == BIT_IOR_EXPR
3971 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3972 ranges[i].in_p = !ranges[i].in_p;
3975 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3976 for (i = 0; i < length; i++)
3977 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3978 break;
3980 /* Try to merge ranges. */
3981 for (first = i; i < length; i++)
3983 tree low = ranges[i].low;
3984 tree high = ranges[i].high;
3985 int in_p = ranges[i].in_p;
3986 bool strict_overflow_p = ranges[i].strict_overflow_p;
3987 int update_fail_count = 0;
3989 for (j = i + 1; j < length; j++)
3991 if (ranges[i].exp != ranges[j].exp)
3992 break;
3993 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3994 ranges[j].in_p, ranges[j].low, ranges[j].high))
3995 break;
3996 strict_overflow_p |= ranges[j].strict_overflow_p;
3999 if (j == i + 1)
4000 continue;
4002 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4003 opcode, ops, ranges[i].exp, NULL, in_p,
4004 low, high, strict_overflow_p))
4006 i = j - 1;
4007 any_changes = true;
4009 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4010 while update_range_test would fail. */
4011 else if (update_fail_count == 64)
4012 i = j - 1;
4013 else
4014 ++update_fail_count;
4017 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4018 ops, ranges);
4020 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4021 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4022 ops, ranges);
4023 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4024 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4025 ops, ranges);
4026 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4027 ranges, first_bb);
4028 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4029 ops, ranges);
4031 if (any_changes && opcode != ERROR_MARK)
4033 j = 0;
4034 FOR_EACH_VEC_ELT (*ops, i, oe)
4036 if (oe->op == error_mark_node)
4037 continue;
4038 else if (i != j)
4039 (*ops)[j] = oe;
4040 j++;
4042 ops->truncate (j);
4045 XDELETEVEC (ranges);
4046 return any_changes;
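/* Illustrative sketch (hypothetical example, not part of this file): the
   kind of source-level test chain optimize_range_tests is meant to
   collapse.  All three equality tests are on the same operand, so after
   the ranges are merged the whole condition can typically be emitted as a
   single unsigned range check such as (unsigned) (c - 1) <= 2.  */
static int
range_test_example (int c)
{
  return c == 1 || c == 2 || c == 3;
}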
4049 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4050 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4051 otherwise the comparison code. TYPE is a return value that is set
4052 to the type of the comparison. */
4054 static tree_code
4055 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4056 tree *lhs, tree *rhs, gassign **vcond)
4058 if (TREE_CODE (var) != SSA_NAME)
4059 return ERROR_MARK;
4061 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4062 if (stmt == NULL)
4063 return ERROR_MARK;
4064 if (vcond)
4065 *vcond = stmt;
4067 /* ??? If we start creating more COND_EXPR, we could perform
4068 this same optimization with them. For now, simplify. */
4069 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4070 return ERROR_MARK;
4072 tree cond = gimple_assign_rhs1 (stmt);
4073 tree_code cmp = TREE_CODE (cond);
4074 if (cmp != SSA_NAME)
4075 return ERROR_MARK;
4077 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4078 if (assign == NULL
4079 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4080 return ERROR_MARK;
4082 cmp = gimple_assign_rhs_code (assign);
4083 if (lhs)
4084 *lhs = gimple_assign_rhs1 (assign);
4085 if (rhs)
4086 *rhs = gimple_assign_rhs2 (assign);
4088 /* ??? For now, allow only canonical true and false result vectors.
4089 We could expand this to other constants should the need arise,
4090 but at the moment we don't create them. */
4091 tree t = gimple_assign_rhs2 (stmt);
4092 tree f = gimple_assign_rhs3 (stmt);
4093 bool inv;
4094 if (integer_all_onesp (t))
4095 inv = false;
4096 else if (integer_all_onesp (f))
4098 cmp = invert_tree_comparison (cmp, false);
4099 inv = true;
4101 else
4102 return ERROR_MARK;
4103 if (!integer_zerop (f))
4104 return ERROR_MARK;
4106 /* Success! */
4107 if (rets)
4108 *rets = assign;
4109 if (reti)
4110 *reti = inv;
4111 if (type)
4112 *type = TREE_TYPE (cond);
4113 return cmp;
4116 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4117 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4119 static bool
4120 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4122 unsigned int length = ops->length (), i, j;
4123 bool any_changes = false;
4125 if (length == 1)
4126 return false;
4128 for (i = 0; i < length; ++i)
4130 tree elt0 = (*ops)[i]->op;
4132 gassign *stmt0, *vcond0;
4133 bool invert;
4134 tree type, lhs0, rhs0;
4135 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4136 &rhs0, &vcond0);
4137 if (cmp0 == ERROR_MARK)
4138 continue;
4140 for (j = i + 1; j < length; ++j)
4142 tree &elt1 = (*ops)[j]->op;
4144 gassign *stmt1, *vcond1;
4145 tree lhs1, rhs1;
4146 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4147 &rhs1, &vcond1);
4148 if (cmp1 == ERROR_MARK)
4149 continue;
4151 tree comb;
4152 if (opcode == BIT_AND_EXPR)
4153 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4154 cmp1, lhs1, rhs1);
4155 else if (opcode == BIT_IOR_EXPR)
4156 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4157 cmp1, lhs1, rhs1);
4158 else
4159 gcc_unreachable ();
4160 if (comb == NULL)
4161 continue;
4163 /* Success! */
4164 if (dump_file && (dump_flags & TDF_DETAILS))
4166 fprintf (dump_file, "Transforming ");
4167 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4168 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4169 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4170 fprintf (dump_file, " into ");
4171 print_generic_expr (dump_file, comb);
4172 fputc ('\n', dump_file);
4175 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4176 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4177 true, GSI_SAME_STMT);
4178 if (invert)
4179 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4180 gimple_assign_rhs3_ptr (vcond0));
4181 gimple_assign_set_rhs1 (vcond0, exp);
4182 update_stmt (vcond0);
4184 elt1 = error_mark_node;
4185 any_changes = true;
4189 if (any_changes)
4191 operand_entry *oe;
4192 j = 0;
4193 FOR_EACH_VEC_ELT (*ops, i, oe)
4195 if (oe->op == error_mark_node)
4196 continue;
4197 else if (i != j)
4198 (*ops)[j] = oe;
4199 j++;
4201 ops->truncate (j);
4204 return any_changes;
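/* Illustrative sketch (hypothetical example, not part of this file): a
   source pattern whose gimple form typically contains two VEC_COND_EXPRs
   combined with BIT_AND_EXPR, which is what optimize_vec_cond_expr looks
   for.  Uses the GNU vector extension; the pass may combine the two
   lane-wise comparisons of the same operands into the single comparison
   a < b via maybe_fold_and_comparisons.  */
typedef int v4si __attribute__ ((vector_size (16)));

static v4si
vec_cond_example (v4si a, v4si b)
{
  return (a < b) & (a <= b);
}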
4207 /* Return true if STMT is a cast like:
4208 <bb N>:
4210 _123 = (int) _234;
4212 <bb M>:
4213 # _345 = PHI <_123(N), 1(...), 1(...)>
4214 where _234 has bool type, _123 has a single use and
4215 bb N has a single successor M. This is commonly used in
4216 the last block of a range test.
4218 Also return true if STMT is a tcc_comparison like:
4219 <bb N>:
4221 _234 = a_2(D) == 2;
4223 <bb M>:
4224 # _345 = PHI <_234(N), 1(...), 1(...)>
4225 _346 = (int) _345;
4226 where _234 has bool type, a single use and
4227 bb N has a single successor M. This is commonly used in
4228 the last block of a range test. */
4230 static bool
4231 final_range_test_p (gimple *stmt)
4233 basic_block bb, rhs_bb, lhs_bb;
4234 edge e;
4235 tree lhs, rhs;
4236 use_operand_p use_p;
4237 gimple *use_stmt;
4239 if (!gimple_assign_cast_p (stmt)
4240 && (!is_gimple_assign (stmt)
4241 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4242 != tcc_comparison)))
4243 return false;
4244 bb = gimple_bb (stmt);
4245 if (!single_succ_p (bb))
4246 return false;
4247 e = single_succ_edge (bb);
4248 if (e->flags & EDGE_COMPLEX)
4249 return false;
4251 lhs = gimple_assign_lhs (stmt);
4252 rhs = gimple_assign_rhs1 (stmt);
4253 if (gimple_assign_cast_p (stmt)
4254 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4255 || TREE_CODE (rhs) != SSA_NAME
4256 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4257 return false;
4259 if (!gimple_assign_cast_p (stmt)
4260 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4261 return false;
4263 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4264 if (!single_imm_use (lhs, &use_p, &use_stmt))
4265 return false;
4267 if (gimple_code (use_stmt) != GIMPLE_PHI
4268 || gimple_bb (use_stmt) != e->dest)
4269 return false;
4271 /* And that the rhs is defined in the same loop. */
4272 if (gimple_assign_cast_p (stmt))
4274 if (TREE_CODE (rhs) != SSA_NAME
4275 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4276 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4277 return false;
4279 else
4281 if (TREE_CODE (lhs) != SSA_NAME
4282 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4283 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4284 return false;
4287 return true;
4290 /* Return true if BB is a suitable basic block for inter-bb range test
4291 optimization. If BACKWARD is true, BB should be the only predecessor
4292 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4293 or compared against to find a common basic block to which all conditions
4294 branch if true resp. false. If BACKWARD is false, TEST_BB should
4295 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4296 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4297 block points to an empty block that falls through into *OTHER_BB and
4298 the phi args match that path. */
4300 static bool
4301 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4302 bool *test_swapped_p, bool backward)
4304 edge_iterator ei, ei2;
4305 edge e, e2;
4306 gimple *stmt;
4307 gphi_iterator gsi;
4308 bool other_edge_seen = false;
4309 bool is_cond;
4311 if (test_bb == bb)
4312 return false;
4313 /* Check last stmt first. */
4314 stmt = last_stmt (bb);
4315 if (stmt == NULL
4316 || (gimple_code (stmt) != GIMPLE_COND
4317 && (backward || !final_range_test_p (stmt)))
4318 || gimple_visited_p (stmt)
4319 || stmt_could_throw_p (cfun, stmt)
4320 || *other_bb == bb)
4321 return false;
4322 is_cond = gimple_code (stmt) == GIMPLE_COND;
4323 if (is_cond)
4325 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4326 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4327 to *OTHER_BB (if not set yet, try to find it out). */
4328 if (EDGE_COUNT (bb->succs) != 2)
4329 return false;
4330 FOR_EACH_EDGE (e, ei, bb->succs)
4332 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4333 return false;
4334 if (e->dest == test_bb)
4336 if (backward)
4337 continue;
4338 else
4339 return false;
4341 if (e->dest == bb)
4342 return false;
4343 if (*other_bb == NULL)
4345 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4346 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4347 return false;
4348 else if (e->dest == e2->dest)
4349 *other_bb = e->dest;
4350 if (*other_bb == NULL)
4351 return false;
4353 if (e->dest == *other_bb)
4354 other_edge_seen = true;
4355 else if (backward)
4356 return false;
4358 if (*other_bb == NULL || !other_edge_seen)
4359 return false;
4361 else if (single_succ (bb) != *other_bb)
4362 return false;
4364 /* Now check all PHIs of *OTHER_BB. */
4365 e = find_edge (bb, *other_bb);
4366 e2 = find_edge (test_bb, *other_bb);
4367 retry:;
4368 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4370 gphi *phi = gsi.phi ();
4371 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4372 corresponding to BB and TEST_BB predecessor must be the same. */
4373 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4374 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4376 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4377 one of the PHIs should have the lhs of the last stmt in
4378 that block as PHI arg and that PHI should have 0 or 1
4379 corresponding to it in all other range test basic blocks
4380 considered. */
4381 if (!is_cond)
4383 if (gimple_phi_arg_def (phi, e->dest_idx)
4384 == gimple_assign_lhs (stmt)
4385 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4386 || integer_onep (gimple_phi_arg_def (phi,
4387 e2->dest_idx))))
4388 continue;
4390 else
4392 gimple *test_last = last_stmt (test_bb);
4393 if (gimple_code (test_last) == GIMPLE_COND)
4395 if (backward ? e2->src != test_bb : e->src != bb)
4396 return false;
4398 /* For last_bb, handle also:
4399 if (x_3(D) == 3)
4400 goto <bb 6>; [34.00%]
4401 else
4402 goto <bb 7>; [66.00%]
4404 <bb 6> [local count: 79512730]:
4406 <bb 7> [local count: 1073741824]:
4407 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4408 where bb 7 is *OTHER_BB, but the PHI values from the
4409 earlier bbs match the path through the empty bb
4410 in between. */
4411 edge e3;
4412 if (backward)
4413 e3 = EDGE_SUCC (test_bb,
4414 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4415 else
4416 e3 = EDGE_SUCC (bb,
4417 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4418 if (empty_block_p (e3->dest)
4419 && single_succ_p (e3->dest)
4420 && single_succ (e3->dest) == *other_bb
4421 && single_pred_p (e3->dest)
4422 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4424 if (backward)
4425 e2 = single_succ_edge (e3->dest);
4426 else
4427 e = single_succ_edge (e3->dest);
4428 if (test_swapped_p)
4429 *test_swapped_p = true;
4430 goto retry;
4433 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4434 == gimple_assign_lhs (test_last)
4435 && (integer_zerop (gimple_phi_arg_def (phi,
4436 e->dest_idx))
4437 || integer_onep (gimple_phi_arg_def (phi,
4438 e->dest_idx))))
4439 continue;
4442 return false;
4445 return true;
4448 /* Return true if BB doesn't have side-effects that would disallow
4449 range test optimization, all SSA_NAMEs set in the bb are consumed
4450 in the bb and there are no PHIs. */
4452 bool
4453 no_side_effect_bb (basic_block bb)
4455 gimple_stmt_iterator gsi;
4456 gimple *last;
4458 if (!gimple_seq_empty_p (phi_nodes (bb)))
4459 return false;
4460 last = last_stmt (bb);
4461 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4463 gimple *stmt = gsi_stmt (gsi);
4464 tree lhs;
4465 imm_use_iterator imm_iter;
4466 use_operand_p use_p;
4468 if (is_gimple_debug (stmt))
4469 continue;
4470 if (gimple_has_side_effects (stmt))
4471 return false;
4472 if (stmt == last)
4473 return true;
4474 if (!is_gimple_assign (stmt))
4475 return false;
4476 lhs = gimple_assign_lhs (stmt);
4477 if (TREE_CODE (lhs) != SSA_NAME)
4478 return false;
4479 if (gimple_assign_rhs_could_trap_p (stmt))
4480 return false;
4481 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4483 gimple *use_stmt = USE_STMT (use_p);
4484 if (is_gimple_debug (use_stmt))
4485 continue;
4486 if (gimple_bb (use_stmt) != bb)
4487 return false;
4490 return false;
4493 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4494 return true and fill in *OPS recursively. */
4496 static bool
4497 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4498 class loop *loop)
4500 gimple *stmt = SSA_NAME_DEF_STMT (var);
4501 tree rhs[2];
4502 int i;
4504 if (!is_reassociable_op (stmt, code, loop))
4505 return false;
4507 rhs[0] = gimple_assign_rhs1 (stmt);
4508 rhs[1] = gimple_assign_rhs2 (stmt);
4509 gimple_set_visited (stmt, true);
4510 for (i = 0; i < 2; i++)
4511 if (TREE_CODE (rhs[i]) == SSA_NAME
4512 && !get_ops (rhs[i], code, ops, loop)
4513 && has_single_use (rhs[i]))
4515 operand_entry *oe = operand_entry_pool.allocate ();
4517 oe->op = rhs[i];
4518 oe->rank = code;
4519 oe->id = 0;
4520 oe->count = 1;
4521 oe->stmt_to_insert = NULL;
4522 ops->safe_push (oe);
4524 return true;
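/* Illustrative sketch (hypothetical example, not part of this file):
   get_ops flattens a reassociable bitwise chain into individual operand
   entries, so for the | chain below it would collect the three comparison
   results as separate range-test candidates, provided each intermediate
   result has a single use.  */
static int
get_ops_example (int x)
{
  return (x == 1) | (x == 4) | (x == 9);
}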
4527 /* Find the ops that were added by get_ops starting from VAR, see if
4528 they were changed during update_range_test and if yes, create new
4529 stmts. */
4531 static tree
4532 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4533 unsigned int *pidx, class loop *loop)
4535 gimple *stmt = SSA_NAME_DEF_STMT (var);
4536 tree rhs[4];
4537 int i;
4539 if (!is_reassociable_op (stmt, code, loop))
4540 return NULL;
4542 rhs[0] = gimple_assign_rhs1 (stmt);
4543 rhs[1] = gimple_assign_rhs2 (stmt);
4544 rhs[2] = rhs[0];
4545 rhs[3] = rhs[1];
4546 for (i = 0; i < 2; i++)
4547 if (TREE_CODE (rhs[i]) == SSA_NAME)
4549 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4550 if (rhs[2 + i] == NULL_TREE)
4552 if (has_single_use (rhs[i]))
4553 rhs[2 + i] = ops[(*pidx)++]->op;
4554 else
4555 rhs[2 + i] = rhs[i];
4558 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4559 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4561 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4562 var = make_ssa_name (TREE_TYPE (var));
4563 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4564 rhs[2], rhs[3]);
4565 gimple_set_uid (g, gimple_uid (stmt));
4566 gimple_set_visited (g, true);
4567 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4569 return var;
4572 /* Structure to track the initial value passed to get_ops and
4573 the range in the ops vector for each basic block. */
4575 struct inter_bb_range_test_entry
4577 tree op;
4578 unsigned int first_idx, last_idx;
4581 /* Inter-bb range test optimization.
4583 Returns TRUE if a gimple conditional is optimized to a true/false,
4584 otherwise return FALSE.
4586 This indicates to the caller that it should run a CFG cleanup pass
4587 once reassociation is completed. */
4589 static bool
4590 maybe_optimize_range_tests (gimple *stmt)
4592 basic_block first_bb = gimple_bb (stmt);
4593 basic_block last_bb = first_bb;
4594 basic_block other_bb = NULL;
4595 basic_block bb;
4596 edge_iterator ei;
4597 edge e;
4598 auto_vec<operand_entry *> ops;
4599 auto_vec<inter_bb_range_test_entry> bbinfo;
4600 bool any_changes = false;
4601 bool cfg_cleanup_needed = false;
4603 /* Consider only basic blocks that end with GIMPLE_COND or
4604 a cast statement satisfying final_range_test_p. All
4605 but the last bb in the first_bb .. last_bb range
4606 should end with GIMPLE_COND. */
4607 if (gimple_code (stmt) == GIMPLE_COND)
4609 if (EDGE_COUNT (first_bb->succs) != 2)
4610 return cfg_cleanup_needed;
4612 else if (final_range_test_p (stmt))
4613 other_bb = single_succ (first_bb);
4614 else
4615 return cfg_cleanup_needed;
4617 if (stmt_could_throw_p (cfun, stmt))
4618 return cfg_cleanup_needed;
4620 /* As relative ordering of post-dominator sons isn't fixed,
4621 maybe_optimize_range_tests can be called first on any
4622 bb in the range we want to optimize. So, start searching
4623 backwards, if first_bb can be set to a predecessor. */
4624 while (single_pred_p (first_bb))
4626 basic_block pred_bb = single_pred (first_bb);
4627 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4628 break;
4629 if (!no_side_effect_bb (first_bb))
4630 break;
4631 first_bb = pred_bb;
4633 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4634 Before starting forward search in last_bb successors, find
4635 out the other_bb. */
4636 if (first_bb == last_bb)
4638 other_bb = NULL;
4639 /* As non-GIMPLE_COND last stmt always terminates the range,
4640 if forward search didn't discover anything, just give up. */
4641 if (gimple_code (stmt) != GIMPLE_COND)
4642 return cfg_cleanup_needed;
4643 /* Look at both successors. Either it ends with a GIMPLE_COND
4644 and satisfies suitable_cond_bb, or ends with a cast and
4645 other_bb is that cast's successor. */
4646 FOR_EACH_EDGE (e, ei, first_bb->succs)
4647 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4648 || e->dest == first_bb)
4649 return cfg_cleanup_needed;
4650 else if (single_pred_p (e->dest))
4652 stmt = last_stmt (e->dest);
4653 if (stmt
4654 && gimple_code (stmt) == GIMPLE_COND
4655 && EDGE_COUNT (e->dest->succs) == 2)
4657 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4658 NULL, true))
4659 break;
4660 else
4661 other_bb = NULL;
4663 else if (stmt
4664 && final_range_test_p (stmt)
4665 && find_edge (first_bb, single_succ (e->dest)))
4667 other_bb = single_succ (e->dest);
4668 if (other_bb == first_bb)
4669 other_bb = NULL;
4672 if (other_bb == NULL)
4673 return cfg_cleanup_needed;
4675 /* Now do the forward search, moving last_bb to successor bbs
4676 that aren't other_bb. */
4677 while (EDGE_COUNT (last_bb->succs) == 2)
4679 FOR_EACH_EDGE (e, ei, last_bb->succs)
4680 if (e->dest != other_bb)
4681 break;
4682 if (e == NULL)
4683 break;
4684 if (!single_pred_p (e->dest))
4685 break;
4686 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4687 break;
4688 if (!no_side_effect_bb (e->dest))
4689 break;
4690 last_bb = e->dest;
4692 if (first_bb == last_bb)
4693 return cfg_cleanup_needed;
4694 /* Here basic blocks first_bb through last_bb's predecessor
4695 end with GIMPLE_COND, all of them have one of the edges to
4696 other_bb and another to another block in the range,
4697 all blocks except first_bb don't have side-effects and
4698 last_bb ends with either GIMPLE_COND, or cast satisfying
4699 final_range_test_p. */
4700 for (bb = last_bb; ; bb = single_pred (bb))
4702 enum tree_code code;
4703 tree lhs, rhs;
4704 inter_bb_range_test_entry bb_ent;
4706 bb_ent.op = NULL_TREE;
4707 bb_ent.first_idx = ops.length ();
4708 bb_ent.last_idx = bb_ent.first_idx;
4709 e = find_edge (bb, other_bb);
4710 stmt = last_stmt (bb);
4711 gimple_set_visited (stmt, true);
4712 if (gimple_code (stmt) != GIMPLE_COND)
4714 use_operand_p use_p;
4715 gimple *phi;
4716 edge e2;
4717 unsigned int d;
4719 lhs = gimple_assign_lhs (stmt);
4720 rhs = gimple_assign_rhs1 (stmt);
4721 gcc_assert (bb == last_bb);
4723 /* stmt is
4724 _123 = (int) _234;
4726 _234 = a_2(D) == 2;
4728 followed by:
4729 <bb M>:
4730 # _345 = PHI <_123(N), 1(...), 1(...)>
4732 or 0 instead of 1. If it is 0, the _234
4733 range test is anded together with all the
4734 other range tests, if it is 1, it is ored with
4735 them. */
4736 single_imm_use (lhs, &use_p, &phi);
4737 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4738 e2 = find_edge (first_bb, other_bb);
4739 d = e2->dest_idx;
4740 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4741 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4742 code = BIT_AND_EXPR;
4743 else
4745 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4746 code = BIT_IOR_EXPR;
4749 /* If _234 SSA_NAME_DEF_STMT is
4750 _234 = _567 | _789;
4751 (or &, corresponding to 1/0 in the phi arguments),
4752 push into ops the individual range test arguments
4753 of the bitwise or resp. and, recursively. */
4754 if (TREE_CODE (rhs) == SSA_NAME
4755 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4756 != tcc_comparison)
4757 && !get_ops (rhs, code, &ops,
4758 loop_containing_stmt (stmt))
4759 && has_single_use (rhs))
4761 /* Otherwise, push the _234 range test itself. */
4762 operand_entry *oe = operand_entry_pool.allocate ();
4764 oe->op = rhs;
4765 oe->rank = code;
4766 oe->id = 0;
4767 oe->count = 1;
4768 oe->stmt_to_insert = NULL;
4769 ops.safe_push (oe);
4770 bb_ent.last_idx++;
4771 bb_ent.op = rhs;
4773 else if (is_gimple_assign (stmt)
4774 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4775 == tcc_comparison)
4776 && !get_ops (lhs, code, &ops,
4777 loop_containing_stmt (stmt))
4778 && has_single_use (lhs))
4780 operand_entry *oe = operand_entry_pool.allocate ();
4781 oe->op = lhs;
4782 oe->rank = code;
4783 oe->id = 0;
4784 oe->count = 1;
4785 ops.safe_push (oe);
4786 bb_ent.last_idx++;
4787 bb_ent.op = lhs;
4789 else
4791 bb_ent.last_idx = ops.length ();
4792 bb_ent.op = rhs;
4794 bbinfo.safe_push (bb_ent);
4795 continue;
4797 else if (bb == last_bb)
4799 /* For last_bb, handle also:
4800 if (x_3(D) == 3)
4801 goto <bb 6>; [34.00%]
4802 else
4803 goto <bb 7>; [66.00%]
4805 <bb 6> [local count: 79512730]:
4807 <bb 7> [local count: 1073741824]:
4808 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4809 where bb 7 is OTHER_BB, but the PHI values from the
4810 earlier bbs match the path through the empty bb
4811 in between. */
4812 bool test_swapped_p = false;
4813 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4814 &other_bb, &test_swapped_p, true);
4815 gcc_assert (ok);
4816 if (test_swapped_p)
4817 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4819 /* Otherwise stmt is GIMPLE_COND. */
4820 code = gimple_cond_code (stmt);
4821 lhs = gimple_cond_lhs (stmt);
4822 rhs = gimple_cond_rhs (stmt);
4823 if (TREE_CODE (lhs) == SSA_NAME
4824 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4825 && ((code != EQ_EXPR && code != NE_EXPR)
4826 || rhs != boolean_false_node
4827 /* Either push into ops the individual bitwise
4828 or resp. and operands, depending on which
4829 edge is other_bb. */
4830 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4831 ^ (code == EQ_EXPR))
4832 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4833 loop_containing_stmt (stmt))))
4835 /* Or push the GIMPLE_COND stmt itself. */
4836 operand_entry *oe = operand_entry_pool.allocate ();
4838 oe->op = NULL;
4839 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4840 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4841 /* oe->op = NULL signals that there is no SSA_NAME
4842 for the range test, and oe->id instead is the
4843 number of the basic block at whose end the GIMPLE_COND
4844 is. */
4845 oe->id = bb->index;
4846 oe->count = 1;
4847 oe->stmt_to_insert = NULL;
4848 ops.safe_push (oe);
4849 bb_ent.op = NULL;
4850 bb_ent.last_idx++;
4852 else if (ops.length () > bb_ent.first_idx)
4854 bb_ent.op = lhs;
4855 bb_ent.last_idx = ops.length ();
4857 bbinfo.safe_push (bb_ent);
4858 if (bb == first_bb)
4859 break;
4861 if (ops.length () > 1)
4862 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4863 if (any_changes)
4865 unsigned int idx, max_idx = 0;
4866 /* update_ops relies on has_single_use predicates returning the
4867 same values as they did during get_ops earlier. Additionally it
4868 never removes statements, only adds new ones, and it should walk
4869 from the single imm use and check the predicate already before
4870 making those changes.
4871 On the other hand, the handling of GIMPLE_COND directly can turn
4872 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4873 it needs to be done in a separate loop afterwards. */
4874 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4876 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4877 && bbinfo[idx].op != NULL_TREE)
4879 tree new_op;
4881 max_idx = idx;
4882 stmt = last_stmt (bb);
4883 new_op = update_ops (bbinfo[idx].op,
4884 (enum tree_code)
4885 ops[bbinfo[idx].first_idx]->rank,
4886 ops, &bbinfo[idx].first_idx,
4887 loop_containing_stmt (stmt));
4888 if (new_op == NULL_TREE)
4890 gcc_assert (bb == last_bb);
4891 new_op = ops[bbinfo[idx].first_idx++]->op;
4893 if (bbinfo[idx].op != new_op)
4895 imm_use_iterator iter;
4896 use_operand_p use_p;
4897 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4899 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4900 if (is_gimple_debug (use_stmt))
4901 continue;
4902 else if (gimple_code (use_stmt) == GIMPLE_COND
4903 || gimple_code (use_stmt) == GIMPLE_PHI)
4904 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4905 SET_USE (use_p, new_op);
4906 else if ((is_gimple_assign (use_stmt)
4907 && (TREE_CODE_CLASS
4908 (gimple_assign_rhs_code (use_stmt))
4909 == tcc_comparison)))
4910 cast_or_tcc_cmp_stmt = use_stmt;
4911 else if (gimple_assign_cast_p (use_stmt))
4912 cast_or_tcc_cmp_stmt = use_stmt;
4913 else
4914 gcc_unreachable ();
4916 if (cast_or_tcc_cmp_stmt)
4918 gcc_assert (bb == last_bb);
4919 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4920 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4921 enum tree_code rhs_code
4922 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4923 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4924 : CONVERT_EXPR;
4925 gassign *g;
4926 if (is_gimple_min_invariant (new_op))
4928 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4929 g = gimple_build_assign (new_lhs, new_op);
4931 else
4932 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4933 gimple_stmt_iterator gsi
4934 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4935 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4936 gimple_set_visited (g, true);
4937 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4938 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4939 if (is_gimple_debug (use_stmt))
4940 continue;
4941 else if (gimple_code (use_stmt) == GIMPLE_COND
4942 || gimple_code (use_stmt) == GIMPLE_PHI)
4943 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4944 SET_USE (use_p, new_lhs);
4945 else
4946 gcc_unreachable ();
4950 if (bb == first_bb)
4951 break;
4953 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4955 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4956 && bbinfo[idx].op == NULL_TREE
4957 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4959 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4961 if (idx > max_idx)
4962 max_idx = idx;
4964 /* If we collapse the conditional to a true/false
4965 condition, then bubble that knowledge up to our caller. */
4966 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4968 gimple_cond_make_false (cond_stmt);
4969 cfg_cleanup_needed = true;
4971 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4973 gimple_cond_make_true (cond_stmt);
4974 cfg_cleanup_needed = true;
4976 else
4978 gimple_cond_set_code (cond_stmt, NE_EXPR);
4979 gimple_cond_set_lhs (cond_stmt,
4980 ops[bbinfo[idx].first_idx]->op);
4981 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4983 update_stmt (cond_stmt);
4985 if (bb == first_bb)
4986 break;
4989 /* The above changes could result in basic blocks after the first
4990 modified one, up to and including last_bb, being executed even if
4991 they would not be in the original program. If the value ranges of
4992 assignment lhs' in those bbs were dependent on the conditions
4993 guarding those basic blocks which now can change, the VRs might
4994 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4995 are only used within the same bb, it should not be a big deal if
4996 we just reset all the VRs in those bbs. See PR68671. */
4997 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4998 reset_flow_sensitive_info_in_bb (bb);
5000 return cfg_cleanup_needed;
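/* Illustrative sketch (hypothetical example, not part of this file): an
   inter-bb chain of conditions of the sort maybe_optimize_range_tests
   looks for.  Each "if" ends its own basic block with a GIMPLE_COND and
   branches to a common block, so the tests can be merged into one range
   check (here roughly (unsigned) a <= 2) and the intermediate conditions
   folded to constants, which is why the caller may then need a CFG
   cleanup pass.  */
static int
inter_bb_example (int a)
{
  if (a == 0)
    return 1;
  if (a == 1)
    return 1;
  if (a == 2)
    return 1;
  return 0;
}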
5003 /* Return true if OPERAND is defined by a PHI node which uses the LHS
5004 of STMT in its operands. This is also known as a "destructive
5005 update" operation. */
5007 static bool
5008 is_phi_for_stmt (gimple *stmt, tree operand)
5010 gimple *def_stmt;
5011 gphi *def_phi;
5012 tree lhs;
5013 use_operand_p arg_p;
5014 ssa_op_iter i;
5016 if (TREE_CODE (operand) != SSA_NAME)
5017 return false;
5019 lhs = gimple_assign_lhs (stmt);
5021 def_stmt = SSA_NAME_DEF_STMT (operand);
5022 def_phi = dyn_cast <gphi *> (def_stmt);
5023 if (!def_phi)
5024 return false;
5026 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
5027 if (lhs == USE_FROM_PTR (arg_p))
5028 return true;
5029 return false;
5032 /* Remove def stmt of VAR if VAR has zero uses and recurse
5033 on rhs1 operand if so. */
5035 static void
5036 remove_visited_stmt_chain (tree var)
5038 gimple *stmt;
5039 gimple_stmt_iterator gsi;
5041 while (1)
5043 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5044 return;
5045 stmt = SSA_NAME_DEF_STMT (var);
5046 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5048 var = gimple_assign_rhs1 (stmt);
5049 gsi = gsi_for_stmt (stmt);
5050 reassoc_remove_stmt (&gsi);
5051 release_defs (stmt);
5053 else
5054 return;
5058 /* This function checks three consecutive operands in
5059 the passed operand vector OPS starting from OPINDEX and
5060 swaps two operands if it is profitable for the binary operation
5061 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5063 We pair ops with the same rank if possible.
5065 The alternative we try is to see if STMT is a destructive
5066 update style statement, which is like:
5067 b = phi (a, ...)
5068 a = c + b;
5069 In that case, we want to use the destructive update form to
5070 expose the possible vectorizer sum reduction opportunity.
5071 In that case, the third operand will be the phi node. This
5072 check is not performed if STMT is null.
5074 We could, of course, try to be better as noted above, and do a
5075 lot of work to try to find these opportunities in >3 operand
5076 cases, but it is unlikely to be worth it. */
5078 static void
5079 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5080 unsigned int opindex, gimple *stmt)
5082 operand_entry *oe1, *oe2, *oe3;
5084 oe1 = ops[opindex];
5085 oe2 = ops[opindex + 1];
5086 oe3 = ops[opindex + 2];
5088 if ((oe1->rank == oe2->rank
5089 && oe2->rank != oe3->rank)
5090 || (stmt && is_phi_for_stmt (stmt, oe3->op)
5091 && !is_phi_for_stmt (stmt, oe1->op)
5092 && !is_phi_for_stmt (stmt, oe2->op)))
5093 std::swap (*oe1, *oe3);
5094 else if ((oe1->rank == oe3->rank
5095 && oe2->rank != oe3->rank)
5096 || (stmt && is_phi_for_stmt (stmt, oe2->op)
5097 && !is_phi_for_stmt (stmt, oe1->op)
5098 && !is_phi_for_stmt (stmt, oe3->op)))
5099 std::swap (*oe1, *oe2);
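/* Illustrative sketch (hypothetical example, not part of this file): the
   "destructive update" shape is_phi_for_stmt detects and
   swap_ops_for_binary_stmt tries to keep last.  In the loop below the
   gimple for the accumulator looks roughly like
     sum_1 = PHI <0, sum_2>
     sum_2 = sum_1 + a[i];
   i.e. one operand of the add is defined by a PHI that uses the add's
   result, which keeps a vectorizer sum reduction exposed.  */
static int
accumulate_example (const int *a, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum = sum + a[i];
  return sum;
}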
5102 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5103 two definitions, otherwise return STMT. */
5105 static inline gimple *
5106 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
5108 if (TREE_CODE (rhs1) == SSA_NAME
5109 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5110 stmt = SSA_NAME_DEF_STMT (rhs1);
5111 if (TREE_CODE (rhs2) == SSA_NAME
5112 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5113 stmt = SSA_NAME_DEF_STMT (rhs2);
5114 return stmt;
5117 /* If the stmt that defines operand has to be inserted, insert it
5118 before the use. */
5119 static void
5120 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5122 gcc_assert (is_gimple_assign (stmt_to_insert));
5123 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5124 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5125 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
5126 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5127 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5129 /* If the insert point is not stmt, then insert_point would be
5130 the point where operand rhs1 or rhs2 is defined. In this case,
5131 stmt_to_insert has to be inserted afterwards. This would
5132 only happen when the stmt insertion point is flexible. */
5133 if (stmt == insert_point)
5134 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5135 else
5136 insert_stmt_after (stmt_to_insert, insert_point);
5140 /* Recursively rewrite our linearized statements so that the operators
5141 match those in OPS[OPINDEX], putting the computation in rank
5142 order. Return new lhs.
5143 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5144 the current stmt and during recursive invocations.
5145 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5146 recursive invocations. */
5148 static tree
5149 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5150 const vec<operand_entry *> &ops, bool changed,
5151 bool next_changed)
5153 tree rhs1 = gimple_assign_rhs1 (stmt);
5154 tree rhs2 = gimple_assign_rhs2 (stmt);
5155 tree lhs = gimple_assign_lhs (stmt);
5156 operand_entry *oe;
5158 /* The final recursion case for this function is that you have
5159 exactly two operations left.
5160 If we had exactly one op in the entire list to start with, we
5161 would have never called this function, and the tail recursion
5162 rewrites them one at a time. */
5163 if (opindex + 2 == ops.length ())
5165 operand_entry *oe1, *oe2;
5167 oe1 = ops[opindex];
5168 oe2 = ops[opindex + 1];
5170 if (rhs1 != oe1->op || rhs2 != oe2->op)
5172 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5173 unsigned int uid = gimple_uid (stmt);
5175 if (dump_file && (dump_flags & TDF_DETAILS))
5177 fprintf (dump_file, "Transforming ");
5178 print_gimple_stmt (dump_file, stmt, 0);
5181 /* If the stmt that defines operand has to be inserted, insert it
5182 before the use. */
5183 if (oe1->stmt_to_insert)
5184 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5185 if (oe2->stmt_to_insert)
5186 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5187 /* Even when changed is false, reassociation could have e.g. removed
5188 some redundant operations, so unless we are just swapping the
5189 arguments or unless there is no change at all (then we just
5190 return lhs), force creation of a new SSA_NAME. */
5191 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5193 gimple *insert_point
5194 = find_insert_point (stmt, oe1->op, oe2->op);
5195 lhs = make_ssa_name (TREE_TYPE (lhs));
5196 stmt
5197 = gimple_build_assign (lhs, rhs_code,
5198 oe1->op, oe2->op);
5199 gimple_set_uid (stmt, uid);
5200 gimple_set_visited (stmt, true);
5201 if (insert_point == gsi_stmt (gsi))
5202 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5203 else
5204 insert_stmt_after (stmt, insert_point);
5206 else
5208 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5209 == stmt);
5210 gimple_assign_set_rhs1 (stmt, oe1->op);
5211 gimple_assign_set_rhs2 (stmt, oe2->op);
5212 update_stmt (stmt);
5215 if (rhs1 != oe1->op && rhs1 != oe2->op)
5216 remove_visited_stmt_chain (rhs1);
5218 if (dump_file && (dump_flags & TDF_DETAILS))
5220 fprintf (dump_file, " into ");
5221 print_gimple_stmt (dump_file, stmt, 0);
5224 return lhs;
5227 /* If we hit here, we should have 3 or more ops left. */
5228 gcc_assert (opindex + 2 < ops.length ());
5230 /* Rewrite the next operator. */
5231 oe = ops[opindex];
5233 /* If the stmt that defines operand has to be inserted, insert it
5234 before the use. */
5235 if (oe->stmt_to_insert)
5236 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5238 /* Recurse on the LHS of the binary operator, which is guaranteed to
5239 be the non-leaf side. */
5240 tree new_rhs1
5241 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5242 changed || oe->op != rhs2 || next_changed,
5243 false);
5245 if (oe->op != rhs2 || new_rhs1 != rhs1)
5247 if (dump_file && (dump_flags & TDF_DETAILS))
5249 fprintf (dump_file, "Transforming ");
5250 print_gimple_stmt (dump_file, stmt, 0);
5253 /* If changed is false, this is either opindex == 0
5254 or all outer rhs2's were equal to corresponding oe->op,
5255 and powi_result is NULL.
5256 That means lhs is equivalent before and after reassociation.
5257 Otherwise ensure the old lhs SSA_NAME is not reused and
5258 create a new stmt as well, so that any debug stmts will be
5259 properly adjusted. */
5260 if (changed)
5262 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5263 unsigned int uid = gimple_uid (stmt);
5264 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5266 lhs = make_ssa_name (TREE_TYPE (lhs));
5267 stmt = gimple_build_assign (lhs, rhs_code,
5268 new_rhs1, oe->op);
5269 gimple_set_uid (stmt, uid);
5270 gimple_set_visited (stmt, true);
5271 if (insert_point == gsi_stmt (gsi))
5272 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5273 else
5274 insert_stmt_after (stmt, insert_point);
5276 else
5278 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5279 == stmt);
5280 gimple_assign_set_rhs1 (stmt, new_rhs1);
5281 gimple_assign_set_rhs2 (stmt, oe->op);
5282 update_stmt (stmt);
5285 if (dump_file && (dump_flags & TDF_DETAILS))
5287 fprintf (dump_file, " into ");
5288 print_gimple_stmt (dump_file, stmt, 0);
5291 return lhs;
5294 /* Find out how many cycles we need to compute a statement chain.
5295 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
5296 maximum number of independent statements we may execute per cycle.
5298 static int
5299 get_required_cycles (int ops_num, int cpu_width)
5301 int res;
5302 int elog;
5303 unsigned int rest;
5305 /* While we have more than 2 * cpu_width operands
5306 we may reduce the number of operands by cpu_width
5307 per cycle. */
5308 res = ops_num / (2 * cpu_width);
5310 /* The remaining operand count may be halved each cycle
5311 until we have only one operand. */
5312 rest = (unsigned)(ops_num - res * cpu_width);
5313 elog = exact_log2 (rest);
5314 if (elog >= 0)
5315 res += elog;
5316 else
5317 res += floor_log2 (rest) + 1;
5319 return res;
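/* Minimal standalone sketch (hypothetical, not part of this file): mirrors
   the cycle estimate of get_required_cycles above using portable loops in
   place of exact_log2/floor_log2.  For example ops_num = 10, cpu_width = 2
   gives 10 / 4 = 2 full-width cycles, leaving 10 - 2 * 2 = 6 operands that
   need floor_log2 (6) + 1 = 3 further cycles, i.e. 5 cycles in total.  */
static int
required_cycles_sketch (int ops_num, int cpu_width)
{
  int res = ops_num / (2 * cpu_width);
  unsigned int rest = (unsigned int) (ops_num - res * cpu_width);
  int log = 0;
  /* After this loop LOG is floor_log2 (rest) for rest >= 1; for rest == 0
     the result below still matches the original helpers.  */
  while ((1u << (log + 1)) <= rest)
    log++;
  res += (rest & (rest - 1u)) == 0u ? log : log + 1;
  return res;
}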
5322 /* Returns an optimal number of registers to use for the computation of
5323 the given statements.
5325 static int
5326 get_reassociation_width (int ops_num, enum tree_code opc,
5327 machine_mode mode)
5329 int param_width = param_tree_reassoc_width;
5330 int width;
5331 int width_min;
5332 int cycles_best;
5334 if (param_width > 0)
5335 width = param_width;
5336 else
5337 width = targetm.sched.reassociation_width (opc, mode);
5339 if (width == 1)
5340 return width;
5342 /* Get the minimal time required for sequence computation. */
5343 cycles_best = get_required_cycles (ops_num, width);
5345 /* Check if we may use less width and still compute the sequence in
5346 the same time. It will allow us to reduce register usage.
5347 get_required_cycles is monotonically increasing with lower width
5348 so we can perform a binary search for the minimal width that still
5349 results in the optimal cycle count. */
5350 width_min = 1;
5351 while (width > width_min)
5353 int width_mid = (width + width_min) / 2;
5355 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5356 width = width_mid;
5357 else if (width_min < width_mid)
5358 width_min = width_mid;
5359 else
5360 break;
5363 return width;
5366 /* Recursively rewrite our linearized statements so that the operators
5367 match those in OPS[OPINDEX], putting the computation in rank
5368 order and trying to allow operations to be executed in
5369 parallel. */
5371 static void
5372 rewrite_expr_tree_parallel (gassign *stmt, int width,
5373 const vec<operand_entry *> &ops)
5375 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5376 int op_num = ops.length ();
5377 gcc_assert (op_num > 0);
5378 int stmt_num = op_num - 1;
5379 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5380 int op_index = op_num - 1;
5381 int stmt_index = 0;
5382 int ready_stmts_end = 0;
5383 int i = 0;
5384 gimple *stmt1 = NULL, *stmt2 = NULL;
5385 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5387 /* We start expression rewriting from the top statements.
5388 So, in this loop we create a full list of statements
5389 we will work with. */
5390 stmts[stmt_num - 1] = stmt;
5391 for (i = stmt_num - 2; i >= 0; i--)
5392 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5394 for (i = 0; i < stmt_num; i++)
5396 tree op1, op2;
5398 /* Determine whether we should use results of
5399 already handled statements or not. */
5400 if (ready_stmts_end == 0
5401 && (i - stmt_index >= width || op_index < 1))
5402 ready_stmts_end = i;
5404 /* Now we choose operands for the next statement. A nonzero
5405 value in ready_stmts_end here means that we should use
5406 the result of already generated statements as the new operand.
5407 if (ready_stmts_end > 0)
5409 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5410 if (ready_stmts_end > stmt_index)
5411 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5412 else if (op_index >= 0)
5414 operand_entry *oe = ops[op_index--];
5415 stmt2 = oe->stmt_to_insert;
5416 op2 = oe->op;
5418 else
5420 gcc_assert (stmt_index < i);
5421 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5424 if (stmt_index >= ready_stmts_end)
5425 ready_stmts_end = 0;
5427 else
5429 if (op_index > 1)
5430 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5431 operand_entry *oe2 = ops[op_index--];
5432 operand_entry *oe1 = ops[op_index--];
5433 op2 = oe2->op;
5434 stmt2 = oe2->stmt_to_insert;
5435 op1 = oe1->op;
5436 stmt1 = oe1->stmt_to_insert;
5439 /* If we emit the last statement then we should put
5440 operands into the last statement. It will also
5441 break the loop. */
5442 if (op_index < 0 && stmt_index == i)
5443 i = stmt_num - 1;
5445 if (dump_file && (dump_flags & TDF_DETAILS))
5447 fprintf (dump_file, "Transforming ");
5448 print_gimple_stmt (dump_file, stmts[i], 0);
5451 /* If the stmt that defines operand has to be inserted, insert it
5452 before the use. */
5453 if (stmt1)
5454 insert_stmt_before_use (stmts[i], stmt1);
5455 if (stmt2)
5456 insert_stmt_before_use (stmts[i], stmt2);
5457 stmt1 = stmt2 = NULL;
5459 /* We keep original statement only for the last one. All
5460 others are recreated. */
5461 if (i == stmt_num - 1)
5463 gimple_assign_set_rhs1 (stmts[i], op1);
5464 gimple_assign_set_rhs2 (stmts[i], op2);
5465 update_stmt (stmts[i]);
5467 else
5469 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5470 gimple_set_visited (stmts[i], true);
5472 if (dump_file && (dump_flags & TDF_DETAILS))
5474 fprintf (dump_file, " into ");
5475 print_gimple_stmt (dump_file, stmts[i], 0);
5479 remove_visited_stmt_chain (last_rhs1);
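/* Illustrative sketch (hypothetical example, not part of this file): if
   the target reports a reassociation width of 2, a chain such as
   a + b + c + d is rewritten from the serial form (((a + b) + c) + d)
   into the shallower (a + b) + (c + d), so the two inner additions can
   execute in parallel.  The temporaries below just spell out that shape
   at the source level.  */
static int
parallel_rewrite_example (int a, int b, int c, int d)
{
  int t1 = a + b;	/* Independent of t2.  */
  int t2 = c + d;
  return t1 + t2;
}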
5482 /* Transform STMT, which is really (A + B) + (C + D), into the left
5483 linear form, ((A+B)+C)+D.
5484 Recurse on D if necessary. */
5486 static void
5487 linearize_expr (gimple *stmt)
5489 gimple_stmt_iterator gsi;
5490 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5491 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5492 gimple *oldbinrhs = binrhs;
5493 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5494 gimple *newbinrhs = NULL;
5495 class loop *loop = loop_containing_stmt (stmt);
5496 tree lhs = gimple_assign_lhs (stmt);
5498 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5499 && is_reassociable_op (binrhs, rhscode, loop));
5501 gsi = gsi_for_stmt (stmt);
5503 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5504 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5505 gimple_assign_rhs_code (binrhs),
5506 gimple_assign_lhs (binlhs),
5507 gimple_assign_rhs2 (binrhs));
5508 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5509 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5510 gimple_set_uid (binrhs, gimple_uid (stmt));
5512 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5513 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5515 if (dump_file && (dump_flags & TDF_DETAILS))
5517 fprintf (dump_file, "Linearized: ");
5518 print_gimple_stmt (dump_file, stmt, 0);
5521 reassociate_stats.linearized++;
5522 update_stmt (stmt);
5524 gsi = gsi_for_stmt (oldbinrhs);
5525 reassoc_remove_stmt (&gsi);
5526 release_defs (oldbinrhs);
5528 gimple_set_visited (stmt, true);
5529 gimple_set_visited (binlhs, true);
5530 gimple_set_visited (binrhs, true);
5532 /* Tail recurse on the new rhs if it still needs reassociation. */
5533 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5534 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5535 want to change the algorithm while converting to tuples. */
5536 linearize_expr (stmt);
5539 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5540 it. Otherwise, return NULL. */
5542 static gimple *
5543 get_single_immediate_use (tree lhs)
5545 use_operand_p immuse;
5546 gimple *immusestmt;
5548 if (TREE_CODE (lhs) == SSA_NAME
5549 && single_imm_use (lhs, &immuse, &immusestmt)
5550 && is_gimple_assign (immusestmt))
5551 return immusestmt;
5553 return NULL;
5556 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5557 representing the negated value. Insertions of any necessary
5558 instructions go before GSI.
5559 This function is recursive in that, if you hand it "a_5" as the
5560 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5561 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5563 static tree
5564 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5566 gimple *negatedefstmt = NULL;
5567 tree resultofnegate;
5568 gimple_stmt_iterator gsi;
5569 unsigned int uid;
5571 /* If we are trying to negate a name, defined by an add, negate the
5572 add operands instead. */
5573 if (TREE_CODE (tonegate) == SSA_NAME)
5574 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5575 if (TREE_CODE (tonegate) == SSA_NAME
5576 && is_gimple_assign (negatedefstmt)
5577 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5578 && has_single_use (gimple_assign_lhs (negatedefstmt))
5579 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5581 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5582 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5583 tree lhs = gimple_assign_lhs (negatedefstmt);
5584 gimple *g;
5586 gsi = gsi_for_stmt (negatedefstmt);
5587 rhs1 = negate_value (rhs1, &gsi);
5589 gsi = gsi_for_stmt (negatedefstmt);
5590 rhs2 = negate_value (rhs2, &gsi);
5592 gsi = gsi_for_stmt (negatedefstmt);
5593 lhs = make_ssa_name (TREE_TYPE (lhs));
5594 gimple_set_visited (negatedefstmt, true);
5595 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5596 gimple_set_uid (g, gimple_uid (negatedefstmt));
5597 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5598 return lhs;
5601 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5602 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5603 NULL_TREE, true, GSI_SAME_STMT);
5604 gsi = *gsip;
5605 uid = gimple_uid (gsi_stmt (gsi));
5606 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5608 gimple *stmt = gsi_stmt (gsi);
5609 if (gimple_uid (stmt) != 0)
5610 break;
5611 gimple_set_uid (stmt, uid);
5613 return resultofnegate;
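/* Illustrative sketch (hypothetical example, not part of this file): when
   the operand handed to negate_value is a single-use add, the negate is
   pushed into the add's operands instead of being emitted as a separate
   NEGATE_EXPR, so breaking up the subtract below yields roughly
   a + (-b) + (-c), exposing all three operands to reassociation.  */
static int
negate_value_example (int a, int b, int c)
{
  return a - (b + c);
}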
5616 /* Return true if we should break up the subtract in STMT into an add
5617 with negate. This is true when the subtract operands are really
5618 adds, or the subtract itself is used in an add expression. In
5619 either case, breaking up the subtract into an add with negate
5620 exposes the adds to reassociation. */
5622 static bool
5623 should_break_up_subtract (gimple *stmt)
5625 tree lhs = gimple_assign_lhs (stmt);
5626 tree binlhs = gimple_assign_rhs1 (stmt);
5627 tree binrhs = gimple_assign_rhs2 (stmt);
5628 gimple *immusestmt;
5629 class loop *loop = loop_containing_stmt (stmt);
5631 if (TREE_CODE (binlhs) == SSA_NAME
5632 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5633 return true;
5635 if (TREE_CODE (binrhs) == SSA_NAME
5636 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5637 return true;
5639 if (TREE_CODE (lhs) == SSA_NAME
5640 && (immusestmt = get_single_immediate_use (lhs))
5641 && is_gimple_assign (immusestmt)
5642 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5643 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5644 && gimple_assign_rhs1 (immusestmt) == lhs)
5645 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5646 return true;
5647 return false;
5650 /* Transform STMT from A - B into A + -B. */
5652 static void
5653 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5655 tree rhs1 = gimple_assign_rhs1 (stmt);
5656 tree rhs2 = gimple_assign_rhs2 (stmt);
5658 if (dump_file && (dump_flags & TDF_DETAILS))
5660 fprintf (dump_file, "Breaking up subtract ");
5661 print_gimple_stmt (dump_file, stmt, 0);
5664 rhs2 = negate_value (rhs2, gsip);
5665 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5666 update_stmt (stmt);
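/* Illustrative sketch (hypothetical example, not part of this file):
   breaking subtracts up into add-with-negate lets the adds reassociate.
   For the chain below the pass works on a + (-b) + (-c) + b, the operand
   list optimization cancels -b against b, and the result is roughly
   a - c once the remaining negate is repropagated.  */
static int
break_up_subtract_example (int a, int b, int c)
{
  return a - b - c + b;
}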
5669 /* Determine whether STMT is a builtin call that raises an SSA name
5670 to an integer power and has only one use. If so, and this is early
5671 reassociation and unsafe math optimizations are permitted, place
5672 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5673 If any of these conditions does not hold, return FALSE. */
5675 static bool
5676 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5678 tree arg1;
5679 REAL_VALUE_TYPE c, cint;
5681 switch (gimple_call_combined_fn (stmt))
5683 CASE_CFN_POW:
5684 if (flag_errno_math)
5685 return false;
5687 *base = gimple_call_arg (stmt, 0);
5688 arg1 = gimple_call_arg (stmt, 1);
5690 if (TREE_CODE (arg1) != REAL_CST)
5691 return false;
5693 c = TREE_REAL_CST (arg1);
5695 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5696 return false;
5698 *exponent = real_to_integer (&c);
5699 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5700 if (!real_identical (&c, &cint))
5701 return false;
5703 break;
5705 CASE_CFN_POWI:
5706 *base = gimple_call_arg (stmt, 0);
5707 arg1 = gimple_call_arg (stmt, 1);
5709 if (!tree_fits_shwi_p (arg1))
5710 return false;
5712 *exponent = tree_to_shwi (arg1);
5713 break;
5715 default:
5716 return false;
5719 /* Expanding negative exponents is generally unproductive, so we don't
5720 complicate matters with those. Exponents of zero and one should
5721 have been handled by expression folding. */
5722 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5723 return false;
5725 return true;
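/* Illustrative sketch (hypothetical example, not part of this file): when
   compiled with -ffast-math, a call such as pow (x, 3.0) satisfies
   acceptable_pow_call (base x, exponent 3), so a product like the one
   below can be recorded as the repeated factor x with count 4 and later
   emitted through __builtin_powi.  */
static double
pow_call_example (double x)
{
  return __builtin_pow (x, 3.0) * x;
}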
5728 /* Try to derive and add operand entry for OP to *OPS. Return false if
5729 unsuccessful. */
5731 static bool
5732 try_special_add_to_ops (vec<operand_entry *> *ops,
5733 enum tree_code code,
5734 tree op, gimple* def_stmt)
5736 tree base = NULL_TREE;
5737 HOST_WIDE_INT exponent = 0;
5739 if (TREE_CODE (op) != SSA_NAME
5740 || ! has_single_use (op))
5741 return false;
5743 if (code == MULT_EXPR
5744 && reassoc_insert_powi_p
5745 && flag_unsafe_math_optimizations
5746 && is_gimple_call (def_stmt)
5747 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5749 add_repeat_to_ops_vec (ops, base, exponent);
5750 gimple_set_visited (def_stmt, true);
5751 return true;
5753 else if (code == MULT_EXPR
5754 && is_gimple_assign (def_stmt)
5755 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5756 && !HONOR_SNANS (TREE_TYPE (op))
5757 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5758 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5760 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5761 tree cst = build_minus_one_cst (TREE_TYPE (op));
5762 add_to_ops_vec (ops, rhs1);
5763 add_to_ops_vec (ops, cst);
5764 gimple_set_visited (def_stmt, true);
5765 return true;
5768 return false;
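/* Illustrative sketch (hypothetical example, not part of this file): for a
   multiplication fed by a single-use negate, try_special_add_to_ops
   records the negated operand together with a -1 constant, so the product
   below contributes the operand list { x, y, -1 } to the multiply
   chain.  */
static int
negate_in_product_example (int x, int y)
{
  return x * -y;
}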
5771 /* Recursively linearize a binary expression that is the RHS of STMT.
5772 Place the operands of the expression tree in the vector named OPS. */
5774 static void
5775 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5776 bool is_associative, bool set_visited)
5778 tree binlhs = gimple_assign_rhs1 (stmt);
5779 tree binrhs = gimple_assign_rhs2 (stmt);
5780 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5781 bool binlhsisreassoc = false;
5782 bool binrhsisreassoc = false;
5783 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5784 class loop *loop = loop_containing_stmt (stmt);
5786 if (set_visited)
5787 gimple_set_visited (stmt, true);
5789 if (TREE_CODE (binlhs) == SSA_NAME)
5791 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5792 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5793 && !stmt_could_throw_p (cfun, binlhsdef));
5796 if (TREE_CODE (binrhs) == SSA_NAME)
5798 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5799 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5800 && !stmt_could_throw_p (cfun, binrhsdef));
5803 /* If the LHS is not reassociable, but the RHS is, we need to swap
5804 them. If neither is reassociable, there is nothing we can do, so
5805 just put them in the ops vector. If the LHS is reassociable,
5806 linearize it. If both are reassociable, then linearize the RHS
5807 and the LHS. */
5809 if (!binlhsisreassoc)
5811 /* If this is not an associative operation, such as division, give up. */
5812 if (!is_associative)
5814 add_to_ops_vec (ops, binrhs);
5815 return;
5818 if (!binrhsisreassoc)
5820 bool swap = false;
5821 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5822 /* If we add ops for the rhs, we expect to be able to recurse
5823 to it via the lhs during the expression rewrite, so swap the
5824 operands. */
5825 swap = true;
5826 else
5827 add_to_ops_vec (ops, binrhs);
5829 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5830 add_to_ops_vec (ops, binlhs);
5832 if (!swap)
5833 return;
5836 if (dump_file && (dump_flags & TDF_DETAILS))
5838 fprintf (dump_file, "swapping operands of ");
5839 print_gimple_stmt (dump_file, stmt, 0);
5842 swap_ssa_operands (stmt,
5843 gimple_assign_rhs1_ptr (stmt),
5844 gimple_assign_rhs2_ptr (stmt));
5845 update_stmt (stmt);
5847 if (dump_file && (dump_flags & TDF_DETAILS))
5849 fprintf (dump_file, " is now ");
5850 print_gimple_stmt (dump_file, stmt, 0);
5852 if (!binrhsisreassoc)
5853 return;
5855 /* We want to make it so the lhs is always the reassociative op,
5856 so swap. */
5857 std::swap (binlhs, binrhs);
5859 else if (binrhsisreassoc)
5861 linearize_expr (stmt);
5862 binlhs = gimple_assign_rhs1 (stmt);
5863 binrhs = gimple_assign_rhs2 (stmt);
5866 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5867 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5868 rhscode, loop));
5869 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5870 is_associative, set_visited);
5872 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5873 add_to_ops_vec (ops, binrhs);
5876 /* Repropagate the negates back into subtracts, since no other pass
5877 currently does it. */
5879 static void
5880 repropagate_negates (void)
5882 unsigned int i = 0;
5883 tree negate;
5885 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5887 gimple *user = get_single_immediate_use (negate);
5889 if (!user || !is_gimple_assign (user))
5890 continue;
5892 /* The negate operand can be either operand of a PLUS_EXPR
5893 (it can be the LHS if the RHS is a constant for example).
5895 Force the negate operand to the RHS of the PLUS_EXPR, then
5896 transform the PLUS_EXPR into a MINUS_EXPR. */
5897 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5899 /* If the negated operand appears on the LHS of the
5900 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5901 to force the negated operand to the RHS of the PLUS_EXPR. */
5902 if (gimple_assign_rhs1 (user) == negate)
5904 swap_ssa_operands (user,
5905 gimple_assign_rhs1_ptr (user),
5906 gimple_assign_rhs2_ptr (user));
5909 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5910 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5911 if (gimple_assign_rhs2 (user) == negate)
5913 tree rhs1 = gimple_assign_rhs1 (user);
5914 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5915 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5916 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5917 update_stmt (user);
5920 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5922 if (gimple_assign_rhs1 (user) == negate)
5924 /* We have
5925 x = -a
5926 y = x - b
5927 which we transform into
5928 x = a + b
5929 y = -x .
5930 This pushes down the negate which we possibly can merge
5931 into some other operation, hence insert it into the
5932 plus_negates vector. */
5933 gimple *feed = SSA_NAME_DEF_STMT (negate);
5934 tree a = gimple_assign_rhs1 (feed);
5935 tree b = gimple_assign_rhs2 (user);
5936 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5937 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5938 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5939 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5940 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5941 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5942 user = gsi_stmt (gsi2);
5943 update_stmt (user);
5944 reassoc_remove_stmt (&gsi);
5945 release_defs (feed);
5946 plus_negates.safe_push (gimple_assign_lhs (user));
5948 else
5950 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5951 rid of one operation. */
5952 gimple *feed = SSA_NAME_DEF_STMT (negate);
5953 tree a = gimple_assign_rhs1 (feed);
5954 tree rhs1 = gimple_assign_rhs1 (user);
5955 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5956 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5957 update_stmt (gsi_stmt (gsi));
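/* Illustrative example for the PLUS_EXPR case above, with hypothetical
   names in the style of the MINUS_EXPR examples in the comments: given

     x = -a
     y = x + b

   the negate is first forced to the RHS, giving y = b + x, and the
   statement is then rewritten as

     y = b - a

   so the temporary negation goes away again.  */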
5963 /* Break up subtract operations in block BB.
5965 We do this top down because we don't know whether the subtract is
5966 part of a possible chain of reassociation except at the top.
5968 IE given
5969 d = f + g
5970 c = a + e
5971 b = c - d
5972 q = b - r
5973 k = t - q
5975 we want to break up k = t - q, but we won't until we've transformed q
5976 = b - r, which won't be broken up until we transform b = c - d.
5978 En passant, clear the GIMPLE visited flag on every statement
5979 and set UIDs within each basic block. */
5981 static void
5982 break_up_subtract_bb (basic_block bb)
5984 gimple_stmt_iterator gsi;
5985 basic_block son;
5986 unsigned int uid = 1;
5988 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5990 gimple *stmt = gsi_stmt (gsi);
5991 gimple_set_visited (stmt, false);
5992 gimple_set_uid (stmt, uid++);
5994 if (!is_gimple_assign (stmt)
5995 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
5996 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
5997 continue;
5999 /* Look for simple gimple subtract operations. */
6000 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6002 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6003 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6004 continue;
6006 /* Check for a subtract used only in an addition. If this
6007 is the case, transform it into add of a negate for better
6008 reassociation. IE transform C = A-B into C = A + -B if C
6009 is only used in an addition. */
6010 if (should_break_up_subtract (stmt))
6011 break_up_subtract (stmt, &gsi);
6013 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6014 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6015 plus_negates.safe_push (gimple_assign_lhs (stmt));
6017 for (son = first_dom_son (CDI_DOMINATORS, bb);
6018 son;
6019 son = next_dom_son (CDI_DOMINATORS, son))
6020 break_up_subtract_bb (son);
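/* Illustrative sketch, not part of the original pass; the function name is
   hypothetical.  In the example below the inner subtract is used only in
   an addition, so should_break_up_subtract accepts it and
   break_up_subtract rewrites it as "a + -b", exposing the whole chain
   a + -b + c to the later linearization and rewriting steps.  */

unsigned int
reassoc_break_up_subtract_example (unsigned int a, unsigned int b,
				   unsigned int c)
{
  return (a - b) + c;
}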
6023 /* Used for repeated factor analysis. */
6024 struct repeat_factor
6026 /* An SSA name that occurs in a multiply chain. */
6027 tree factor;
6029 /* Cached rank of the factor. */
6030 unsigned rank;
6032 /* Number of occurrences of the factor in the chain. */
6033 HOST_WIDE_INT count;
6035 /* An SSA name representing the product of this factor and
6036 all factors appearing later in the repeated factor vector. */
6037 tree repr;
6041 static vec<repeat_factor> repeat_factor_vec;
6043 /* Used for sorting the repeat factor vector. Sort primarily by
6044 ascending occurrence count, secondarily by descending rank. */
6046 static int
6047 compare_repeat_factors (const void *x1, const void *x2)
6049 const repeat_factor *rf1 = (const repeat_factor *) x1;
6050 const repeat_factor *rf2 = (const repeat_factor *) x2;
6052 if (rf1->count != rf2->count)
6053 return rf1->count < rf2->count ? -1 : 1;
6055 return rf1->rank > rf2->rank ? -1 : rf1->rank < rf2->rank ? 1 : 0;
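/* Worked example with hypothetical values: for the three repeat factors
   { x: count 4, rank 9 }, { y: count 2, rank 5 } and { z: count 2, rank 7 },
   the comparator above orders the vector as z, y, x: the factors with the
   smaller occurrence count come first, and the tie between y and z is
   broken in favor of the higher-ranked z.  */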
6058 /* Look for repeated operands in OPS in the multiply tree rooted at
6059 STMT. Replace them with an optimal sequence of multiplies and powi
6060 builtin calls, and remove the used operands from OPS. Return an
6061 SSA name representing the value of the replacement sequence. */
6063 static tree
6064 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6066 unsigned i, j, vec_len;
6067 int ii;
6068 operand_entry *oe;
6069 repeat_factor *rf1, *rf2;
6070 repeat_factor rfnew;
6071 tree result = NULL_TREE;
6072 tree target_ssa, iter_result;
6073 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6074 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6075 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6076 gimple *mul_stmt, *pow_stmt;
6078 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6079 target, unless type is integral. */
6080 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6081 return NULL_TREE;
6083 /* Allocate the repeated factor vector. */
6084 repeat_factor_vec.create (10);
6086 /* Scan the OPS vector for all SSA names in the product and build
6087 up a vector of occurrence counts for each factor. */
6088 FOR_EACH_VEC_ELT (*ops, i, oe)
6090 if (TREE_CODE (oe->op) == SSA_NAME)
6092 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6094 if (rf1->factor == oe->op)
6096 rf1->count += oe->count;
6097 break;
6101 if (j >= repeat_factor_vec.length ())
6103 rfnew.factor = oe->op;
6104 rfnew.rank = oe->rank;
6105 rfnew.count = oe->count;
6106 rfnew.repr = NULL_TREE;
6107 repeat_factor_vec.safe_push (rfnew);
6112 /* Sort the repeated factor vector by (a) increasing occurrence count,
6113 and (b) decreasing rank. */
6114 repeat_factor_vec.qsort (compare_repeat_factors);
6116 /* It is generally best to combine as many base factors as possible
6117 into a product before applying __builtin_powi to the result.
6118 However, the sort order chosen for the repeated factor vector
6119 allows us to cache partial results for the product of the base
6120 factors for subsequent use. When we already have a cached partial
6121 result from a previous iteration, it is best to make use of it
6122 before looking for another __builtin_powi opportunity.
6124 As an example, consider x * x * y * y * y * z * z * z * z.
6125 We want to first compose the product x * y * z, raise it to the
6126 second power, then multiply this by y * z, and finally multiply
6127 by z. This can be done in 5 multiplies provided we cache y * z
6128 for use in both expressions:
6130 t1 = y * z
6131 t2 = t1 * x
6132 t3 = t2 * t2
6133 t4 = t1 * t3
6134 result = t4 * z
6136 If we instead ignored the cached y * z and first multiplied by
6137 the __builtin_powi opportunity z * z, we would get the inferior:
6139 t1 = y * z
6140 t2 = t1 * x
6141 t3 = t2 * t2
6142 t4 = z * z
6143 t5 = t3 * t4
6144 result = t5 * y */
6146 vec_len = repeat_factor_vec.length ();
6148 /* Repeatedly look for opportunities to create a builtin_powi call. */
6149 while (true)
6151 HOST_WIDE_INT power;
6153 /* First look for the largest cached product of factors from
6154 preceding iterations. If found, create a builtin_powi for
6155 it if the minimum occurrence count for its factors is at
6156 least 2, or just use this cached product as our next
6157 multiplicand if the minimum occurrence count is 1. */
6158 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6160 if (rf1->repr && rf1->count > 0)
6161 break;
6164 if (j < vec_len)
6166 power = rf1->count;
6168 if (power == 1)
6170 iter_result = rf1->repr;
6172 if (dump_file && (dump_flags & TDF_DETAILS))
6174 unsigned elt;
6175 repeat_factor *rf;
6176 fputs ("Multiplying by cached product ", dump_file);
6177 for (elt = j; elt < vec_len; elt++)
6179 rf = &repeat_factor_vec[elt];
6180 print_generic_expr (dump_file, rf->factor);
6181 if (elt < vec_len - 1)
6182 fputs (" * ", dump_file);
6184 fputs ("\n", dump_file);
6187 else
6189 if (INTEGRAL_TYPE_P (type))
6191 gcc_assert (power > 1);
6192 gimple_stmt_iterator gsip = gsi;
6193 gsi_prev (&gsip);
6194 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6195 rf1->repr, power);
6196 gimple_stmt_iterator gsic = gsi;
6197 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6199 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6200 gimple_set_visited (gsi_stmt (gsic), true);
6201 gsi_prev (&gsic);
6204 else
6206 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6207 pow_stmt
6208 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6209 build_int_cst (integer_type_node,
6210 power));
6211 gimple_call_set_lhs (pow_stmt, iter_result);
6212 gimple_set_location (pow_stmt, gimple_location (stmt));
6213 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6214 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6217 if (dump_file && (dump_flags & TDF_DETAILS))
6219 unsigned elt;
6220 repeat_factor *rf;
6221 fputs ("Building __builtin_pow call for cached product (",
6222 dump_file);
6223 for (elt = j; elt < vec_len; elt++)
6225 rf = &repeat_factor_vec[elt];
6226 print_generic_expr (dump_file, rf->factor);
6227 if (elt < vec_len - 1)
6228 fputs (" * ", dump_file);
6230 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6231 power);
6235 else
6237 /* Otherwise, find the first factor in the repeated factor
6238 vector whose occurrence count is at least 2. If no such
6239 factor exists, there are no builtin_powi opportunities
6240 remaining. */
6241 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6243 if (rf1->count >= 2)
6244 break;
6247 if (j >= vec_len)
6248 break;
6250 power = rf1->count;
6252 if (dump_file && (dump_flags & TDF_DETAILS))
6254 unsigned elt;
6255 repeat_factor *rf;
6256 fputs ("Building __builtin_pow call for (", dump_file);
6257 for (elt = j; elt < vec_len; elt++)
6259 rf = &repeat_factor_vec[elt];
6260 print_generic_expr (dump_file, rf->factor);
6261 if (elt < vec_len - 1)
6262 fputs (" * ", dump_file);
6264 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6267 reassociate_stats.pows_created++;
6269 /* Visit each element of the vector in reverse order (so that
6270 high-occurrence elements are visited first, and within the
6271 same occurrence count, lower-ranked elements are visited
6272 first). Form a linear product of all elements in this order
6273 whose occurrence count is at least that of element J.
6274 Record the SSA name representing the product of each element
6275 with all subsequent elements in the vector. */
6276 if (j == vec_len - 1)
6277 rf1->repr = rf1->factor;
6278 else
6280 for (ii = vec_len - 2; ii >= (int)j; ii--)
6282 tree op1, op2;
6284 rf1 = &repeat_factor_vec[ii];
6285 rf2 = &repeat_factor_vec[ii + 1];
6287 /* Init the last factor's representative to be itself. */
6288 if (!rf2->repr)
6289 rf2->repr = rf2->factor;
6291 op1 = rf1->factor;
6292 op2 = rf2->repr;
6294 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6295 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6296 op1, op2);
6297 gimple_set_location (mul_stmt, gimple_location (stmt));
6298 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6299 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6300 rf1->repr = target_ssa;
6302 /* Don't reprocess the multiply we just introduced. */
6303 gimple_set_visited (mul_stmt, true);
6307 /* Form a call to __builtin_powi for the maximum product
6308 just formed, raised to the power obtained earlier. */
6309 rf1 = &repeat_factor_vec[j];
6310 if (INTEGRAL_TYPE_P (type))
6312 gcc_assert (power > 1);
6313 gimple_stmt_iterator gsip = gsi;
6314 gsi_prev (&gsip);
6315 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6316 rf1->repr, power);
6317 gimple_stmt_iterator gsic = gsi;
6318 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6320 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6321 gimple_set_visited (gsi_stmt (gsic), true);
6322 gsi_prev (&gsic);
6325 else
6327 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6328 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6329 build_int_cst (integer_type_node,
6330 power));
6331 gimple_call_set_lhs (pow_stmt, iter_result);
6332 gimple_set_location (pow_stmt, gimple_location (stmt));
6333 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6334 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6338 /* If we previously formed at least one other builtin_powi call,
6339 form the product of this one and those others. */
6340 if (result)
6342 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6343 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6344 result, iter_result);
6345 gimple_set_location (mul_stmt, gimple_location (stmt));
6346 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6347 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6348 gimple_set_visited (mul_stmt, true);
6349 result = new_result;
6351 else
6352 result = iter_result;
6354 /* Decrement the occurrence count of each element in the product
6355 by the count found above, and remove this many copies of each
6356 factor from OPS. */
6357 for (i = j; i < vec_len; i++)
6359 unsigned k = power;
6360 unsigned n;
6362 rf1 = &repeat_factor_vec[i];
6363 rf1->count -= power;
6365 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6367 if (oe->op == rf1->factor)
6369 if (oe->count <= k)
6371 ops->ordered_remove (n);
6372 k -= oe->count;
6374 if (k == 0)
6375 break;
6377 else
6379 oe->count -= k;
6380 break;
6387 /* At this point all elements in the repeated factor vector have a
6388 remaining occurrence count of 0 or 1, and those with a count of 1
6389 don't have cached representatives. Re-sort the ops vector and
6390 clean up. */
6391 ops->qsort (sort_by_operand_rank);
6392 repeat_factor_vec.release ();
6394 /* Return the final product computed herein. Note that there may
6395 still be some elements with single occurrence count left in OPS;
6396 those will be handled by the normal reassociation logic. */
6397 return result;
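/* Illustrative sketch, not part of the original pass; the function name is
   hypothetical and -funsafe-math-optimizations (with reassociation enabled
   for doubles) is assumed.  The routine above handles the product below
   along the lines of the t1..t4 sequence in the comment inside
   attempt_builtin_powi, emitting a __builtin_powi call for the squared
   subproduct x * y * z and multiplying the result by the cached y * z and
   by z.  */

double
reassoc_builtin_powi_example (double x, double y, double z)
{
  return x * x * y * y * y * z * z * z * z;
}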
6400 /* Attempt to optimize
6401 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6402 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6404 static void
6405 attempt_builtin_copysign (vec<operand_entry *> *ops)
6407 operand_entry *oe;
6408 unsigned int i;
6409 unsigned int length = ops->length ();
6410 tree cst = ops->last ()->op;
6412 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6413 return;
6415 FOR_EACH_VEC_ELT (*ops, i, oe)
6417 if (TREE_CODE (oe->op) == SSA_NAME
6418 && has_single_use (oe->op))
6420 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6421 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6423 tree arg0, arg1;
6424 switch (gimple_call_combined_fn (old_call))
6426 CASE_CFN_COPYSIGN:
6427 CASE_CFN_COPYSIGN_FN:
6428 arg0 = gimple_call_arg (old_call, 0);
6429 arg1 = gimple_call_arg (old_call, 1);
6430 /* The first argument of copysign must be a constant,
6431 otherwise there's nothing to do. */
6432 if (TREE_CODE (arg0) == REAL_CST)
6434 tree type = TREE_TYPE (arg0);
6435 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6436 /* If we couldn't fold to a single constant, skip it.
6437 That happens e.g. for inexact multiplication when
6438 -frounding-math. */
6439 if (mul == NULL_TREE)
6440 break;
6441 /* Instead of adjusting OLD_CALL, let's build a new
6442 call to not leak the LHS and prevent keeping bogus
6443 debug statements. DCE will clean up the old call. */
6444 gcall *new_call;
6445 if (gimple_call_internal_p (old_call))
6446 new_call = gimple_build_call_internal
6447 (IFN_COPYSIGN, 2, mul, arg1);
6448 else
6449 new_call = gimple_build_call
6450 (gimple_call_fndecl (old_call), 2, mul, arg1);
6451 tree lhs = make_ssa_name (type);
6452 gimple_call_set_lhs (new_call, lhs);
6453 gimple_set_location (new_call,
6454 gimple_location (old_call));
6455 insert_stmt_after (new_call, old_call);
6456 /* We've used the constant, get rid of it. */
6457 ops->pop ();
6458 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6459 /* Handle the CST1 < 0 case by negating the result. */
6460 if (cst1_neg)
6462 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6463 gimple *negate_stmt
6464 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6465 insert_stmt_after (negate_stmt, new_call);
6466 oe->op = negrhs;
6468 else
6469 oe->op = lhs;
6470 if (dump_file && (dump_flags & TDF_DETAILS))
6472 fprintf (dump_file, "Optimizing copysign: ");
6473 print_generic_expr (dump_file, cst);
6474 fprintf (dump_file, " * COPYSIGN (");
6475 print_generic_expr (dump_file, arg0);
6476 fprintf (dump_file, ", ");
6477 print_generic_expr (dump_file, arg1);
6478 fprintf (dump_file, ") into %sCOPYSIGN (",
6479 cst1_neg ? "-" : "");
6480 print_generic_expr (dump_file, mul);
6481 fprintf (dump_file, ", ");
6482 print_generic_expr (dump_file, arg1);
6483 fprintf (dump_file, ")\n");
6485 return;
6487 break;
6488 default:
6489 break;
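/* Illustrative sketch, not part of the original pass; the function name is
   hypothetical and -ffast-math is assumed so that the double product is
   reassociated.  The constant factor 4.0 below is folded into the copysign
   call, leaving roughly v * __builtin_copysign (8.0, y); with a negative
   constant factor the result would additionally be negated, as described
   in the comment above attempt_builtin_copysign.  */

double
reassoc_copysign_example (double y, double v)
{
  return 4.0 * __builtin_copysign (2.0, y) * v;
}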
6496 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6498 static void
6499 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6501 tree rhs1;
6503 if (dump_file && (dump_flags & TDF_DETAILS))
6505 fprintf (dump_file, "Transforming ");
6506 print_gimple_stmt (dump_file, stmt, 0);
6509 rhs1 = gimple_assign_rhs1 (stmt);
6510 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6511 update_stmt (stmt);
6512 remove_visited_stmt_chain (rhs1);
6514 if (dump_file && (dump_flags & TDF_DETAILS))
6516 fprintf (dump_file, " into ");
6517 print_gimple_stmt (dump_file, stmt, 0);
6521 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6523 static void
6524 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6525 tree rhs1, tree rhs2)
6527 if (dump_file && (dump_flags & TDF_DETAILS))
6529 fprintf (dump_file, "Transforming ");
6530 print_gimple_stmt (dump_file, stmt, 0);
6533 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6534 update_stmt (gsi_stmt (*gsi));
6535 remove_visited_stmt_chain (rhs1);
6537 if (dump_file && (dump_flags & TDF_DETAILS))
6539 fprintf (dump_file, " into ");
6540 print_gimple_stmt (dump_file, stmt, 0);
6544 /* Reassociate expressions in basic block BB and in the blocks that
6545 are its children in the post-dominator tree.
6547 Bubble up return status from maybe_optimize_range_tests. */
6549 static bool
6550 reassociate_bb (basic_block bb)
6552 gimple_stmt_iterator gsi;
6553 basic_block son;
6554 gimple *stmt = last_stmt (bb);
6555 bool cfg_cleanup_needed = false;
6557 if (stmt && !gimple_visited_p (stmt))
6558 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6560 bool do_prev = false;
6561 for (gsi = gsi_last_bb (bb);
6562 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6564 do_prev = true;
6565 stmt = gsi_stmt (gsi);
6567 if (is_gimple_assign (stmt)
6568 && !stmt_could_throw_p (cfun, stmt))
6570 tree lhs, rhs1, rhs2;
6571 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6573 /* If this was part of an already processed statement,
6574 we don't need to touch it again. */
6575 if (gimple_visited_p (stmt))
6577 /* This statement might have become dead because of previous
6578 reassociations. */
6579 if (has_zero_uses (gimple_get_lhs (stmt)))
6581 reassoc_remove_stmt (&gsi);
6582 release_defs (stmt);
6583 /* We might end up removing the last stmt above, which
6584 leaves the iterator at the end of the sequence.
6585 Reset it to the last stmt of the block in that case and
6586 make sure we don't do gsi_prev afterwards. */
6587 if (gsi_end_p (gsi))
6589 gsi = gsi_last_bb (bb);
6590 do_prev = false;
6593 continue;
6596 /* If this is not a gimple binary expression, there is
6597 nothing for us to do with it. */
6598 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6599 continue;
6601 lhs = gimple_assign_lhs (stmt);
6602 rhs1 = gimple_assign_rhs1 (stmt);
6603 rhs2 = gimple_assign_rhs2 (stmt);
6605 /* For operations other than bitwise and min/max ones, not every
6606 type can be reassociated. Verify that here. */
6607 if ((rhs_code != BIT_IOR_EXPR
6608 && rhs_code != BIT_AND_EXPR
6609 && rhs_code != BIT_XOR_EXPR
6610 && rhs_code != MIN_EXPR
6611 && rhs_code != MAX_EXPR
6612 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6613 || !can_reassociate_op_p (rhs1)
6614 || !can_reassociate_op_p (rhs2))
6615 continue;
6617 if (associative_tree_code (rhs_code))
6619 auto_vec<operand_entry *> ops;
6620 tree powi_result = NULL_TREE;
6621 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6623 /* There may be no immediate uses left by the time we
6624 get here because we may have eliminated them all. */
6625 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6626 continue;
6628 gimple_set_visited (stmt, true);
6629 linearize_expr_tree (&ops, stmt, true, true);
6630 ops.qsort (sort_by_operand_rank);
6631 int orig_len = ops.length ();
6632 optimize_ops_list (rhs_code, &ops);
6633 if (undistribute_ops_list (rhs_code, &ops,
6634 loop_containing_stmt (stmt)))
6636 ops.qsort (sort_by_operand_rank);
6637 optimize_ops_list (rhs_code, &ops);
6639 if (undistribute_bitref_for_vector (rhs_code, &ops,
6640 loop_containing_stmt (stmt)))
6642 ops.qsort (sort_by_operand_rank);
6643 optimize_ops_list (rhs_code, &ops);
6645 if (rhs_code == PLUS_EXPR
6646 && transform_add_to_multiply (&ops))
6647 ops.qsort (sort_by_operand_rank);
6649 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6651 if (is_vector)
6652 optimize_vec_cond_expr (rhs_code, &ops);
6653 else
6654 optimize_range_tests (rhs_code, &ops, NULL);
6657 if (rhs_code == MULT_EXPR && !is_vector)
6659 attempt_builtin_copysign (&ops);
6661 if (reassoc_insert_powi_p
6662 && (flag_unsafe_math_optimizations
6663 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6664 powi_result = attempt_builtin_powi (stmt, &ops);
6667 operand_entry *last;
6668 bool negate_result = false;
6669 if (ops.length () > 1
6670 && rhs_code == MULT_EXPR)
6672 last = ops.last ();
6673 if ((integer_minus_onep (last->op)
6674 || real_minus_onep (last->op))
6675 && !HONOR_SNANS (TREE_TYPE (lhs))
6676 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6677 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6679 ops.pop ();
6680 negate_result = true;
6684 tree new_lhs = lhs;
6685 /* If the operand vector is now empty, all operands were
6686 consumed by the __builtin_powi optimization. */
6687 if (ops.length () == 0)
6688 transform_stmt_to_copy (&gsi, stmt, powi_result);
6689 else if (ops.length () == 1)
6691 tree last_op = ops.last ()->op;
6693 /* If the stmt that defines operand has to be inserted, insert it
6694 before the use. */
6695 if (ops.last ()->stmt_to_insert)
6696 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6697 if (powi_result)
6698 transform_stmt_to_multiply (&gsi, stmt, last_op,
6699 powi_result);
6700 else
6701 transform_stmt_to_copy (&gsi, stmt, last_op);
6703 else
6705 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6706 int ops_num = ops.length ();
6707 int width;
6709 /* For binary bit operations, if there are at least 3
6710 operands and the last operand in OPS is a constant,
6711 move it to the front. This helps ensure that we generate
6712 (X & Y) & C rather than (X & C) & Y. The former will
6713 often match a canonical bit test when we get to RTL. */
6714 if (ops.length () > 2
6715 && (rhs_code == BIT_AND_EXPR
6716 || rhs_code == BIT_IOR_EXPR
6717 || rhs_code == BIT_XOR_EXPR)
6718 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6719 std::swap (*ops[0], *ops[ops_num - 1]);
6721 /* Only rewrite the expression tree into a parallel form in the
6722 last reassoc pass, to avoid useless back-and-forth work with
6723 the initial linearization. */
6724 if (!reassoc_insert_powi_p
6725 && ops.length () > 3
6726 && (width = get_reassociation_width (ops_num, rhs_code,
6727 mode)) > 1)
6729 if (dump_file && (dump_flags & TDF_DETAILS))
6730 fprintf (dump_file,
6731 "Width = %d was chosen for reassociation\n",
6732 width);
6733 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6734 width, ops);
6736 else
6738 /* When there are three operands left, we want
6739 to make sure the ones that get the double
6740 binary op are chosen wisely. */
6741 int len = ops.length ();
6742 if (len >= 3)
6743 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6745 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6746 powi_result != NULL
6747 || negate_result,
6748 len != orig_len);
6751 /* If we combined some repeated factors into a
6752 __builtin_powi call, multiply that result by the
6753 reassociated operands. */
6754 if (powi_result)
6756 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6757 tree type = TREE_TYPE (lhs);
6758 tree target_ssa = make_temp_ssa_name (type, NULL,
6759 "reassocpow");
6760 gimple_set_lhs (lhs_stmt, target_ssa);
6761 update_stmt (lhs_stmt);
6762 if (lhs != new_lhs)
6764 target_ssa = new_lhs;
6765 new_lhs = lhs;
6767 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6768 powi_result, target_ssa);
6769 gimple_set_location (mul_stmt, gimple_location (stmt));
6770 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6771 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6775 if (negate_result)
6777 stmt = SSA_NAME_DEF_STMT (lhs);
6778 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6779 gimple_set_lhs (stmt, tmp);
6780 if (lhs != new_lhs)
6781 tmp = new_lhs;
6782 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6783 tmp);
6784 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6785 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6786 update_stmt (stmt);
6791 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6792 son;
6793 son = next_dom_son (CDI_POST_DOMINATORS, son))
6794 cfg_cleanup_needed |= reassociate_bb (son);
6796 return cfg_cleanup_needed;
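/* Illustrative sketch, not part of the original pass; the function name is
   hypothetical.  For a chain of three or more bitwise operands ending in a
   constant, the swap in reassociate_bb above moves the constant to the
   front of the operand vector, so the rewritten tree has the shape
   (x & y) & 0x10 rather than (x & 0x10) & y, which is more likely to match
   a canonical bit test once it reaches RTL.  */

unsigned int
reassoc_bit_and_example (unsigned int x, unsigned int y)
{
  return x & 0x10 & y;
}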
6799 /* Add jumps around shifts for range tests turned into bit tests.
6800 For each SSA_NAME VAR we have code like:
6801 VAR = ...; // final stmt of range comparison
6802 // bit test here...;
6803 OTHERVAR = ...; // final stmt of the bit test sequence
6804 RES = VAR | OTHERVAR;
6805 Turn the above into:
6806 VAR = ...;
6807 if (VAR != 0)
6808 goto <l3>;
6809 else
6810 goto <l2>;
6811 <l2>:
6812 // bit test here...;
6813 OTHERVAR = ...;
6814 <l3>:
6815 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6817 static void
6818 branch_fixup (void)
6820 tree var;
6821 unsigned int i;
6823 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6825 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6826 gimple *use_stmt;
6827 use_operand_p use;
6828 bool ok = single_imm_use (var, &use, &use_stmt);
6829 gcc_assert (ok
6830 && is_gimple_assign (use_stmt)
6831 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6832 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6834 basic_block cond_bb = gimple_bb (def_stmt);
6835 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6836 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6838 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6839 gimple *g = gimple_build_cond (NE_EXPR, var,
6840 build_zero_cst (TREE_TYPE (var)),
6841 NULL_TREE, NULL_TREE);
6842 location_t loc = gimple_location (use_stmt);
6843 gimple_set_location (g, loc);
6844 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6846 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6847 etrue->probability = profile_probability::even ();
6848 edge efalse = find_edge (cond_bb, then_bb);
6849 efalse->flags = EDGE_FALSE_VALUE;
6850 efalse->probability -= etrue->probability;
6851 then_bb->count -= etrue->count ();
6853 tree othervar = NULL_TREE;
6854 if (gimple_assign_rhs1 (use_stmt) == var)
6855 othervar = gimple_assign_rhs2 (use_stmt);
6856 else if (gimple_assign_rhs2 (use_stmt) == var)
6857 othervar = gimple_assign_rhs1 (use_stmt);
6858 else
6859 gcc_unreachable ();
6860 tree lhs = gimple_assign_lhs (use_stmt);
6861 gphi *phi = create_phi_node (lhs, merge_bb);
6862 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6863 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6864 gsi = gsi_for_stmt (use_stmt);
6865 gsi_remove (&gsi, true);
6867 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6868 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6870 reassoc_branch_fixups.release ();
6873 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6874 void debug_ops_vector (vec<operand_entry *> ops);
6876 /* Dump the operand entry vector OPS to FILE. */
6878 void
6879 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6881 operand_entry *oe;
6882 unsigned int i;
6884 FOR_EACH_VEC_ELT (ops, i, oe)
6886 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6887 print_generic_expr (file, oe->op);
6888 fprintf (file, "\n");
6892 /* Dump the operand entry vector OPS to STDERR. */
6894 DEBUG_FUNCTION void
6895 debug_ops_vector (vec<operand_entry *> ops)
6897 dump_ops_vector (stderr, ops);
6900 /* Bubble up return status from reassociate_bb. */
6902 static bool
6903 do_reassoc (void)
6905 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6906 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6909 /* Initialize the reassociation pass. */
6911 static void
6912 init_reassoc (void)
6914 int i;
6915 int64_t rank = 2;
6916 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6918 /* Find the loops, so that we can prevent moving calculations in
6919 them. */
6920 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6922 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6924 next_operand_entry_id = 0;
6926 /* Reverse RPO (Reverse Post Order) will give us something where
6927 deeper loops come later. */
6928 pre_and_rev_post_order_compute (NULL, bbs, false);
6929 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6930 operand_rank = new hash_map<tree, int64_t>;
6932 /* Give each default definition a distinct rank. This includes
6933 parameters and the static chain. Walk backwards over all
6934 SSA names so that we get proper rank ordering according
6935 to tree_swap_operands_p. */
6936 for (i = num_ssa_names - 1; i > 0; --i)
6938 tree name = ssa_name (i);
6939 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6940 insert_operand_rank (name, ++rank);
6943 /* Set up rank for each BB */
6944 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6945 bb_rank[bbs[i]] = ++rank << 16;
6947 free (bbs);
6948 calculate_dominance_info (CDI_POST_DOMINATORS);
6949 plus_negates = vNULL;
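/* Worked example with hypothetical numbers: in a function with two
   parameters and three reachable basic blocks, the loops above would give
   the two default definitions the ranks 3 and 4, and the blocks the base
   ranks 5 << 16, 6 << 16 and 7 << 16 in reverse post order, so ranks
   derived from statements inside any block are always much larger than
   the ranks of the incoming parameters.  */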
6952 /* Cleanup after the reassociation pass, and print stats if
6953 requested. */
6955 static void
6956 fini_reassoc (void)
6958 statistics_counter_event (cfun, "Linearized",
6959 reassociate_stats.linearized);
6960 statistics_counter_event (cfun, "Constants eliminated",
6961 reassociate_stats.constants_eliminated);
6962 statistics_counter_event (cfun, "Ops eliminated",
6963 reassociate_stats.ops_eliminated);
6964 statistics_counter_event (cfun, "Statements rewritten",
6965 reassociate_stats.rewritten);
6966 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6967 reassociate_stats.pows_encountered);
6968 statistics_counter_event (cfun, "Built-in powi calls created",
6969 reassociate_stats.pows_created);
6971 delete operand_rank;
6972 bitmap_clear (biased_names);
6973 operand_entry_pool.release ();
6974 free (bb_rank);
6975 plus_negates.release ();
6976 free_dominance_info (CDI_POST_DOMINATORS);
6977 loop_optimizer_finalize ();
6980 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6981 insertion of __builtin_powi calls.
6983 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6984 optimization of a gimple conditional. Otherwise returns zero. */
6986 static unsigned int
6987 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
6989 reassoc_insert_powi_p = insert_powi_p;
6990 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
6992 init_reassoc ();
6994 bool cfg_cleanup_needed;
6995 cfg_cleanup_needed = do_reassoc ();
6996 repropagate_negates ();
6997 branch_fixup ();
6999 fini_reassoc ();
7000 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7003 namespace {
7005 const pass_data pass_data_reassoc =
7007 GIMPLE_PASS, /* type */
7008 "reassoc", /* name */
7009 OPTGROUP_NONE, /* optinfo_flags */
7010 TV_TREE_REASSOC, /* tv_id */
7011 ( PROP_cfg | PROP_ssa ), /* properties_required */
7012 0, /* properties_provided */
7013 0, /* properties_destroyed */
7014 0, /* todo_flags_start */
7015 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7018 class pass_reassoc : public gimple_opt_pass
7020 public:
7021 pass_reassoc (gcc::context *ctxt)
7022 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7025 /* opt_pass methods: */
7026 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
7027 void set_pass_param (unsigned int n, bool param)
7029 gcc_assert (n == 0);
7030 insert_powi_p = param;
7031 bias_loop_carried_phi_ranks_p = !param;
7033 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
7034 virtual unsigned int execute (function *)
7036 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7039 private:
7040 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7041 point 3a in the pass header comment. */
7042 bool insert_powi_p;
7043 bool bias_loop_carried_phi_ranks_p;
7044 }; // class pass_reassoc
7046 } // anon namespace
7048 gimple_opt_pass *
7049 make_pass_reassoc (gcc::context *ctxt)
7051 return new pass_reassoc (ctxt);