1 /* Reassociation for trees.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
62 It consists of five steps:
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
72 3. Optimization of the operand lists, eliminating things like a +
73 -a, a & a, etc.
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
82 5. Repropagate negates, as nothing else will clean it up ATM.
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
103 a (1), b (1), c (1), d (2), e (2)
106 We start with our merge worklist empty, and the ops list with all of
107 those on it.
109 You want to first merge all leaves of the same rank, as much as
110 possible.
112 So first build a binary op of
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
124 Then build a binary op of d + e
125 mergetmp2 = d + e
127 and put mergetmp2 on the merge worklist.
129 so merge worklist = {mergetmp, c, mergetmp2}
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
134 So we have
136 build binary op
137 mergetmp3 = mergetmp + c
139 worklist = {mergetmp2, mergetmp3}
141 mergetmp4 = mergetmp2 + mergetmp3
143 worklist = {mergetmp4}
145 because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
154 So why don't we do this?
156 Because it's expensive, and will rarely help. Most trees we are
157 reassociating have 3 or fewer ops. If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
162 mergetmp = op1 + op2
163 newstmt = mergetmp + op3
165 instead of
166 mergetmp = op2 + op3
167 newstmt = mergetmp + op1
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
171 can do.
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
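/* Editorial illustration, not part of the pass: a minimal C-level sketch of
   how steps 1, 2, 4 and 5 above reshape a toy expression.  The function
   name and the plain-C setting are assumptions made purely for
   illustration; the pass itself works on GIMPLE statements.  Guarded with
   #if 0 so it does not affect the build.  */
#if 0
static int
reassoc_steps_sketch (int a, int b, int c, int d)
{
  int t0 = (a - b) + (c + d);   /* original expression                    */
  int t1 = ((a + -b) + c) + d;  /* after steps 1 (negate introduction)
                                   and 2 (left linearization)             */
  int t2 = ((a + c) + d) - b;   /* after steps 4 (rank ordering) and
                                   5 (negate repropagation)               */
  return t0 + t1 + t2;          /* all three compute the same value       */
}
#endif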
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p;
183 /* Enable biasing ranks of loop accumulators. We don't want this before
184 vectorization, since it interferes with reduction chains. */
185 static bool reassoc_bias_loop_carried_phi_ranks_p;
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
199 static object_allocator<operand_entry> operand_entry_pool
200 ("operand entry pool");
202 /* This is used to assign a unique ID to each struct operand_entry
203 so that qsort results are identical on different hosts. */
204 static unsigned int next_operand_entry_id;
206 /* Starting rank number for a given basic block, so that we can rank
207 operations using unmovable instructions in that BB based on the bb
208 depth. */
209 static int64_t *bb_rank;
211 /* Operand->rank hashtable. */
212 static hash_map<tree, int64_t> *operand_rank;
214 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
215 biased. */
216 static auto_bitmap biased_names;
218 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
219 all basic blocks, the CFG should be adjusted - the basic block is
220 split right after that SSA_NAME's definition statement and before
221 its only use, which must be a bit ior. */
222 static vec<tree> reassoc_branch_fixups;
224 /* Forward decls. */
225 static int64_t get_rank (tree);
226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
229 possibly added by gsi_remove. */
231 static bool
232 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 gimple *stmt = gsi_stmt (*gsi);
236 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
237 return gsi_remove (gsi, true);
239 gimple_stmt_iterator prev = *gsi;
240 gsi_prev (&prev);
241 unsigned uid = gimple_uid (stmt);
242 basic_block bb = gimple_bb (stmt);
243 bool ret = gsi_remove (gsi, true);
244 if (!gsi_end_p (prev))
245 gsi_next (&prev);
246 else
247 prev = gsi_start_bb (bb);
248 gimple *end_stmt = gsi_stmt (*gsi);
249 while ((stmt = gsi_stmt (prev)) != end_stmt)
251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
252 gimple_set_uid (stmt, uid);
253 gsi_next (&prev);
255 return ret;
258 /* Bias amount for loop-carried phis. We want this to be larger than
259 the depth of any reassociation tree we can see, but not larger than
260 the rank difference between two blocks. */
261 #define PHI_LOOP_BIAS (1 << 15)
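/* Editorial illustration, not part of the pass: what the bias buys us on
   the accumulator pattern that get_rank describes below.  Assuming the
   loop-carried phi result receives the latch rank plus PHI_LOOP_BIAS, it
   becomes the highest-ranked operand of the accumulation and is therefore
   added last, which shortens the loop-carried dependence chain.  The
   function is a hypothetical example, guarded with #if 0 so it does not
   affect the build.  */
#if 0
static int
phi_bias_sketch (const int *a, const int *d, const int *e, int n)
{
  int acc = 0;
  for (int i = 0; i < n; i++)
    /* Unbiased order:  acc = ((acc + a[i]) + d[i]) + e[i];
       Biased order:    acc = ((a[i] + d[i]) + e[i]) + acc;
       The biased form lets a[i] + d[i] + e[i] be computed without waiting
       for the previous iteration's acc.  */
    acc = ((a[i] + d[i]) + e[i]) + acc;
  return acc;
}
#endif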
263 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
264 operands to the STMT's left-hand side. The goal is to preserve bias in code
265 like this:
267 x_1 = phi(x_0, x_2)
268 a = x_1 | 1
269 b = a ^ 2
270 .MEM = b
271 c = b + d
272 x_2 = c + e
274 That is, we need to preserve bias along single-use chains originating from
275 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
276 uses, because only they participate in rank propagation. */
277 static bool
278 propagate_bias_p (gimple *stmt)
280 use_operand_p use;
281 imm_use_iterator use_iter;
282 gimple *single_use_stmt = NULL;
284 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
285 return false;
287 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 gimple *current_use_stmt = USE_STMT (use);
291 if (is_gimple_assign (current_use_stmt)
292 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
295 return false;
296 single_use_stmt = current_use_stmt;
300 if (single_use_stmt == NULL)
301 return false;
303 if (gimple_bb (stmt)->loop_father
304 != gimple_bb (single_use_stmt)->loop_father)
305 return false;
307 return true;
310 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
311 an innermost loop, and the phi has only a single use which is inside
312 the loop, then the rank is the block rank of the loop latch plus an
313 extra bias for the loop-carried dependence. This causes expressions
314 calculated into an accumulator variable to be independent for each
315 iteration of the loop. If STMT is some other phi, the rank is the
316 block rank of its containing block. */
317 static int64_t
318 phi_rank (gimple *stmt)
320 basic_block bb = gimple_bb (stmt);
321 class loop *father = bb->loop_father;
322 tree res;
323 unsigned i;
324 use_operand_p use;
325 gimple *use_stmt;
327 if (!reassoc_bias_loop_carried_phi_ranks_p)
328 return bb_rank[bb->index];
330 /* We only care about real loops (those with a latch). */
331 if (!father->latch)
332 return bb_rank[bb->index];
334 /* Interesting phis must be in headers of innermost loops. */
335 if (bb != father->header
336 || father->inner)
337 return bb_rank[bb->index];
339 /* Ignore virtual SSA_NAMEs. */
340 res = gimple_phi_result (stmt);
341 if (virtual_operand_p (res))
342 return bb_rank[bb->index];
344 /* The phi definition must have a single use, and that use must be
345 within the loop. Otherwise this isn't an accumulator pattern. */
346 if (!single_imm_use (res, &use, &use_stmt)
347 || gimple_bb (use_stmt)->loop_father != father)
348 return bb_rank[bb->index];
350 /* Look for phi arguments from within the loop. If found, bias this phi. */
351 for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 tree arg = gimple_phi_arg_def (stmt, i);
354 if (TREE_CODE (arg) == SSA_NAME
355 && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
358 if (gimple_bb (def_stmt)->loop_father == father)
359 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
363 /* Must be an uninteresting phi. */
364 return bb_rank[bb->index];
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static int64_t
372 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 int64_t op_rank;
376 op_rank = get_rank (op);
378 /* Check whether op is biased after the get_rank () call, since it might have
379 updated biased_names. */
380 if (TREE_CODE (op) == SSA_NAME
381 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 if (maybe_biased_p == NULL)
384 return rank;
385 *maybe_biased_p = true;
388 return MAX (rank, op_rank);
391 /* Look up the operand rank structure for expression E. */
393 static inline int64_t
394 find_operand_rank (tree e)
396 int64_t *slot = operand_rank->get (e);
397 return slot ? *slot : -1;
400 /* Insert {E,RANK} into the operand rank hashtable. */
402 static inline void
403 insert_operand_rank (tree e, int64_t rank)
405 gcc_assert (rank > 0);
406 gcc_assert (!operand_rank->put (e, rank));
409 /* Given an expression E, return the rank of the expression. */
411 static int64_t
412 get_rank (tree e)
414 /* SSA_NAME's have the rank of the expression they are the result of.
416 For globals and uninitialized values, the rank is 0.
417 For function arguments, use the pre-setup rank.
418 For PHI nodes, stores, asm statements, etc, we use the rank of
419 the BB.
420 For simple operations, the rank is the maximum rank of any of
421 its operands, or the bb_rank, whichever is less.
422 I make no claims that this is optimal, however, it gives good
423 results. */
425 /* We make an exception to the normal ranking system to break
426 dependences of accumulator variables in loops. Suppose we
427 have a simple one-block loop containing:
429 x_1 = phi(x_0, x_2)
430 b = a + x_1
431 c = b + d
432 x_2 = c + e
434 As shown, each iteration of the calculation into x is fully
435 dependent upon the iteration before it. We would prefer to
436 see this in the form:
438 x_1 = phi(x_0, x_2)
439 b = a + d
440 c = b + e
441 x_2 = c + x_1
443 If the loop is unrolled, the calculations of b and c from
444 different iterations can be interleaved.
446 To obtain this result during reassociation, we bias the rank
447 of the phi definition x_1 upward, when it is recognized as an
448 accumulator pattern. The artificial rank causes it to be
449 added last, providing the desired independence. */
451 if (TREE_CODE (e) == SSA_NAME)
453 ssa_op_iter iter;
454 gimple *stmt;
455 int64_t rank;
456 tree op;
458 /* If we already have a rank for this expression, use that. */
459 rank = find_operand_rank (e);
460 if (rank != -1)
461 return rank;
463 stmt = SSA_NAME_DEF_STMT (e);
464 if (gimple_code (stmt) == GIMPLE_PHI)
466 rank = phi_rank (stmt);
467 if (rank != bb_rank[gimple_bb (stmt)->index])
468 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
471 else if (!is_gimple_assign (stmt))
472 rank = bb_rank[gimple_bb (stmt)->index];
474 else
476 bool biased_p = false;
477 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
479 /* Otherwise, find the maximum rank for the operands. As an
480 exception, remove the bias from loop-carried phis when propagating
481 the rank so that dependent operations are not also biased. */
482 /* Simply walk over all SSA uses - this takes advantage of the
483 fact that non-SSA operands are is_gimple_min_invariant and
484 thus have rank 0. */
485 rank = 0;
486 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
487 rank = propagate_rank (rank, op, maybe_biased_p);
489 rank += 1;
490 if (biased_p)
491 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
494 if (dump_file && (dump_flags & TDF_DETAILS))
496 fprintf (dump_file, "Rank for ");
497 print_generic_expr (dump_file, e);
498 fprintf (dump_file, " is %" PRId64 "\n", rank);
501 /* Note the rank in the hashtable so we don't recompute it. */
502 insert_operand_rank (e, rank);
503 return rank;
506 /* Constants, globals, etc., are rank 0 */
507 return 0;
511 /* We want integer ones to end up last no matter what, since they are
512 the ones we can do the most with. */
513 #define INTEGER_CONST_TYPE 1 << 4
514 #define FLOAT_ONE_CONST_TYPE 1 << 3
515 #define FLOAT_CONST_TYPE 1 << 2
516 #define OTHER_CONST_TYPE 1 << 1
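/* Editorial illustration, not part of the pass: the point of sorting
   constants to the end of the operand list.  Once the integer constants of
   a linearized chain are adjacent, the operand-list optimization can fold
   them into a single constant.  A hypothetical example, guarded with #if 0
   so it does not affect the build.  */
#if 0
static int
constant_sort_sketch (int x, int y)
{
  /* The linearized operand list of x + 3 + y + 5 sorts to {x, y, 3, 5};
     with 3 and 5 adjacent they fold to 8.  */
  return (x + y) + 8;
}
#endif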
518 /* Classify an invariant tree into integer, float, or other, so that
519 we can sort them to be near other constants of the same type. */
520 static inline int
521 constant_type (tree t)
523 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
524 return INTEGER_CONST_TYPE;
525 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
527 /* Sort -1.0 and 1.0 constants last: while in some cases
528 const_binop can't optimize some inexact operations, multiplication
529 by -1.0 or 1.0 can always be merged with other operands. */
530 if (real_onep (t) || real_minus_onep (t))
531 return FLOAT_ONE_CONST_TYPE;
532 return FLOAT_CONST_TYPE;
534 else
535 return OTHER_CONST_TYPE;
538 /* qsort comparison function to sort operand entries PA and PB by rank
539 so that the sorted array is ordered by rank in decreasing order. */
540 static int
541 sort_by_operand_rank (const void *pa, const void *pb)
543 const operand_entry *oea = *(const operand_entry *const *)pa;
544 const operand_entry *oeb = *(const operand_entry *const *)pb;
546 if (oeb->rank != oea->rank)
547 return oeb->rank > oea->rank ? 1 : -1;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oea->rank == 0)
554 if (constant_type (oeb->op) != constant_type (oea->op))
555 return constant_type (oea->op) - constant_type (oeb->op);
556 else
557 /* To make sorting result stable, we use unique IDs to determine
558 order. */
559 return oeb->id > oea->id ? 1 : -1;
562 if (TREE_CODE (oea->op) != SSA_NAME)
564 if (TREE_CODE (oeb->op) != SSA_NAME)
565 return oeb->id > oea->id ? 1 : -1;
566 else
567 return 1;
569 else if (TREE_CODE (oeb->op) != SSA_NAME)
570 return -1;
572 /* Lastly, make sure the versions that are the same go next to each
573 other. */
574 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
576 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
577 versions of removed SSA_NAMEs, so if possible, prefer to sort
578 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
579 See PR60418. */
580 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
581 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
582 basic_block bba = gimple_bb (stmta);
583 basic_block bbb = gimple_bb (stmtb);
584 if (bbb != bba)
586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
587 but the other might not be. */
588 if (!bba)
589 return 1;
590 if (!bbb)
591 return -1;
592 /* If neither is, compare bb_rank. */
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
597 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
598 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
599 if (da != db)
600 return da ? 1 : -1;
602 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
605 return oeb->id > oea->id ? 1 : -1;
608 /* Add an operand entry to *OPS for the tree operand OP. */
610 static void
611 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
613 operand_entry *oe = operand_entry_pool.allocate ();
615 oe->op = op;
616 oe->rank = get_rank (op);
617 oe->id = next_operand_entry_id++;
618 oe->count = 1;
619 oe->stmt_to_insert = stmt_to_insert;
620 ops->safe_push (oe);
623 /* Add an operand entry to *OPS for the tree operand OP with repeat
624 count REPEAT. */
626 static void
627 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
628 HOST_WIDE_INT repeat)
630 operand_entry *oe = operand_entry_pool.allocate ();
632 oe->op = op;
633 oe->rank = get_rank (op);
634 oe->id = next_operand_entry_id++;
635 oe->count = repeat;
636 oe->stmt_to_insert = NULL;
637 ops->safe_push (oe);
639 reassociate_stats.pows_encountered++;
642 /* Returns true if we can associate the SSA def OP. */
644 static bool
645 can_reassociate_op_p (tree op)
647 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
648 return false;
649 /* Make sure asm goto outputs do not participate in reassociation since
650 we have no way to find an insertion place after asm goto. */
651 if (TREE_CODE (op) == SSA_NAME
652 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
653 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
654 return false;
655 return true;
658 /* Returns true if we can reassociate operations of TYPE.
659 That is for integral or non-saturating fixed-point types, and for
660 floating-point types when -fassociative-math is enabled. */
662 static bool
663 can_reassociate_type_p (tree type)
665 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
666 || NON_SAT_FIXED_POINT_TYPE_P (type)
667 || (flag_associative_math && FLOAT_TYPE_P (type)))
668 return true;
669 return false;
672 /* Return true if STMT is a reassociable operation containing a binary
673 operation with tree code CODE, and is inside LOOP. */
675 static bool
676 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
678 basic_block bb = gimple_bb (stmt);
680 if (gimple_bb (stmt) == NULL)
681 return false;
683 if (!flow_bb_inside_loop_p (loop, bb))
684 return false;
686 if (is_gimple_assign (stmt)
687 && gimple_assign_rhs_code (stmt) == code
688 && has_single_use (gimple_assign_lhs (stmt)))
690 tree rhs1 = gimple_assign_rhs1 (stmt);
691 tree rhs2 = gimple_assign_rhs2 (stmt);
692 if (!can_reassociate_op_p (rhs1)
693 || (rhs2 && !can_reassociate_op_p (rhs2)))
694 return false;
695 return true;
698 return false;
702 /* Return true if STMT is a nop-conversion. */
704 static bool
705 gimple_nop_conversion_p (gimple *stmt)
707 if (gassign *ass = dyn_cast <gassign *> (stmt))
709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
711 TREE_TYPE (gimple_assign_rhs1 (ass))))
712 return true;
714 return false;
717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
718 operand of that operation. Otherwise, return NULL_TREE. */
720 static tree
721 get_unary_op (tree name, enum tree_code opcode)
723 gimple *stmt = SSA_NAME_DEF_STMT (name);
725 /* Look through nop conversions (sign changes). */
726 if (gimple_nop_conversion_p (stmt)
727 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
728 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
730 if (!is_gimple_assign (stmt))
731 return NULL_TREE;
733 if (gimple_assign_rhs_code (stmt) == opcode)
734 return gimple_assign_rhs1 (stmt);
735 return NULL_TREE;
738 /* Return true if OP1 and OP2 have the same value if cast to either type. */
740 static bool
741 ops_equal_values_p (tree op1, tree op2)
743 if (op1 == op2)
744 return true;
746 tree orig_op1 = op1;
747 if (TREE_CODE (op1) == SSA_NAME)
749 gimple *stmt = SSA_NAME_DEF_STMT (op1);
750 if (gimple_nop_conversion_p (stmt))
752 op1 = gimple_assign_rhs1 (stmt);
753 if (op1 == op2)
754 return true;
758 if (TREE_CODE (op2) == SSA_NAME)
760 gimple *stmt = SSA_NAME_DEF_STMT (op2);
761 if (gimple_nop_conversion_p (stmt))
763 op2 = gimple_assign_rhs1 (stmt);
764 if (op1 == op2
765 || orig_op1 == op2)
766 return true;
770 return false;
774 /* If CURR and LAST are a pair of ops that OPCODE allows us to
775 eliminate through equivalences, do so, remove them from OPS, and
776 return true. Otherwise, return false. */
778 static bool
779 eliminate_duplicate_pair (enum tree_code opcode,
780 vec<operand_entry *> *ops,
781 bool *all_done,
782 unsigned int i,
783 operand_entry *curr,
784 operand_entry *last)
787 /* If we have two of the same op, and the opcode is &, |, min, or max,
788 we can eliminate one of them.
789 If we have two of the same op, and the opcode is ^, we can
790 eliminate both of them. */
792 if (last && last->op == curr->op)
794 switch (opcode)
796 case MAX_EXPR:
797 case MIN_EXPR:
798 case BIT_IOR_EXPR:
799 case BIT_AND_EXPR:
800 if (dump_file && (dump_flags & TDF_DETAILS))
802 fprintf (dump_file, "Equivalence: ");
803 print_generic_expr (dump_file, curr->op);
804 fprintf (dump_file, " [&|minmax] ");
805 print_generic_expr (dump_file, last->op);
806 fprintf (dump_file, " -> ");
807 print_generic_stmt (dump_file, last->op);
810 ops->ordered_remove (i);
811 reassociate_stats.ops_eliminated ++;
813 return true;
815 case BIT_XOR_EXPR:
816 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "Equivalence: ");
819 print_generic_expr (dump_file, curr->op);
820 fprintf (dump_file, " ^ ");
821 print_generic_expr (dump_file, last->op);
822 fprintf (dump_file, " -> nothing\n");
825 reassociate_stats.ops_eliminated += 2;
827 if (ops->length () == 2)
829 ops->truncate (0);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
831 *all_done = true;
833 else
835 ops->ordered_remove (i-1);
836 ops->ordered_remove (i-1);
839 return true;
841 default:
842 break;
845 return false;
848 static vec<tree> plus_negates;
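/* Editorial illustration, not part of the pass: the algebraic identities
   behind eliminate_duplicate_pair, eliminate_plus_minus_pair and
   eliminate_not_pairs, written as C expressions on unsigned (wrapping)
   integers so the identities hold without overflow caveats.  A
   hypothetical example, guarded with #if 0 so it does not affect the
   build.  */
#if 0
static int
elimination_identities_sketch (unsigned a, unsigned b)
{
  /* Every comparison below evaluates to 1 for any A and B.  */
  return ((a & a) == a)           /* duplicate pair under &, |, min, max  */
         && ((a ^ a) == 0)        /* duplicate pair under ^ cancels       */
         && (a + (0u - a) == 0)   /* a + -a -> 0                          */
         && (a + ~a == ~0u)       /* a + ~a -> -1                         */
         && ((b & ~b) == 0)       /* b & ~b -> 0                          */
         && ((b | ~b) == ~0u);    /* b | ~b -> -1                         */
}
#endif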
850 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
851 expression, look in OPS for a corresponding positive operation to cancel
852 it out. If we find one, remove the other from OPS, replace
853 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
854 return false. */
856 static bool
857 eliminate_plus_minus_pair (enum tree_code opcode,
858 vec<operand_entry *> *ops,
859 unsigned int currindex,
860 operand_entry *curr)
862 tree negateop;
863 tree notop;
864 unsigned int i;
865 operand_entry *oe;
867 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
868 return false;
870 negateop = get_unary_op (curr->op, NEGATE_EXPR);
871 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
872 if (negateop == NULL_TREE && notop == NULL_TREE)
873 return false;
875 /* Any non-negated version will have a rank that is one less than
876 the current rank. So once we hit those ranks, if we don't find
877 one, we can stop. */
879 for (i = currindex + 1;
880 ops->iterate (i, &oe)
881 && oe->rank >= curr->rank - 1 ;
882 i++)
884 if (negateop
885 && ops_equal_values_p (oe->op, negateop))
887 if (dump_file && (dump_flags & TDF_DETAILS))
889 fprintf (dump_file, "Equivalence: ");
890 print_generic_expr (dump_file, negateop);
891 fprintf (dump_file, " + -");
892 print_generic_expr (dump_file, oe->op);
893 fprintf (dump_file, " -> 0\n");
896 ops->ordered_remove (i);
897 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
898 ops->ordered_remove (currindex);
899 reassociate_stats.ops_eliminated ++;
901 return true;
903 else if (notop
904 && ops_equal_values_p (oe->op, notop))
906 tree op_type = TREE_TYPE (oe->op);
908 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Equivalence: ");
911 print_generic_expr (dump_file, notop);
912 fprintf (dump_file, " + ~");
913 print_generic_expr (dump_file, oe->op);
914 fprintf (dump_file, " -> -1\n");
917 ops->ordered_remove (i);
918 add_to_ops_vec (ops, build_all_ones_cst (op_type));
919 ops->ordered_remove (currindex);
920 reassociate_stats.ops_eliminated ++;
922 return true;
926 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
927 save it for later inspection in repropagate_negates(). */
928 if (negateop != NULL_TREE
929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
930 plus_negates.safe_push (curr->op);
932 return false;
935 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
936 bitwise not expression, look in OPS for a corresponding operand to
937 cancel it out. If we find one, remove the other from OPS, replace
938 OPS[CURRINDEX] with 0 (for AND) or -1 (for IOR), and return true.
939 Otherwise, return false. */
941 static bool
942 eliminate_not_pairs (enum tree_code opcode,
943 vec<operand_entry *> *ops,
944 unsigned int currindex,
945 operand_entry *curr)
947 tree notop;
948 unsigned int i;
949 operand_entry *oe;
951 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
952 || TREE_CODE (curr->op) != SSA_NAME)
953 return false;
955 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
956 if (notop == NULL_TREE)
957 return false;
959 /* Any non-not version will have a rank that is one less than
960 the current rank. So once we hit those ranks, if we don't find
961 one, we can stop. */
963 for (i = currindex + 1;
964 ops->iterate (i, &oe)
965 && oe->rank >= curr->rank - 1;
966 i++)
968 if (oe->op == notop)
970 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Equivalence: ");
973 print_generic_expr (dump_file, notop);
974 if (opcode == BIT_AND_EXPR)
975 fprintf (dump_file, " & ~");
976 else if (opcode == BIT_IOR_EXPR)
977 fprintf (dump_file, " | ~");
978 print_generic_expr (dump_file, oe->op);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " -> 0\n");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " -> -1\n");
985 if (opcode == BIT_AND_EXPR)
986 oe->op = build_zero_cst (TREE_TYPE (oe->op));
987 else if (opcode == BIT_IOR_EXPR)
988 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
990 reassociate_stats.ops_eliminated += ops->length () - 1;
991 ops->truncate (0);
992 ops->quick_push (oe);
993 return true;
997 return false;
1000 /* Use constant value that may be present in OPS to try to eliminate
1001 operands. Note that this function is only really used when we've
1002 eliminated ops for other reasons, or merged constants. Across
1003 single statements, fold already does all of this, plus more. There
1004 is little point in duplicating logic, so I've only included the
1005 identities that I could ever construct testcases to trigger. */
1007 static void
1008 eliminate_using_constants (enum tree_code opcode,
1009 vec<operand_entry *> *ops)
1011 operand_entry *oelast = ops->last ();
1012 tree type = TREE_TYPE (oelast->op);
1014 if (oelast->rank == 0
1015 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1017 switch (opcode)
1019 case BIT_AND_EXPR:
1020 if (integer_zerop (oelast->op))
1022 if (ops->length () != 1)
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Found & 0, removing all other ops\n");
1027 reassociate_stats.ops_eliminated += ops->length () - 1;
1029 ops->truncate (0);
1030 ops->quick_push (oelast);
1031 return;
1034 else if (integer_all_onesp (oelast->op))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found & -1, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1044 break;
1045 case BIT_IOR_EXPR:
1046 if (integer_all_onesp (oelast->op))
1048 if (ops->length () != 1)
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 fprintf (dump_file, "Found | -1, removing all other ops\n");
1053 reassociate_stats.ops_eliminated += ops->length () - 1;
1055 ops->truncate (0);
1056 ops->quick_push (oelast);
1057 return;
1060 else if (integer_zerop (oelast->op))
1062 if (ops->length () != 1)
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Found | 0, removing\n");
1066 ops->pop ();
1067 reassociate_stats.ops_eliminated++;
1070 break;
1071 case MULT_EXPR:
1072 if (integer_zerop (oelast->op)
1073 || (FLOAT_TYPE_P (type)
1074 && !HONOR_NANS (type)
1075 && !HONOR_SIGNED_ZEROS (type)
1076 && real_zerop (oelast->op)))
1078 if (ops->length () != 1)
1080 if (dump_file && (dump_flags & TDF_DETAILS))
1081 fprintf (dump_file, "Found * 0, removing all other ops\n");
1083 reassociate_stats.ops_eliminated += ops->length () - 1;
1084 ops->truncate (0);
1085 ops->quick_push (oelast);
1086 return;
1089 else if (integer_onep (oelast->op)
1090 || (FLOAT_TYPE_P (type)
1091 && !HONOR_SNANS (type)
1092 && real_onep (oelast->op)))
1094 if (ops->length () != 1)
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, "Found * 1, removing\n");
1098 ops->pop ();
1099 reassociate_stats.ops_eliminated++;
1100 return;
1103 break;
1104 case BIT_XOR_EXPR:
1105 case PLUS_EXPR:
1106 case MINUS_EXPR:
1107 if (integer_zerop (oelast->op)
1108 || (FLOAT_TYPE_P (type)
1109 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1110 && fold_real_zero_addition_p (type, 0, oelast->op,
1111 opcode == MINUS_EXPR)))
1113 if (ops->length () != 1)
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 fprintf (dump_file, "Found [|^+] 0, removing\n");
1117 ops->pop ();
1118 reassociate_stats.ops_eliminated++;
1119 return;
1122 break;
1123 default:
1124 break;
1130 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1131 bool, bool);
1133 /* Structure for tracking and counting operands. */
1134 struct oecount {
1135 unsigned int cnt;
1136 unsigned int id;
1137 enum tree_code oecode;
1138 tree op;
1142 /* The heap for the oecount hashtable and the sorted list of operands. */
1143 static vec<oecount> cvec;
1146 /* Oecount hashtable helpers. */
1148 struct oecount_hasher : int_hash <int, 0, 1>
1150 static inline hashval_t hash (int);
1151 static inline bool equal (int, int);
1154 /* Hash function for oecount. */
1156 inline hashval_t
1157 oecount_hasher::hash (int p)
1159 const oecount *c = &cvec[p - 42];
1160 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1163 /* Comparison function for oecount. */
1165 inline bool
1166 oecount_hasher::equal (int p1, int p2)
1168 const oecount *c1 = &cvec[p1 - 42];
1169 const oecount *c2 = &cvec[p2 - 42];
1170 return c1->oecode == c2->oecode && c1->op == c2->op;
1173 /* Comparison function for qsort sorting oecount elements by count. */
1175 static int
1176 oecount_cmp (const void *p1, const void *p2)
1178 const oecount *c1 = (const oecount *)p1;
1179 const oecount *c2 = (const oecount *)p2;
1180 if (c1->cnt != c2->cnt)
1181 return c1->cnt > c2->cnt ? 1 : -1;
1182 else
1183 /* If counts are identical, use unique IDs to stabilize qsort. */
1184 return c1->id > c2->id ? 1 : -1;
1187 /* Return TRUE iff STMT represents a builtin call that raises OP
1188 to some exponent. */
1190 static bool
1191 stmt_is_power_of_op (gimple *stmt, tree op)
1193 if (!is_gimple_call (stmt))
1194 return false;
1196 switch (gimple_call_combined_fn (stmt))
1198 CASE_CFN_POW:
1199 CASE_CFN_POWI:
1200 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1202 default:
1203 return false;
1207 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1208 in place and return the result. Assumes that stmt_is_power_of_op
1209 was previously called for STMT and returned TRUE. */
1211 static HOST_WIDE_INT
1212 decrement_power (gimple *stmt)
1214 REAL_VALUE_TYPE c, cint;
1215 HOST_WIDE_INT power;
1216 tree arg1;
1218 switch (gimple_call_combined_fn (stmt))
1220 CASE_CFN_POW:
1221 arg1 = gimple_call_arg (stmt, 1);
1222 c = TREE_REAL_CST (arg1);
1223 power = real_to_integer (&c) - 1;
1224 real_from_integer (&cint, VOIDmode, power, SIGNED);
1225 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1226 return power;
1228 CASE_CFN_POWI:
1229 arg1 = gimple_call_arg (stmt, 1);
1230 power = TREE_INT_CST_LOW (arg1) - 1;
1231 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1232 return power;
1234 default:
1235 gcc_unreachable ();
1239 /* Replace the SSA name defined by STMT with a new SSA name, replace
1240 all uses of the old name with the new one, and return the new SSA name. */
1242 static tree
1243 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1245 gimple *use_stmt;
1246 use_operand_p use;
1247 imm_use_iterator iter;
1248 tree new_lhs, new_debug_lhs = NULL_TREE;
1249 tree lhs = gimple_get_lhs (stmt);
1251 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1252 gimple_set_lhs (stmt, new_lhs);
1254 /* Also need to update GIMPLE_DEBUGs. */
1255 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1257 tree repl = new_lhs;
1258 if (is_gimple_debug (use_stmt))
1260 if (new_debug_lhs == NULL_TREE)
1262 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1263 gdebug *def_temp
1264 = gimple_build_debug_bind (new_debug_lhs,
1265 build2 (opcode, TREE_TYPE (lhs),
1266 new_lhs, op),
1267 stmt);
1268 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1269 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1270 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1271 gimple_set_uid (def_temp, gimple_uid (stmt));
1272 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1273 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1275 repl = new_debug_lhs;
1277 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1278 SET_USE (use, repl);
1279 update_stmt (use_stmt);
1281 return new_lhs;
1284 /* Replace all SSA names defined in STMTS_TO_FIX, and their uses,
1285 with new SSA names. Also do this for the stmt that defines *DEF
1286 if *DEF is not OP. */
1288 static void
1289 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1290 vec<gimple *> &stmts_to_fix)
1292 unsigned i;
1293 gimple *stmt;
1295 if (*def != op
1296 && TREE_CODE (*def) == SSA_NAME
1297 && (stmt = SSA_NAME_DEF_STMT (*def))
1298 && gimple_code (stmt) != GIMPLE_NOP)
1299 *def = make_new_ssa_for_def (stmt, opcode, op);
1301 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1302 make_new_ssa_for_def (stmt, opcode, op);
1305 /* Find the single immediate use of STMT's LHS, and replace it
1306 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1307 replace *DEF with OP as well. */
1309 static void
1310 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1312 tree lhs;
1313 gimple *use_stmt;
1314 use_operand_p use;
1315 gimple_stmt_iterator gsi;
1317 if (is_gimple_call (stmt))
1318 lhs = gimple_call_lhs (stmt);
1319 else
1320 lhs = gimple_assign_lhs (stmt);
1322 gcc_assert (has_single_use (lhs));
1323 single_imm_use (lhs, &use, &use_stmt);
1324 if (lhs == *def)
1325 *def = op;
1326 SET_USE (use, op);
1327 if (TREE_CODE (op) != SSA_NAME)
1328 update_stmt (use_stmt);
1329 gsi = gsi_for_stmt (stmt);
1330 unlink_stmt_vdef (stmt);
1331 reassoc_remove_stmt (&gsi);
1332 release_defs (stmt);
1335 /* Walks the linear chain with result *DEF searching for an operation
1336 with operand OP and code OPCODE removing that from the chain. *DEF
1337 is updated if there is only one operand but no operation left. */
1339 static void
1340 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1342 tree orig_def = *def;
1343 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1344 /* PR72835 - Record the stmt chain that has to be updated such that
1345 we don't use the same LHS when the values computed are different. */
1346 auto_vec<gimple *, 64> stmts_to_fix;
1350 tree name;
1352 if (opcode == MULT_EXPR)
1354 if (stmt_is_power_of_op (stmt, op))
1356 if (decrement_power (stmt) == 1)
1358 if (stmts_to_fix.length () > 0)
1359 stmts_to_fix.pop ();
1360 propagate_op_to_single_use (op, stmt, def);
1362 break;
1364 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1366 if (gimple_assign_rhs1 (stmt) == op)
1368 tree cst = build_minus_one_cst (TREE_TYPE (op));
1369 if (stmts_to_fix.length () > 0)
1370 stmts_to_fix.pop ();
1371 propagate_op_to_single_use (cst, stmt, def);
1372 break;
1374 else if (integer_minus_onep (op)
1375 || real_minus_onep (op))
1377 gimple_assign_set_rhs_code
1378 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1379 break;
1384 name = gimple_assign_rhs1 (stmt);
1386 /* If this is the operation we are looking for and one of the operands
1387 is ours, simply propagate the other operand into the stmt's
1388 single use. */
1389 if (gimple_assign_rhs_code (stmt) == opcode
1390 && (name == op
1391 || gimple_assign_rhs2 (stmt) == op))
1393 if (name == op)
1394 name = gimple_assign_rhs2 (stmt);
1395 if (stmts_to_fix.length () > 0)
1396 stmts_to_fix.pop ();
1397 propagate_op_to_single_use (name, stmt, def);
1398 break;
1401 /* We might have a multiply of two __builtin_pow* calls, and
1402 the operand might be hiding in the rightmost one. Likewise
1403 this can happen for a negate. */
1404 if (opcode == MULT_EXPR
1405 && gimple_assign_rhs_code (stmt) == opcode
1406 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1407 && has_single_use (gimple_assign_rhs2 (stmt)))
1409 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1410 if (stmt_is_power_of_op (stmt2, op))
1412 if (decrement_power (stmt2) == 1)
1413 propagate_op_to_single_use (op, stmt2, def);
1414 else
1415 stmts_to_fix.safe_push (stmt2);
1416 break;
1418 else if (is_gimple_assign (stmt2)
1419 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1421 if (gimple_assign_rhs1 (stmt2) == op)
1423 tree cst = build_minus_one_cst (TREE_TYPE (op));
1424 propagate_op_to_single_use (cst, stmt2, def);
1425 break;
1427 else if (integer_minus_onep (op)
1428 || real_minus_onep (op))
1430 stmts_to_fix.safe_push (stmt2);
1431 gimple_assign_set_rhs_code
1432 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1433 break;
1438 /* Continue walking the chain. */
1439 gcc_assert (name != op
1440 && TREE_CODE (name) == SSA_NAME);
1441 stmt = SSA_NAME_DEF_STMT (name);
1442 stmts_to_fix.safe_push (stmt);
1444 while (1);
1446 if (stmts_to_fix.length () > 0 || *def == orig_def)
1447 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1450 /* Returns true if statement S1 dominates statement S2. Like
1451 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1453 static bool
1454 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1456 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1458 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1459 SSA_NAME. Assume it lives at the beginning of function and
1460 thus dominates everything. */
1461 if (!bb1 || s1 == s2)
1462 return true;
1464 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1465 if (!bb2)
1466 return false;
1468 if (bb1 == bb2)
1470 /* PHIs in the same basic block are assumed to be
1471 executed all in parallel; if only one stmt is a PHI,
1472 it dominates the other stmt in the same basic block. */
1473 if (gimple_code (s1) == GIMPLE_PHI)
1474 return true;
1476 if (gimple_code (s2) == GIMPLE_PHI)
1477 return false;
1479 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1481 if (gimple_uid (s1) < gimple_uid (s2))
1482 return true;
1484 if (gimple_uid (s1) > gimple_uid (s2))
1485 return false;
1487 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1488 unsigned int uid = gimple_uid (s1);
1489 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1491 gimple *s = gsi_stmt (gsi);
1492 if (gimple_uid (s) != uid)
1493 break;
1494 if (s == s2)
1495 return true;
1498 return false;
1501 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1504 /* Insert STMT after INSERT_POINT. */
1506 static void
1507 insert_stmt_after (gimple *stmt, gimple *insert_point)
1509 gimple_stmt_iterator gsi;
1510 basic_block bb;
1512 if (gimple_code (insert_point) == GIMPLE_PHI)
1513 bb = gimple_bb (insert_point);
1514 else if (!stmt_ends_bb_p (insert_point))
1516 gsi = gsi_for_stmt (insert_point);
1517 gimple_set_uid (stmt, gimple_uid (insert_point));
1518 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1519 return;
1521 else if (gimple_code (insert_point) == GIMPLE_ASM)
1522 /* We have no idea where to insert - it depends on where the
1523 uses will be placed. */
1524 gcc_unreachable ();
1525 else
1526 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1527 thus if it must end a basic block, it should be a call that can
1528 throw, or some assignment that can throw. If it throws, the LHS
1529 of it will not be initialized though, so only valid places using
1530 the SSA_NAME should be dominated by the fallthru edge. */
1531 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1532 gsi = gsi_after_labels (bb);
1533 if (gsi_end_p (gsi))
1535 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1536 gimple_set_uid (stmt,
1537 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1539 else
1540 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1541 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1544 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1545 name of type TYPE for the result. Places the statement after the
1546 definition of either OP1 or OP2. Returns the new statement. */
1548 static gimple *
1549 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1551 gimple *op1def = NULL, *op2def = NULL;
1552 gimple_stmt_iterator gsi;
1553 tree op;
1554 gassign *sum;
1556 /* Create the addition statement. */
1557 op = make_ssa_name (type);
1558 sum = gimple_build_assign (op, opcode, op1, op2);
1560 /* Find an insertion place and insert. */
1561 if (TREE_CODE (op1) == SSA_NAME)
1562 op1def = SSA_NAME_DEF_STMT (op1);
1563 if (TREE_CODE (op2) == SSA_NAME)
1564 op2def = SSA_NAME_DEF_STMT (op2);
1565 if ((!op1def || gimple_nop_p (op1def))
1566 && (!op2def || gimple_nop_p (op2def)))
1568 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1569 if (gsi_end_p (gsi))
1571 gimple_stmt_iterator gsi2
1572 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1573 gimple_set_uid (sum,
1574 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1576 else
1577 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1578 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1580 else
1582 gimple *insert_point;
1583 if ((!op1def || gimple_nop_p (op1def))
1584 || (op2def && !gimple_nop_p (op2def)
1585 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1586 insert_point = op2def;
1587 else
1588 insert_point = op1def;
1589 insert_stmt_after (sum, insert_point);
1591 update_stmt (sum);
1593 return sum;
1596 /* Perform un-distribution of divisions and multiplications.
1597 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1598 to (A + B) / X for real X.
1600 The algorithm is organized as follows.
1602 - First we walk the addition chain *OPS looking for summands that
1603 are defined by a multiplication or a real division. This results
1604 in the candidates bitmap with relevant indices into *OPS.
1606 - Second we build the chains of multiplications or divisions for
1607 these candidates, counting the number of occurrences of (operand, code)
1608 pairs in all of the candidates chains.
1610 - Third we sort the (operand, code) pairs by number of occurrence and
1611 process them starting with the pair with the most uses.
1613 * For each such pair we walk the candidates again to build a
1614 second candidate bitmap noting all multiplication/division chains
1615 that have at least one occurrence of (operand, code).
1617 * We build an alternate addition chain only covering these
1618 candidates with one (operand, code) operation removed from their
1619 multiplication/division chain.
1621 * The first candidate gets replaced by the alternate addition chain
1622 multiplied/divided by the operand.
1624 * All candidate chains get disabled for further processing and
1625 processing of (operand, code) pairs continues.
1627 The alternate addition chains built are re-processed by the main
1628 reassociation algorithm which allows optimizing a * x * y + b * y * x
1629 to (a + b) * x * y in one invocation of the reassociation pass. */
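/* Editorial illustration, not part of the pass: a worked instance of the
   un-distribution described above.  In a * x + b * x + c * x the
   (operand, code) pair (x, MULT_EXPR) occurs three times, so the three
   summands are rewritten into one alternate chain a + b + c which is then
   multiplied by x once; the division case works the same way.  The
   function is a hypothetical example and relies on reassociation being
   allowed for the type (e.g. -fassociative-math for floats); guarded with
   #if 0 so it does not affect the build.  */
#if 0
static double
undistribute_sketch (double a, double b, double c, double x)
{
  double m = (a + b + c) * x;   /* from a * x + b * x + c * x  */
  double q = (a + b) / x;       /* from a / x + b / x          */
  return m + q;
}
#endif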
1631 static bool
1632 undistribute_ops_list (enum tree_code opcode,
1633 vec<operand_entry *> *ops, class loop *loop)
1635 unsigned int length = ops->length ();
1636 operand_entry *oe1;
1637 unsigned i, j;
1638 unsigned nr_candidates, nr_candidates2;
1639 sbitmap_iterator sbi0;
1640 vec<operand_entry *> *subops;
1641 bool changed = false;
1642 unsigned int next_oecount_id = 0;
1644 if (length <= 1
1645 || opcode != PLUS_EXPR)
1646 return false;
1648 /* Build a list of candidates to process. */
1649 auto_sbitmap candidates (length);
1650 bitmap_clear (candidates);
1651 nr_candidates = 0;
1652 FOR_EACH_VEC_ELT (*ops, i, oe1)
1654 enum tree_code dcode;
1655 gimple *oe1def;
1657 if (TREE_CODE (oe1->op) != SSA_NAME)
1658 continue;
1659 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1660 if (!is_gimple_assign (oe1def))
1661 continue;
1662 dcode = gimple_assign_rhs_code (oe1def);
1663 if ((dcode != MULT_EXPR
1664 && dcode != RDIV_EXPR)
1665 || !is_reassociable_op (oe1def, dcode, loop))
1666 continue;
1668 bitmap_set_bit (candidates, i);
1669 nr_candidates++;
1672 if (nr_candidates < 2)
1673 return false;
1675 if (dump_file && (dump_flags & TDF_DETAILS))
1677 fprintf (dump_file, "searching for un-distribute opportunities ");
1678 print_generic_expr (dump_file,
1679 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1680 fprintf (dump_file, " %d\n", nr_candidates);
1683 /* Build linearized sub-operand lists and the counting table. */
1684 cvec.create (0);
1686 hash_table<oecount_hasher> ctable (15);
1688 /* ??? Macro arguments cannot have multi-argument template types in
1689 them. This typedef is needed to work around that limitation. */
1690 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1691 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1692 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1694 gimple *oedef;
1695 enum tree_code oecode;
1696 unsigned j;
1698 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1699 oecode = gimple_assign_rhs_code (oedef);
1700 linearize_expr_tree (&subops[i], oedef,
1701 associative_tree_code (oecode), false);
1703 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1705 oecount c;
1706 int *slot;
1707 int idx;
1708 c.oecode = oecode;
1709 c.cnt = 1;
1710 c.id = next_oecount_id++;
1711 c.op = oe1->op;
1712 cvec.safe_push (c);
1713 idx = cvec.length () + 41;
1714 slot = ctable.find_slot (idx, INSERT);
1715 if (!*slot)
1717 *slot = idx;
1719 else
1721 cvec.pop ();
1722 cvec[*slot - 42].cnt++;
1727 /* Sort the counting table. */
1728 cvec.qsort (oecount_cmp);
1730 if (dump_file && (dump_flags & TDF_DETAILS))
1732 oecount *c;
1733 fprintf (dump_file, "Candidates:\n");
1734 FOR_EACH_VEC_ELT (cvec, j, c)
1736 fprintf (dump_file, " %u %s: ", c->cnt,
1737 c->oecode == MULT_EXPR
1738 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1739 print_generic_expr (dump_file, c->op);
1740 fprintf (dump_file, "\n");
1744 /* Process the (operand, code) pairs in order of most occurrence. */
1745 auto_sbitmap candidates2 (length);
1746 while (!cvec.is_empty ())
1748 oecount *c = &cvec.last ();
1749 if (c->cnt < 2)
1750 break;
1752 /* Now collect the operands in the outer chain that contain
1753 the common operand in their inner chain. */
1754 bitmap_clear (candidates2);
1755 nr_candidates2 = 0;
1756 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1758 gimple *oedef;
1759 enum tree_code oecode;
1760 unsigned j;
1761 tree op = (*ops)[i]->op;
1763 /* If we undistributed in this chain already this may be
1764 a constant. */
1765 if (TREE_CODE (op) != SSA_NAME)
1766 continue;
1768 oedef = SSA_NAME_DEF_STMT (op);
1769 oecode = gimple_assign_rhs_code (oedef);
1770 if (oecode != c->oecode)
1771 continue;
1773 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1775 if (oe1->op == c->op)
1777 bitmap_set_bit (candidates2, i);
1778 ++nr_candidates2;
1779 break;
1784 if (nr_candidates2 >= 2)
1786 operand_entry *oe1, *oe2;
1787 gimple *prod;
1788 int first = bitmap_first_set_bit (candidates2);
1790 /* Build the new addition chain. */
1791 oe1 = (*ops)[first];
1792 if (dump_file && (dump_flags & TDF_DETAILS))
1794 fprintf (dump_file, "Building (");
1795 print_generic_expr (dump_file, oe1->op);
1797 zero_one_operation (&oe1->op, c->oecode, c->op);
1798 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1800 gimple *sum;
1801 oe2 = (*ops)[i];
1802 if (dump_file && (dump_flags & TDF_DETAILS))
1804 fprintf (dump_file, " + ");
1805 print_generic_expr (dump_file, oe2->op);
1807 zero_one_operation (&oe2->op, c->oecode, c->op);
1808 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1809 oe1->op, oe2->op, opcode);
1810 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1811 oe2->rank = 0;
1812 oe1->op = gimple_get_lhs (sum);
1815 /* Apply the multiplication/division. */
1816 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1817 oe1->op, c->op, c->oecode);
1818 if (dump_file && (dump_flags & TDF_DETAILS))
1820 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1821 print_generic_expr (dump_file, c->op);
1822 fprintf (dump_file, "\n");
1825 /* Record it in the addition chain and disable further
1826 undistribution with this op. */
1827 oe1->op = gimple_assign_lhs (prod);
1828 oe1->rank = get_rank (oe1->op);
1829 subops[first].release ();
1831 changed = true;
1834 cvec.pop ();
1837 for (i = 0; i < ops->length (); ++i)
1838 subops[i].release ();
1839 free (subops);
1840 cvec.release ();
1842 return changed;
1845 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1846 first: element index for each relevant BIT_FIELD_REF.
1847 second: the index into the operand vector (*OPS) for each relevant BIT_FIELD_REF. */
1848 typedef std::pair<unsigned, unsigned> v_info_elem;
1849 struct v_info {
1850 tree vec_type;
1851 auto_vec<v_info_elem, 32> vec;
1853 typedef v_info *v_info_ptr;
1855 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1856 static int
1857 sort_by_mach_mode (const void *p_i, const void *p_j)
1859 const tree tr1 = *((const tree *) p_i);
1860 const tree tr2 = *((const tree *) p_j);
1861 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1862 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1863 if (mode1 > mode2)
1864 return 1;
1865 else if (mode1 < mode2)
1866 return -1;
1867 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1868 return -1;
1869 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1870 return 1;
1871 return 0;
1875 /* Clean up the hash map holding VECTOR information. */
1875 static void
1876 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1878 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1879 it != info_map.end (); ++it)
1881 v_info_ptr info = (*it).second;
1882 delete info;
1883 (*it).second = NULL;
1887 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1888 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1889 is transformed to
1890 Vs = (V1 + V2 + ... + Vn)
1891 Vs[0] + Vs[1] + ... + Vs[k]
1893 The basic steps are listed below:
1895 1) Check the addition chain *OPS, looking for summands that come from a
1896 BIT_FIELD_REF on a VECTOR type. Put the information into
1897 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1899 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1900 contiguous and cover the whole VECTOR perfectly, without any holes.
1901 Obtain one VECTOR list which contains the candidates to be transformed.
1903 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1904 group of candidates with the same mode, build the addition statements
1905 for them and generate BIT_FIELD_REFs accordingly.
1907 TODO:
1908 The current implementation requires that whole VECTORs be fully
1909 covered; it could be extended to support partial coverage (adjacent
1910 elements that do not fill the whole vector), which may need some cost
1911 model to decide when it is worthwhile. */
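/* Editorial illustration, not part of the pass: the shape of the
   BIT_FIELD_REF un-distribution described above, written with a
   hypothetical 4-element vector type (the GCC vector_size extension and
   vector subscripting are assumed purely for illustration).  Summing every
   lane of V1 and V2 separately becomes one vector addition followed by a
   sum over the lanes of the result.  Guarded with #if 0 so it does not
   affect the build.  */
#if 0
typedef int v4si_sketch __attribute__ ((vector_size (16)));

static int
vector_undistribute_sketch (v4si_sketch v1, v4si_sketch v2)
{
  /* Before: v1[0] + v1[1] + v1[2] + v1[3] + v2[0] + v2[1] + v2[2] + v2[3]
     After:  vs = v1 + v2;  vs[0] + vs[1] + vs[2] + vs[3]  */
  v4si_sketch vs = v1 + v2;
  return vs[0] + vs[1] + vs[2] + vs[3];
}
#endif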
1913 static bool
1914 undistribute_bitref_for_vector (enum tree_code opcode,
1915 vec<operand_entry *> *ops, struct loop *loop)
1917 if (ops->length () <= 1)
1918 return false;
1920 if (opcode != PLUS_EXPR
1921 && opcode != MULT_EXPR
1922 && opcode != BIT_XOR_EXPR
1923 && opcode != BIT_IOR_EXPR
1924 && opcode != BIT_AND_EXPR)
1925 return false;
1927 hash_map<tree, v_info_ptr> v_info_map;
1928 operand_entry *oe1;
1929 unsigned i;
1931 /* Find the summands coming from VECTOR BIT_FIELD_REFs in the addition
1932 chain and put the information into the map. */
1933 FOR_EACH_VEC_ELT (*ops, i, oe1)
1935 enum tree_code dcode;
1936 gimple *oe1def;
1938 if (TREE_CODE (oe1->op) != SSA_NAME)
1939 continue;
1940 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1941 if (!is_gimple_assign (oe1def))
1942 continue;
1943 dcode = gimple_assign_rhs_code (oe1def);
1944 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1945 continue;
1947 tree rhs = gimple_assign_rhs1 (oe1def);
1948 tree vec = TREE_OPERAND (rhs, 0);
1949 tree vec_type = TREE_TYPE (vec);
1951 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1952 continue;
1954 /* Ignore it if target machine can't support this VECTOR type. */
1955 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1956 continue;
1958 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1959 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1960 continue;
1962 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1963 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1964 continue;
1966 /* The type of BIT_FIELD_REF might not be equal to the element type of
1967 the vector. We want to use a vector type with element type the
1968 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1969 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1971 machine_mode simd_mode;
1972 unsigned HOST_WIDE_INT size, nunits;
1973 unsigned HOST_WIDE_INT elem_size
1974 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1975 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1976 continue;
1977 if (size <= elem_size || (size % elem_size) != 0)
1978 continue;
1979 nunits = size / elem_size;
1980 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1981 nunits).exists (&simd_mode))
1982 continue;
1983 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1985 /* Ignore it if target machine can't support this VECTOR type. */
1986 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1987 continue;
1989 /* Only handle vector types with a constant number of elements; this
1990 constrains the BIT_FIELD_REF offset and size. */
1991 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1992 continue;
1994 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1995 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1996 continue;
1999 tree elem_type = TREE_TYPE (vec_type);
2000 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2001 if (maybe_ne (bit_field_size (rhs), elem_size))
2002 continue;
2004 unsigned idx;
2005 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2006 continue;
2008 /* Ignore it if target machine can't support this type of VECTOR
2009 operation. */
2010 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2011 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2012 continue;
2014 bool existed;
2015 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2016 if (!existed)
2018 info = new v_info;
2019 info->vec_type = vec_type;
2021 else if (!types_compatible_p (vec_type, info->vec_type))
2022 continue;
2023 info->vec.safe_push (std::make_pair (idx, i));
2026 /* We need at least two VECTORs to combine. */
2027 if (v_info_map.elements () <= 1)
2029 cleanup_vinfo_map (v_info_map);
2030 return false;
2033 /* Verify all VECTOR candidates by checking two conditions:
2034 1) sorted offsets are adjacent, no holes.
2035 2) can fill the whole VECTOR perfectly.
2036 And add the valid candidates to a vector for further handling. */
2037 auto_vec<tree> valid_vecs (v_info_map.elements ());
2038 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2039 it != v_info_map.end (); ++it)
2041 tree cand_vec = (*it).first;
2042 v_info_ptr cand_info = (*it).second;
2043 unsigned int num_elems
2044 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2045 if (cand_info->vec.length () != num_elems)
2046 continue;
2047 sbitmap holes = sbitmap_alloc (num_elems);
2048 bitmap_ones (holes);
2049 bool valid = true;
2050 v_info_elem *curr;
2051 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2053 if (!bitmap_bit_p (holes, curr->first))
2055 valid = false;
2056 break;
2058 else
2059 bitmap_clear_bit (holes, curr->first);
2061 if (valid && bitmap_empty_p (holes))
2062 valid_vecs.quick_push (cand_vec);
2063 sbitmap_free (holes);
2066 /* We need at least two VECTORs to combine. */
2067 if (valid_vecs.length () <= 1)
2069 cleanup_vinfo_map (v_info_map);
2070 return false;
2073 valid_vecs.qsort (sort_by_mach_mode);
2074 /* Go through all candidates in machine mode order; combine the candidates
2075 that share a mode and skip modes with only a single candidate. */
2076 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2078 tree tvec = valid_vecs[i];
2079 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2081 /* Skip modes with only a single candidate. */
2082 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2083 continue;
2085 unsigned int idx, j;
2086 gimple *sum = NULL;
2087 tree sum_vec = tvec;
2088 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2089 v_info_elem *elem;
2090 tree vec_type = info_ptr->vec_type;
2092 /* Build the sum for all candidates with the same mode. */
2095 sum = build_and_add_sum (vec_type, sum_vec,
2096 valid_vecs[i + 1], opcode);
2097 if (!useless_type_conversion_p (vec_type,
2098 TREE_TYPE (valid_vecs[i + 1])))
2100 /* Update the operands only after build_and_add_sum,
2101 so that we don't have to repeat the placement algorithm
2102 of build_and_add_sum. */
2103 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2104 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2105 valid_vecs[i + 1]);
2106 tree lhs = make_ssa_name (vec_type);
2107 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2108 gimple_set_uid (g, gimple_uid (sum));
2109 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2110 gimple_assign_set_rhs2 (sum, lhs);
2111 if (sum_vec == tvec)
2113 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2114 lhs = make_ssa_name (vec_type);
2115 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2116 gimple_set_uid (g, gimple_uid (sum));
2117 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2118 gimple_assign_set_rhs1 (sum, lhs);
2120 update_stmt (sum);
2122 sum_vec = gimple_get_lhs (sum);
2123 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2124 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2125 /* Update the ops related to the current candidate VECTOR. */
2126 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2128 idx = elem->second;
2129 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2130 /* Mark the def as visited so the op definition will get DCEd later. */
2131 gimple_set_visited (def, true);
2132 if (opcode == PLUS_EXPR
2133 || opcode == BIT_XOR_EXPR
2134 || opcode == BIT_IOR_EXPR)
2135 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2136 else if (opcode == MULT_EXPR)
2137 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2138 else
2140 gcc_assert (opcode == BIT_AND_EXPR);
2141 (*ops)[idx]->op
2142 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2144 (*ops)[idx]->rank = 0;
2146 if (dump_file && (dump_flags & TDF_DETAILS))
2148 fprintf (dump_file, "Generating addition -> ");
2149 print_gimple_stmt (dump_file, sum, 0);
2151 i++;
2153 while ((i < valid_vecs.length () - 1)
2154 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2156 /* Referring to the first valid VECTOR with this mode, generate the
2157 BIT_FIELD_REF statements accordingly. */
2158 info_ptr = *(v_info_map.get (tvec));
2159 gcc_assert (sum);
2160 tree elem_type = TREE_TYPE (vec_type);
2161 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2163 idx = elem->second;
2164 tree dst = make_ssa_name (elem_type);
2165 tree pos = bitsize_int (elem->first
2166 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2167 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2168 TYPE_SIZE (elem_type), pos);
2169 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2170 insert_stmt_after (gs, sum);
2171 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2172 /* Mark the def as visited so the op definition will get DCEd later. */
2173 gimple_set_visited (def, true);
2174 (*ops)[idx]->op = gimple_assign_lhs (gs);
2175 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2176 if (dump_file && (dump_flags & TDF_DETAILS))
2178 fprintf (dump_file, "Generating bit_field_ref -> ");
2179 print_gimple_stmt (dump_file, gs, 0);
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2187 cleanup_vinfo_map (v_info_map);
2189 return true;
2192 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2193 expression, examine the other OPS to see if any of them are comparisons
2194 of the same values, which we may be able to combine or eliminate.
2195 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
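/* An additional illustration (added for exposition, not in the original
   comment): for BIT_AND_EXPR the analogous folding can rewrite
   (a <= b) & (a >= b) as (a == b). */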
2197 static bool
2198 eliminate_redundant_comparison (enum tree_code opcode,
2199 vec<operand_entry *> *ops,
2200 unsigned int currindex,
2201 operand_entry *curr)
2203 tree op1, op2;
2204 enum tree_code lcode, rcode;
2205 gimple *def1, *def2;
2206 int i;
2207 operand_entry *oe;
2209 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2210 return false;
2212 /* Check that CURR is a comparison. */
2213 if (TREE_CODE (curr->op) != SSA_NAME)
2214 return false;
2215 def1 = SSA_NAME_DEF_STMT (curr->op);
2216 if (!is_gimple_assign (def1))
2217 return false;
2218 lcode = gimple_assign_rhs_code (def1);
2219 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2220 return false;
2221 op1 = gimple_assign_rhs1 (def1);
2222 op2 = gimple_assign_rhs2 (def1);
2224 /* Now look for a similar comparison in the remaining OPS. */
2225 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2227 tree t;
2229 if (TREE_CODE (oe->op) != SSA_NAME)
2230 continue;
2231 def2 = SSA_NAME_DEF_STMT (oe->op);
2232 if (!is_gimple_assign (def2))
2233 continue;
2234 rcode = gimple_assign_rhs_code (def2);
2235 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2236 continue;
2238 /* If we got here, we have a match. See if we can combine the
2239 two comparisons. */
2240 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2241 if (opcode == BIT_IOR_EXPR)
2242 t = maybe_fold_or_comparisons (type,
2243 lcode, op1, op2,
2244 rcode, gimple_assign_rhs1 (def2),
2245 gimple_assign_rhs2 (def2));
2246 else
2247 t = maybe_fold_and_comparisons (type,
2248 lcode, op1, op2,
2249 rcode, gimple_assign_rhs1 (def2),
2250 gimple_assign_rhs2 (def2));
2251 if (!t)
2252 continue;
2254 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2255 always give us a boolean_type_node value back. If the original
2256 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2257 we need to convert. */
2258 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2259 t = fold_convert (TREE_TYPE (curr->op), t);
2261 if (TREE_CODE (t) != INTEGER_CST
2262 && !operand_equal_p (t, curr->op, 0))
2264 enum tree_code subcode;
2265 tree newop1, newop2;
2266 if (!COMPARISON_CLASS_P (t))
2267 continue;
2268 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2269 STRIP_USELESS_TYPE_CONVERSION (newop1);
2270 STRIP_USELESS_TYPE_CONVERSION (newop2);
2271 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2272 continue;
2275 if (dump_file && (dump_flags & TDF_DETAILS))
2277 fprintf (dump_file, "Equivalence: ");
2278 print_generic_expr (dump_file, curr->op);
2279 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2280 print_generic_expr (dump_file, oe->op);
2281 fprintf (dump_file, " -> ");
2282 print_generic_expr (dump_file, t);
2283 fprintf (dump_file, "\n");
2286 /* Now we can delete oe, as it has been subsumed by the new combined
2287 expression t. */
2288 ops->ordered_remove (i);
2289 reassociate_stats.ops_eliminated ++;
2291 /* If t is the same as curr->op, we're done. Otherwise we must
2292 replace curr->op with t. A special case is when we get a constant
2293 back, in which case we add it to the end instead of in place of
2294 the current entry. */
2295 if (TREE_CODE (t) == INTEGER_CST)
2297 ops->ordered_remove (currindex);
2298 add_to_ops_vec (ops, t);
2300 else if (!operand_equal_p (t, curr->op, 0))
2302 gimple *sum;
2303 enum tree_code subcode;
2304 tree newop1;
2305 tree newop2;
2306 gcc_assert (COMPARISON_CLASS_P (t));
2307 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2308 STRIP_USELESS_TYPE_CONVERSION (newop1);
2309 STRIP_USELESS_TYPE_CONVERSION (newop2);
2310 gcc_checking_assert (is_gimple_val (newop1)
2311 && is_gimple_val (newop2));
2312 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2313 curr->op = gimple_get_lhs (sum);
2315 return true;
2318 return false;
2322 /* Transform repeated addition of same values into multiply with
2323 constant. */
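/* For illustration (a note added for exposition, not in the original
   comment): an operand list equivalent to x + x + x + y is rewritten as
   x * 3 + y; for floating-point operands this is only done when
   -funsafe-math-optimizations is in effect. */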
2324 static bool
2325 transform_add_to_multiply (vec<operand_entry *> *ops)
2327 operand_entry *oe;
2328 tree op = NULL_TREE;
2329 int j;
2330 int i, start = -1, end = 0, count = 0;
2331 auto_vec<std::pair <int, int> > indxs;
2332 bool changed = false;
2334 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2335 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2336 || !flag_unsafe_math_optimizations))
2337 return false;
2339 /* Look for repeated operands. */
2340 FOR_EACH_VEC_ELT (*ops, i, oe)
2342 if (start == -1)
2344 count = 1;
2345 op = oe->op;
2346 start = i;
2348 else if (operand_equal_p (oe->op, op, 0))
2350 count++;
2351 end = i;
2353 else
2355 if (count > 1)
2356 indxs.safe_push (std::make_pair (start, end));
2357 count = 1;
2358 op = oe->op;
2359 start = i;
2363 if (count > 1)
2364 indxs.safe_push (std::make_pair (start, end));
2366 for (j = indxs.length () - 1; j >= 0; --j)
2368 /* Convert repeated operand addition to multiplication. */
2369 start = indxs[j].first;
2370 end = indxs[j].second;
2371 op = (*ops)[start]->op;
2372 count = end - start + 1;
2373 for (i = end; i >= start; --i)
2374 ops->unordered_remove (i);
2375 tree tmp = make_ssa_name (TREE_TYPE (op));
2376 tree cst = build_int_cst (integer_type_node, count);
2377 gassign *mul_stmt
2378 = gimple_build_assign (tmp, MULT_EXPR,
2379 op, fold_convert (TREE_TYPE (op), cst));
2380 gimple_set_visited (mul_stmt, true);
2381 add_to_ops_vec (ops, tmp, mul_stmt);
2382 changed = true;
2385 return changed;
2389 /* Perform various identities and other optimizations on the list of
2390 operand entries, stored in OPS. The tree code for the binary
2391 operation between all the operands is OPCODE. */
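/* For illustration (a note added for exposition, not in the original
   comment): for a PLUS_EXPR chain such as a + 5 + 3 the two trailing
   constants are folded into a single 8, and the eliminations invoked
   below can drop identities such as x + (-x) -> 0 in PLUS_EXPR chains
   or b & b -> b in BIT_AND_EXPR chains. */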
2393 static void
2394 optimize_ops_list (enum tree_code opcode,
2395 vec<operand_entry *> *ops)
2397 unsigned int length = ops->length ();
2398 unsigned int i;
2399 operand_entry *oe;
2400 operand_entry *oelast = NULL;
2401 bool iterate = false;
2403 if (length == 1)
2404 return;
2406 oelast = ops->last ();
2408 /* If the last two are constants, pop the constants off, merge them
2409 and try the next two. */
2410 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2412 operand_entry *oelm1 = (*ops)[length - 2];
2414 if (oelm1->rank == 0
2415 && is_gimple_min_invariant (oelm1->op)
2416 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2417 TREE_TYPE (oelast->op)))
2419 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2420 oelm1->op, oelast->op);
2422 if (folded && is_gimple_min_invariant (folded))
2424 if (dump_file && (dump_flags & TDF_DETAILS))
2425 fprintf (dump_file, "Merging constants\n");
2427 ops->pop ();
2428 ops->pop ();
2430 add_to_ops_vec (ops, folded);
2431 reassociate_stats.constants_eliminated++;
2433 optimize_ops_list (opcode, ops);
2434 return;
2439 eliminate_using_constants (opcode, ops);
2440 oelast = NULL;
2442 for (i = 0; ops->iterate (i, &oe);)
2444 bool done = false;
2446 if (eliminate_not_pairs (opcode, ops, i, oe))
2447 return;
2448 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2449 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2450 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2452 if (done)
2453 return;
2454 iterate = true;
2455 oelast = NULL;
2456 continue;
2458 oelast = oe;
2459 i++;
2462 if (iterate)
2463 optimize_ops_list (opcode, ops);
2466 /* The following functions are subroutines to optimize_range_tests and allow
2467 it to try to change a logical combination of comparisons into a range
2468 test.
2470 For example, both
2471 X == 2 || X == 5 || X == 3 || X == 4
2472 and
2473 X >= 2 && X <= 5
2474 are converted to
2475 (unsigned) (X - 2) <= 3
2477 For more information see comments above fold_test_range in fold-const.c,
2478 this implementation is for GIMPLE. */
2482 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
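/* A note added for exposition: the printed form is EXP +[LOW, HIGH] when
   the entry's in_p is set (EXP is within the range) and EXP -[LOW, HIGH]
   when it is not (EXP is outside the range). */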
2484 void
2485 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2487 if (!skip_exp)
2488 print_generic_expr (file, r->exp);
2489 fprintf (file, " %c[", r->in_p ? '+' : '-');
2490 print_generic_expr (file, r->low);
2491 fputs (", ", file);
2492 print_generic_expr (file, r->high);
2493 fputc (']', file);
2496 /* Dump the range entry R to STDERR. */
2498 DEBUG_FUNCTION void
2499 debug_range_entry (struct range_entry *r)
2501 dump_range_entry (stderr, r, false);
2502 fputc ('\n', stderr);
2505 /* This is similar to make_range in fold-const.c, but on top of
2506 GIMPLE instead of trees. If EXP is non-NULL, it should be
2507 an SSA_NAME and the STMT argument is ignored; otherwise the STMT
2508 argument should be a GIMPLE_COND. */
2510 void
2511 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2513 int in_p;
2514 tree low, high;
2515 bool is_bool, strict_overflow_p;
2517 r->exp = NULL_TREE;
2518 r->in_p = false;
2519 r->strict_overflow_p = false;
2520 r->low = NULL_TREE;
2521 r->high = NULL_TREE;
2522 if (exp != NULL_TREE
2523 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2524 return;
2526 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2527 and see if we can refine the range. Some of the cases below may not
2528 happen, but it doesn't seem worth worrying about this. We "continue"
2529 the outer loop when we've changed something; otherwise we "break"
2530 the switch, which will "break" the while. */
2531 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2532 high = low;
2533 in_p = 0;
2534 strict_overflow_p = false;
2535 is_bool = false;
2536 if (exp == NULL_TREE)
2537 is_bool = true;
2538 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2540 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2541 is_bool = true;
2542 else
2543 return;
2545 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2546 is_bool = true;
2548 while (1)
2550 enum tree_code code;
2551 tree arg0, arg1, exp_type;
2552 tree nexp;
2553 location_t loc;
2555 if (exp != NULL_TREE)
2557 if (TREE_CODE (exp) != SSA_NAME
2558 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2559 break;
2561 stmt = SSA_NAME_DEF_STMT (exp);
2562 if (!is_gimple_assign (stmt))
2563 break;
2565 code = gimple_assign_rhs_code (stmt);
2566 arg0 = gimple_assign_rhs1 (stmt);
2567 arg1 = gimple_assign_rhs2 (stmt);
2568 exp_type = TREE_TYPE (exp);
2570 else
2572 code = gimple_cond_code (stmt);
2573 arg0 = gimple_cond_lhs (stmt);
2574 arg1 = gimple_cond_rhs (stmt);
2575 exp_type = boolean_type_node;
2578 if (TREE_CODE (arg0) != SSA_NAME
2579 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2580 break;
2581 loc = gimple_location (stmt);
2582 switch (code)
2584 case BIT_NOT_EXPR:
2585 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2586 /* Ensure the range is either +[-,0], +[0,0],
2587 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2588 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2589 or similar expression of unconditional true or
2590 false, it should not be negated. */
2591 && ((high && integer_zerop (high))
2592 || (low && integer_onep (low))))
2594 in_p = !in_p;
2595 exp = arg0;
2596 continue;
2598 break;
2599 case SSA_NAME:
2600 exp = arg0;
2601 continue;
2602 CASE_CONVERT:
2603 if (is_bool)
2605 if ((TYPE_PRECISION (exp_type) == 1
2606 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2607 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2608 return;
2610 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2612 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2613 is_bool = true;
2614 else
2615 return;
2617 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2618 is_bool = true;
2619 goto do_default;
2620 case EQ_EXPR:
2621 case NE_EXPR:
2622 case LT_EXPR:
2623 case LE_EXPR:
2624 case GE_EXPR:
2625 case GT_EXPR:
2626 is_bool = true;
2627 /* FALLTHRU */
2628 default:
2629 if (!is_bool)
2630 return;
2631 do_default:
2632 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2633 &low, &high, &in_p,
2634 &strict_overflow_p);
2635 if (nexp != NULL_TREE)
2637 exp = nexp;
2638 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2639 continue;
2641 break;
2643 break;
2645 if (is_bool)
2647 r->exp = exp;
2648 r->in_p = in_p;
2649 r->low = low;
2650 r->high = high;
2651 r->strict_overflow_p = strict_overflow_p;
2655 /* Comparison function for qsort. Sort entries
2656 without an SSA_NAME exp first, then entries with SSA_NAMEs sorted
2657 by increasing SSA_NAME_VERSION; for the same SSA_NAME, sort
2658 by increasing ->low, and if ->low is the same, by increasing
2659 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2660 means maximum. */
2662 static int
2663 range_entry_cmp (const void *a, const void *b)
2665 const struct range_entry *p = (const struct range_entry *) a;
2666 const struct range_entry *q = (const struct range_entry *) b;
2668 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2670 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2672 /* Group range_entries for the same SSA_NAME together. */
2673 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2674 return -1;
2675 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2676 return 1;
2677 /* If ->low is different, NULL low goes first, then by
2678 ascending low. */
2679 if (p->low != NULL_TREE)
2681 if (q->low != NULL_TREE)
2683 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2684 p->low, q->low);
2685 if (tem && integer_onep (tem))
2686 return -1;
2687 tem = fold_binary (GT_EXPR, boolean_type_node,
2688 p->low, q->low);
2689 if (tem && integer_onep (tem))
2690 return 1;
2692 else
2693 return 1;
2695 else if (q->low != NULL_TREE)
2696 return -1;
2697 /* If ->high is different, NULL high goes last, before that by
2698 ascending high. */
2699 if (p->high != NULL_TREE)
2701 if (q->high != NULL_TREE)
2703 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2704 p->high, q->high);
2705 if (tem && integer_onep (tem))
2706 return -1;
2707 tem = fold_binary (GT_EXPR, boolean_type_node,
2708 p->high, q->high);
2709 if (tem && integer_onep (tem))
2710 return 1;
2712 else
2713 return -1;
2715 else if (q->high != NULL_TREE)
2716 return 1;
2717 /* If both ranges are the same, sort below by ascending idx. */
2719 else
2720 return 1;
2722 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2723 return -1;
2725 if (p->idx < q->idx)
2726 return -1;
2727 else
2729 gcc_checking_assert (p->idx > q->idx);
2730 return 1;
2734 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2735 insert needed statements BEFORE or after GSI. */
2737 static tree
2738 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2740 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2741 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2742 if (TREE_CODE (ret) != SSA_NAME)
2744 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2745 if (before)
2746 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2747 else
2748 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2749 ret = gimple_assign_lhs (g);
2751 return ret;
2754 /* Helper routine of optimize_range_tests.
2755 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2756 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2757 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2758 is NULL, OTHERRANGEP must not be NULL, and then OTHERRANGEP points to
2759 an array of COUNT pointers to other ranges. Return
2760 true if the range merge has been successful.
2761 If OPCODE is ERROR_MARK, this is called from within
2762 maybe_optimize_range_tests and is performing inter-bb range optimization.
2763 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2764 oe->rank. */
2766 static bool
2767 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2768 struct range_entry **otherrangep,
2769 unsigned int count, enum tree_code opcode,
2770 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2771 bool in_p, tree low, tree high, bool strict_overflow_p)
2773 operand_entry *oe = (*ops)[range->idx];
2774 tree op = oe->op;
2775 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2776 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2777 location_t loc = gimple_location (stmt);
2778 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2779 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2780 in_p, low, high);
2781 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2782 gimple_stmt_iterator gsi;
2783 unsigned int i, uid;
2785 if (tem == NULL_TREE)
2786 return false;
2788 /* If op is default def SSA_NAME, there is no place to insert the
2789 new comparison. Give up, unless we can use OP itself as the
2790 range test. */
2791 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2793 if (op == range->exp
2794 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2795 || TREE_CODE (optype) == BOOLEAN_TYPE)
2796 && (op == tem
2797 || (TREE_CODE (tem) == EQ_EXPR
2798 && TREE_OPERAND (tem, 0) == op
2799 && integer_onep (TREE_OPERAND (tem, 1))))
2800 && opcode != BIT_IOR_EXPR
2801 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2803 stmt = NULL;
2804 tem = op;
2806 else
2807 return false;
2810 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2811 warning_at (loc, OPT_Wstrict_overflow,
2812 "assuming signed overflow does not occur "
2813 "when simplifying range test");
2815 if (dump_file && (dump_flags & TDF_DETAILS))
2817 struct range_entry *r;
2818 fprintf (dump_file, "Optimizing range tests ");
2819 dump_range_entry (dump_file, range, false);
2820 for (i = 0; i < count; i++)
2822 if (otherrange)
2823 r = otherrange + i;
2824 else
2825 r = otherrangep[i];
2826 if (r->exp
2827 && r->exp != range->exp
2828 && TREE_CODE (r->exp) == SSA_NAME)
2830 fprintf (dump_file, " and ");
2831 dump_range_entry (dump_file, r, false);
2833 else
2835 fprintf (dump_file, " and");
2836 dump_range_entry (dump_file, r, true);
2839 fprintf (dump_file, "\n into ");
2840 print_generic_expr (dump_file, tem);
2841 fprintf (dump_file, "\n");
2844 if (opcode == BIT_IOR_EXPR
2845 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2846 tem = invert_truthvalue_loc (loc, tem);
2848 tem = fold_convert_loc (loc, optype, tem);
2849 if (stmt)
2851 gsi = gsi_for_stmt (stmt);
2852 uid = gimple_uid (stmt);
2854 else
2856 gsi = gsi_none ();
2857 uid = 0;
2859 if (stmt == NULL)
2860 gcc_checking_assert (tem == op);
2861 /* In rare cases range->exp can be equal to the lhs of stmt.
2862 In that case we have to insert after the stmt rather than before
2863 it. If stmt is a PHI, insert it at the start of the basic block. */
2864 else if (op != range->exp)
2866 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2867 tem = force_into_ssa_name (&gsi, tem, true);
2868 gsi_prev (&gsi);
2870 else if (gimple_code (stmt) != GIMPLE_PHI)
2872 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2873 tem = force_into_ssa_name (&gsi, tem, false);
2875 else
2877 gsi = gsi_after_labels (gimple_bb (stmt));
2878 if (!gsi_end_p (gsi))
2879 uid = gimple_uid (gsi_stmt (gsi));
2880 else
2882 gsi = gsi_start_bb (gimple_bb (stmt));
2883 uid = 1;
2884 while (!gsi_end_p (gsi))
2886 uid = gimple_uid (gsi_stmt (gsi));
2887 gsi_next (&gsi);
2890 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2891 tem = force_into_ssa_name (&gsi, tem, true);
2892 if (gsi_end_p (gsi))
2893 gsi = gsi_last_bb (gimple_bb (stmt));
2894 else
2895 gsi_prev (&gsi);
2897 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2898 if (gimple_uid (gsi_stmt (gsi)))
2899 break;
2900 else
2901 gimple_set_uid (gsi_stmt (gsi), uid);
2903 oe->op = tem;
2904 range->exp = exp;
2905 range->low = low;
2906 range->high = high;
2907 range->in_p = in_p;
2908 range->strict_overflow_p = false;
2910 for (i = 0; i < count; i++)
2912 if (otherrange)
2913 range = otherrange + i;
2914 else
2915 range = otherrangep[i];
2916 oe = (*ops)[range->idx];
2917 /* Now change all the other range test immediate uses, so that
2918 those tests will be optimized away. */
2919 if (opcode == ERROR_MARK)
2921 if (oe->op)
2922 oe->op = build_int_cst (TREE_TYPE (oe->op),
2923 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2924 else
2925 oe->op = (oe->rank == BIT_IOR_EXPR
2926 ? boolean_false_node : boolean_true_node);
2928 else
2929 oe->op = error_mark_node;
2930 range->exp = NULL_TREE;
2931 range->low = NULL_TREE;
2932 range->high = NULL_TREE;
2934 return true;
2937 /* Optimize X == CST1 || X == CST2
2938 if popcount (CST1 ^ CST2) == 1 into
2939 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2940 Similarly for ranges. E.g.
2941 X != 2 && X != 3 && X != 10 && X != 11
2942 will be transformed by the previous optimization into
2943 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2944 and this loop can transform that into
2945 !(((X & ~8) - 2U) <= 1U). */
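/* A concrete instance (added for illustration, not in the original
   comment): for X == 43 || X == 47 we have 43 ^ 47 == 4, which has
   popcount 1, so the pair becomes (X & ~4) == 43. */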
2947 static bool
2948 optimize_range_tests_xor (enum tree_code opcode, tree type,
2949 tree lowi, tree lowj, tree highi, tree highj,
2950 vec<operand_entry *> *ops,
2951 struct range_entry *rangei,
2952 struct range_entry *rangej)
2954 tree lowxor, highxor, tem, exp;
2955 /* Check lowi ^ lowj == highi ^ highj and
2956 popcount (lowi ^ lowj) == 1. */
2957 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2958 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2959 return false;
2960 if (!integer_pow2p (lowxor))
2961 return false;
2962 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2963 if (!tree_int_cst_equal (lowxor, highxor))
2964 return false;
2966 exp = rangei->exp;
2967 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2968 int prec = GET_MODE_PRECISION (mode);
2969 if (TYPE_PRECISION (type) < prec
2970 || (wi::to_wide (TYPE_MIN_VALUE (type))
2971 != wi::min_value (prec, TYPE_SIGN (type)))
2972 || (wi::to_wide (TYPE_MAX_VALUE (type))
2973 != wi::max_value (prec, TYPE_SIGN (type))))
2975 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2976 exp = fold_convert (type, exp);
2977 lowxor = fold_convert (type, lowxor);
2978 lowi = fold_convert (type, lowi);
2979 highi = fold_convert (type, highi);
2981 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2982 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2983 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2984 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2985 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2986 NULL, rangei->in_p, lowj, highj,
2987 rangei->strict_overflow_p
2988 || rangej->strict_overflow_p))
2989 return true;
2990 return false;
2993 /* Optimize X == CST1 || X == CST2
2994 if popcount (CST2 - CST1) == 1 into
2995 ((X - CST1) & ~(CST2 - CST1)) == 0.
2996 Similarly for ranges. E.g.
2997 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2998 || X == 75 || X == 45
2999 will be transformed by the previous optimization into
3000 (X - 43U) <= 3U || (X - 75U) <= 3U
3001 and this loop can transform that into
3002 ((X - 43U) & ~(75U - 43U)) <= 3U. */
3003 static bool
3004 optimize_range_tests_diff (enum tree_code opcode, tree type,
3005 tree lowi, tree lowj, tree highi, tree highj,
3006 vec<operand_entry *> *ops,
3007 struct range_entry *rangei,
3008 struct range_entry *rangej)
3010 tree tem1, tem2, mask;
3011 /* Check highi - lowi == highj - lowj. */
3012 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3013 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3014 return false;
3015 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3016 if (!tree_int_cst_equal (tem1, tem2))
3017 return false;
3018 /* Check popcount (lowj - lowi) == 1. */
3019 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3020 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3021 return false;
3022 if (!integer_pow2p (tem1))
3023 return false;
3025 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3026 int prec = GET_MODE_PRECISION (mode);
3027 if (TYPE_PRECISION (type) < prec
3028 || (wi::to_wide (TYPE_MIN_VALUE (type))
3029 != wi::min_value (prec, TYPE_SIGN (type)))
3030 || (wi::to_wide (TYPE_MAX_VALUE (type))
3031 != wi::max_value (prec, TYPE_SIGN (type))))
3032 type = build_nonstandard_integer_type (prec, 1);
3033 else
3034 type = unsigned_type_for (type);
3035 tem1 = fold_convert (type, tem1);
3036 tem2 = fold_convert (type, tem2);
3037 lowi = fold_convert (type, lowi);
3038 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3039 tem1 = fold_build2 (MINUS_EXPR, type,
3040 fold_convert (type, rangei->exp), lowi);
3041 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3042 lowj = build_int_cst (type, 0);
3043 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3044 NULL, rangei->in_p, lowj, tem2,
3045 rangei->strict_overflow_p
3046 || rangej->strict_overflow_p))
3047 return true;
3048 return false;
3051 /* Perform the checks common to optimize_range_tests_xor and
3052 optimize_range_tests_diff.
3053 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
3054 else call optimize_range_tests_diff. */
3056 static bool
3057 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3058 bool optimize_xor, vec<operand_entry *> *ops,
3059 struct range_entry *ranges)
3061 int i, j;
3062 bool any_changes = false;
3063 for (i = first; i < length; i++)
3065 tree lowi, highi, lowj, highj, type, tem;
3067 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3068 continue;
3069 type = TREE_TYPE (ranges[i].exp);
3070 if (!INTEGRAL_TYPE_P (type))
3071 continue;
3072 lowi = ranges[i].low;
3073 if (lowi == NULL_TREE)
3074 lowi = TYPE_MIN_VALUE (type);
3075 highi = ranges[i].high;
3076 if (highi == NULL_TREE)
3077 continue;
3078 for (j = i + 1; j < length && j < i + 64; j++)
3080 bool changes;
3081 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3082 continue;
3083 lowj = ranges[j].low;
3084 if (lowj == NULL_TREE)
3085 continue;
3086 highj = ranges[j].high;
3087 if (highj == NULL_TREE)
3088 highj = TYPE_MAX_VALUE (type);
3089 /* Check lowj > highi. */
3090 tem = fold_binary (GT_EXPR, boolean_type_node,
3091 lowj, highi);
3092 if (tem == NULL_TREE || !integer_onep (tem))
3093 continue;
3094 if (optimize_xor)
3095 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3096 highi, highj, ops,
3097 ranges + i, ranges + j);
3098 else
3099 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3100 highi, highj, ops,
3101 ranges + i, ranges + j);
3102 if (changes)
3104 any_changes = true;
3105 break;
3109 return any_changes;
3112 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3113 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3114 EXP on success, NULL otherwise. */
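/* For illustration (added, not in the original comment): with PREC 64 and
   TOTALLOW 43, the range [43, 46] yields mask 0xf (bits 0 .. 3), while a
   subsequent range [75, 78] yields mask 0xf << (75 - 43), i.e. bits
   32 .. 35 of the same word. */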
3116 static tree
3117 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3118 wide_int *mask, tree *totallowp)
3120 tree tem = int_const_binop (MINUS_EXPR, high, low);
3121 if (tem == NULL_TREE
3122 || TREE_CODE (tem) != INTEGER_CST
3123 || TREE_OVERFLOW (tem)
3124 || tree_int_cst_sgn (tem) == -1
3125 || compare_tree_int (tem, prec) != -1)
3126 return NULL_TREE;
3128 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3129 *mask = wi::shifted_mask (0, max, false, prec);
3130 if (TREE_CODE (exp) == BIT_AND_EXPR
3131 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3133 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3134 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3135 if (wi::popcount (msk) == 1
3136 && wi::ltu_p (msk, prec - max))
3138 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3139 max += msk.to_uhwi ();
3140 exp = TREE_OPERAND (exp, 0);
3141 if (integer_zerop (low)
3142 && TREE_CODE (exp) == PLUS_EXPR
3143 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3145 tree ret = TREE_OPERAND (exp, 0);
3146 STRIP_NOPS (ret);
3147 widest_int bias
3148 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3149 TYPE_PRECISION (TREE_TYPE (low))));
3150 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3151 if (totallowp)
3153 *totallowp = tbias;
3154 return ret;
3156 else if (!tree_int_cst_lt (totallow, tbias))
3157 return NULL_TREE;
3158 bias = wi::to_widest (tbias);
3159 bias -= wi::to_widest (totallow);
3160 if (bias >= 0 && bias < prec - max)
3162 *mask = wi::lshift (*mask, bias);
3163 return ret;
3168 if (totallowp)
3169 return exp;
3170 if (!tree_int_cst_lt (totallow, low))
3171 return exp;
3172 tem = int_const_binop (MINUS_EXPR, low, totallow);
3173 if (tem == NULL_TREE
3174 || TREE_CODE (tem) != INTEGER_CST
3175 || TREE_OVERFLOW (tem)
3176 || compare_tree_int (tem, prec - max) == 1)
3177 return NULL_TREE;
3179 *mask = wi::lshift (*mask, wi::to_widest (tem));
3180 return exp;
3183 /* Attempt to optimize small range tests using bit test.
3184 E.g.
3185 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3186 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3187 has been optimized by earlier optimizations into:
3188 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3189 As all the 43 through 82 range is less than 64 numbers,
3190 for 64-bit word targets optimize that into:
3191 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3193 static bool
3194 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3195 vec<operand_entry *> *ops,
3196 struct range_entry *ranges)
3198 int i, j;
3199 bool any_changes = false;
3200 int prec = GET_MODE_BITSIZE (word_mode);
3201 auto_vec<struct range_entry *, 64> candidates;
3203 for (i = first; i < length - 1; i++)
3205 tree lowi, highi, lowj, highj, type;
3207 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3208 continue;
3209 type = TREE_TYPE (ranges[i].exp);
3210 if (!INTEGRAL_TYPE_P (type))
3211 continue;
3212 lowi = ranges[i].low;
3213 if (lowi == NULL_TREE)
3214 lowi = TYPE_MIN_VALUE (type);
3215 highi = ranges[i].high;
3216 if (highi == NULL_TREE)
3217 continue;
3218 wide_int mask;
3219 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3220 highi, &mask, &lowi);
3221 if (exp == NULL_TREE)
3222 continue;
3223 bool strict_overflow_p = ranges[i].strict_overflow_p;
3224 candidates.truncate (0);
3225 int end = MIN (i + 64, length);
3226 for (j = i + 1; j < end; j++)
3228 tree exp2;
3229 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3230 continue;
3231 if (ranges[j].exp == exp)
3233 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3235 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3236 if (exp2 == exp)
3238 else if (TREE_CODE (exp2) == PLUS_EXPR)
3240 exp2 = TREE_OPERAND (exp2, 0);
3241 STRIP_NOPS (exp2);
3242 if (exp2 != exp)
3243 continue;
3245 else
3246 continue;
3248 else
3249 continue;
3250 lowj = ranges[j].low;
3251 if (lowj == NULL_TREE)
3252 continue;
3253 highj = ranges[j].high;
3254 if (highj == NULL_TREE)
3255 highj = TYPE_MAX_VALUE (type);
3256 wide_int mask2;
3257 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3258 highj, &mask2, NULL);
3259 if (exp2 != exp)
3260 continue;
3261 mask |= mask2;
3262 strict_overflow_p |= ranges[j].strict_overflow_p;
3263 candidates.safe_push (&ranges[j]);
3266 /* If every possible relative value of the expression is a valid shift
3267 amount, then we can merge the entry test into the bit test. In this
3268 case, if we would otherwise need 2 or more comparisons, then use
3269 the bit test; in the other cases, the threshold is 3 comparisons. */
3270 bool entry_test_needed;
3271 value_range r;
3272 if (TREE_CODE (exp) == SSA_NAME
3273 && get_range_query (cfun)->range_of_expr (r, exp)
3274 && r.kind () == VR_RANGE
3275 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3277 wide_int min = r.lower_bound ();
3278 wide_int ilowi = wi::to_wide (lowi);
3279 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3281 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3282 mask = wi::lshift (mask, ilowi - min);
3284 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3286 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3287 mask = wi::lrshift (mask, min - ilowi);
3289 entry_test_needed = false;
3291 else
3292 entry_test_needed = true;
3293 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3295 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3296 wi::to_widest (lowi)
3297 + prec - 1 - wi::clz (mask));
3298 operand_entry *oe = (*ops)[ranges[i].idx];
3299 tree op = oe->op;
3300 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3301 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3302 location_t loc = gimple_location (stmt);
3303 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3305 /* See if it isn't cheaper to pretend the minimum value of the
3306 range is 0, if the maximum value is small enough.
3307 We can then avoid the subtraction of the minimum value, but the
3308 mask constant could perhaps be more expensive. */
3309 if (compare_tree_int (lowi, 0) > 0
3310 && compare_tree_int (high, prec) < 0)
3312 int cost_diff;
3313 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3314 rtx reg = gen_raw_REG (word_mode, 10000);
3315 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3316 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3317 GEN_INT (-m)),
3318 word_mode, speed_p);
3319 rtx r = immed_wide_int_const (mask, word_mode);
3320 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3321 word_mode, speed_p);
3322 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3323 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3324 word_mode, speed_p);
3325 if (cost_diff > 0)
3327 mask = wi::lshift (mask, m);
3328 lowi = build_zero_cst (TREE_TYPE (lowi));
3332 tree tem;
3333 if (entry_test_needed)
3335 tem = build_range_check (loc, optype, unshare_expr (exp),
3336 false, lowi, high);
3337 if (tem == NULL_TREE || is_gimple_val (tem))
3338 continue;
3340 else
3341 tem = NULL_TREE;
3342 tree etype = unsigned_type_for (TREE_TYPE (exp));
3343 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3344 fold_convert_loc (loc, etype, exp),
3345 fold_convert_loc (loc, etype, lowi));
3346 exp = fold_convert_loc (loc, integer_type_node, exp);
3347 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3348 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3349 build_int_cst (word_type, 1), exp);
3350 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3351 wide_int_to_tree (word_type, mask));
3352 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3353 build_zero_cst (word_type));
3354 if (is_gimple_val (exp))
3355 continue;
3357 /* The shift might have undefined behavior if TEM is true,
3358 but reassociate_bb isn't prepared to have basic blocks
3359 split when it is running. So, temporarily emit code
3360 with BIT_IOR_EXPR instead of &&, and fix it up in
3361 branch_fixup. */
3362 gimple_seq seq = NULL;
3363 if (tem)
3365 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3366 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3367 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3369 gimple_seq seq2;
3370 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3371 gimple_seq_add_seq_without_update (&seq, seq2);
3372 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3373 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3374 if (tem)
3376 gimple *g = gimple_build_assign (make_ssa_name (optype),
3377 BIT_IOR_EXPR, tem, exp);
3378 gimple_set_location (g, loc);
3379 gimple_seq_add_stmt_without_update (&seq, g);
3380 exp = gimple_assign_lhs (g);
3382 tree val = build_zero_cst (optype);
3383 if (update_range_test (&ranges[i], NULL, candidates.address (),
3384 candidates.length (), opcode, ops, exp,
3385 seq, false, val, val, strict_overflow_p))
3387 any_changes = true;
3388 if (tem)
3389 reassoc_branch_fixups.safe_push (tem);
3391 else
3392 gimple_seq_discard (seq);
3395 return any_changes;
3398 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3399 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3400 Also, handle x < C && y < C && z < C where C is power of two as
3401 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3402 as (x | y | z) < 0. */
3404 static bool
3405 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3406 vec<operand_entry *> *ops,
3407 struct range_entry *ranges)
3409 int i;
3410 unsigned int b;
3411 bool any_changes = false;
3412 auto_vec<int, 128> buckets;
3413 auto_vec<int, 32> chains;
3414 auto_vec<struct range_entry *, 32> candidates;
3416 for (i = first; i < length; i++)
3418 int idx;
3420 if (ranges[i].exp == NULL_TREE
3421 || TREE_CODE (ranges[i].exp) != SSA_NAME
3422 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3423 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3424 continue;
3426 if (ranges[i].low != NULL_TREE
3427 && ranges[i].high != NULL_TREE
3428 && ranges[i].in_p
3429 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3431 idx = !integer_zerop (ranges[i].low);
3432 if (idx && !integer_all_onesp (ranges[i].low))
3433 continue;
3435 else if (ranges[i].high != NULL_TREE
3436 && TREE_CODE (ranges[i].high) == INTEGER_CST
3437 && ranges[i].in_p)
3439 wide_int w = wi::to_wide (ranges[i].high);
3440 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3441 int l = wi::clz (w);
3442 idx = 2;
3443 if (l <= 0
3444 || l >= prec
3445 || w != wi::mask (prec - l, false, prec))
3446 continue;
3447 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3448 && ranges[i].low == NULL_TREE)
3449 || (ranges[i].low
3450 && integer_zerop (ranges[i].low))))
3451 continue;
3453 else if (ranges[i].high == NULL_TREE
3454 && ranges[i].low != NULL_TREE
3455 /* Perform this optimization only in the last
3456 reassoc pass, as it interferes with the reassociation
3457 itself and could also interfere with VRP etc., which might not
3458 be able to virtually undo the optimization. */
3459 && !reassoc_insert_powi_p
3460 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3461 && integer_zerop (ranges[i].low))
3462 idx = 3;
3463 else
3464 continue;
3466 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3467 if (buckets.length () <= b)
3468 buckets.safe_grow_cleared (b + 1, true);
3469 if (chains.length () <= (unsigned) i)
3470 chains.safe_grow (i + 1, true);
3471 chains[i] = buckets[b];
3472 buckets[b] = i + 1;
3475 FOR_EACH_VEC_ELT (buckets, b, i)
3476 if (i && chains[i - 1])
3478 int j, k = i;
3479 if ((b % 4) == 2)
3481 /* When ranges[X - 1].high + 1 is a power of two,
3482 we need to process the same bucket up to
3483 precision - 1 times, each time splitting the entries
3484 with the same high bound into one chain and the
3485 rest into another one to be processed later. */
3486 int this_prev = i;
3487 int other_prev = 0;
3488 for (j = chains[i - 1]; j; j = chains[j - 1])
3490 if (tree_int_cst_equal (ranges[i - 1].high,
3491 ranges[j - 1].high))
3493 chains[this_prev - 1] = j;
3494 this_prev = j;
3496 else if (other_prev == 0)
3498 buckets[b] = j;
3499 other_prev = j;
3501 else
3503 chains[other_prev - 1] = j;
3504 other_prev = j;
3507 chains[this_prev - 1] = 0;
3508 if (other_prev)
3509 chains[other_prev - 1] = 0;
3510 if (chains[i - 1] == 0)
3512 if (other_prev)
3513 b--;
3514 continue;
3517 for (j = chains[i - 1]; j; j = chains[j - 1])
3519 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3520 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3521 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3522 k = j;
3524 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3525 tree type2 = NULL_TREE;
3526 bool strict_overflow_p = false;
3527 candidates.truncate (0);
3528 for (j = i; j; j = chains[j - 1])
3530 tree type = TREE_TYPE (ranges[j - 1].exp);
3531 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3532 if ((b % 4) == 3)
3534 /* For the signed < 0 cases, the types should be
3535 really compatible (all signed with the same precision); the
3536 candidates are instead split by in_p, putting ranges whose in_p
3537 differs from k's first. */
3538 if (!useless_type_conversion_p (type1, type))
3539 continue;
3540 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3541 candidates.safe_push (&ranges[j - 1]);
3542 type2 = type1;
3543 continue;
3545 if (j == k
3546 || useless_type_conversion_p (type1, type))
3548 else if (type2 == NULL_TREE
3549 || useless_type_conversion_p (type2, type))
3551 if (type2 == NULL_TREE)
3552 type2 = type;
3553 candidates.safe_push (&ranges[j - 1]);
3556 unsigned l = candidates.length ();
3557 for (j = i; j; j = chains[j - 1])
3559 tree type = TREE_TYPE (ranges[j - 1].exp);
3560 if (j == k)
3561 continue;
3562 if ((b % 4) == 3)
3564 if (!useless_type_conversion_p (type1, type))
3565 continue;
3566 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3567 candidates.safe_push (&ranges[j - 1]);
3568 continue;
3570 if (useless_type_conversion_p (type1, type))
3572 else if (type2 == NULL_TREE
3573 || useless_type_conversion_p (type2, type))
3574 continue;
3575 candidates.safe_push (&ranges[j - 1]);
3577 gimple_seq seq = NULL;
3578 tree op = NULL_TREE;
3579 unsigned int id;
3580 struct range_entry *r;
3581 candidates.safe_push (&ranges[k - 1]);
3582 FOR_EACH_VEC_ELT (candidates, id, r)
3584 gimple *g;
3585 enum tree_code code;
3586 if (id == 0)
3588 op = r->exp;
3589 continue;
3591 if (id == l)
3593 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3594 g = gimple_build_assign (make_ssa_name (type1), code, op);
3595 gimple_seq_add_stmt_without_update (&seq, g);
3596 op = gimple_assign_lhs (g);
3598 tree type = TREE_TYPE (r->exp);
3599 tree exp = r->exp;
3600 if (id >= l && !useless_type_conversion_p (type1, type))
3602 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3603 gimple_seq_add_stmt_without_update (&seq, g);
3604 exp = gimple_assign_lhs (g);
3606 if ((b % 4) == 3)
3607 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3608 else
3609 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3610 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3611 code, op, exp);
3612 gimple_seq_add_stmt_without_update (&seq, g);
3613 op = gimple_assign_lhs (g);
3615 candidates.pop ();
3616 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3617 candidates.length (), opcode, ops, op,
3618 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3619 ranges[k - 1].high, strict_overflow_p))
3620 any_changes = true;
3621 else
3622 gimple_seq_discard (seq);
3623 if ((b % 4) == 2 && buckets[b] != i)
3624 /* There is more work to do for this bucket. */
3625 b--;
3628 return any_changes;
3631 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3632 a >= 0 && a < b into (unsigned) a < (unsigned) b
3633 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3635 static bool
3636 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3637 vec<operand_entry *> *ops,
3638 struct range_entry *ranges,
3639 basic_block first_bb)
3641 int i;
3642 bool any_changes = false;
3643 hash_map<tree, int> *map = NULL;
3645 for (i = first; i < length; i++)
3647 if (ranges[i].exp == NULL_TREE
3648 || TREE_CODE (ranges[i].exp) != SSA_NAME
3649 || !ranges[i].in_p)
3650 continue;
3652 tree type = TREE_TYPE (ranges[i].exp);
3653 if (!INTEGRAL_TYPE_P (type)
3654 || TYPE_UNSIGNED (type)
3655 || ranges[i].low == NULL_TREE
3656 || !integer_zerop (ranges[i].low)
3657 || ranges[i].high != NULL_TREE)
3658 continue;
3659 /* EXP >= 0 here. */
3660 if (map == NULL)
3661 map = new hash_map <tree, int>;
3662 map->put (ranges[i].exp, i);
3665 if (map == NULL)
3666 return false;
3668 for (i = 0; i < length; i++)
3670 bool in_p = ranges[i].in_p;
3671 if (ranges[i].low == NULL_TREE
3672 || ranges[i].high == NULL_TREE)
3673 continue;
3674 if (!integer_zerop (ranges[i].low)
3675 || !integer_zerop (ranges[i].high))
3677 if (ranges[i].exp
3678 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3679 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3680 && integer_onep (ranges[i].low)
3681 && integer_onep (ranges[i].high))
3682 in_p = !in_p;
3683 else
3684 continue;
3687 gimple *stmt;
3688 tree_code ccode;
3689 tree rhs1, rhs2;
3690 if (ranges[i].exp)
3692 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3693 continue;
3694 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3695 if (!is_gimple_assign (stmt))
3696 continue;
3697 ccode = gimple_assign_rhs_code (stmt);
3698 rhs1 = gimple_assign_rhs1 (stmt);
3699 rhs2 = gimple_assign_rhs2 (stmt);
3701 else
3703 operand_entry *oe = (*ops)[ranges[i].idx];
3704 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3705 if (gimple_code (stmt) != GIMPLE_COND)
3706 continue;
3707 ccode = gimple_cond_code (stmt);
3708 rhs1 = gimple_cond_lhs (stmt);
3709 rhs2 = gimple_cond_rhs (stmt);
3712 if (TREE_CODE (rhs1) != SSA_NAME
3713 || rhs2 == NULL_TREE
3714 || TREE_CODE (rhs2) != SSA_NAME)
3715 continue;
3717 switch (ccode)
3719 case GT_EXPR:
3720 case GE_EXPR:
3721 case LT_EXPR:
3722 case LE_EXPR:
3723 break;
3724 default:
3725 continue;
3727 if (in_p)
3728 ccode = invert_tree_comparison (ccode, false);
3729 switch (ccode)
3731 case GT_EXPR:
3732 case GE_EXPR:
3733 std::swap (rhs1, rhs2);
3734 ccode = swap_tree_comparison (ccode);
3735 break;
3736 case LT_EXPR:
3737 case LE_EXPR:
3738 break;
3739 default:
3740 gcc_unreachable ();
3743 int *idx = map->get (rhs1);
3744 if (idx == NULL)
3745 continue;
3747 /* maybe_optimize_range_tests allows statements without side-effects
3748 in the basic blocks as long as they are consumed in the same bb.
3749 Make sure rhs2's def stmt is not among them, otherwise we can't
3750 safely use get_nonzero_bits on it. E.g. in:
3751 # RANGE [-83, 1] NONZERO 173
3752 # k_32 = PHI <k_47(13), k_12(9)>
3754 if (k_32 >= 0)
3755 goto <bb 5>; [26.46%]
3756 else
3757 goto <bb 9>; [73.54%]
3759 <bb 5> [local count: 140323371]:
3760 # RANGE [0, 1] NONZERO 1
3761 _5 = (int) k_32;
3762 # RANGE [0, 4] NONZERO 4
3763 _21 = _5 << 2;
3764 # RANGE [0, 4] NONZERO 4
3765 iftmp.0_44 = (char) _21;
3766 if (k_32 < iftmp.0_44)
3767 goto <bb 6>; [84.48%]
3768 else
3769 goto <bb 9>; [15.52%]
3770 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3771 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3772 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3773 those stmts even for negative k_32 and the value ranges would no
3774 longer be guaranteed, and so the optimization would be invalid. */
3775 while (opcode == ERROR_MARK)
3777 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3778 basic_block bb2 = gimple_bb (g);
3779 if (bb2
3780 && bb2 != first_bb
3781 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3783 /* As an exception, handle a few common cases. */
3784 if (gimple_assign_cast_p (g)
3785 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3787 tree op0 = gimple_assign_rhs1 (g);
3788 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3789 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3790 > TYPE_PRECISION (TREE_TYPE (op0))))
3791 /* Zero-extension is always ok. */
3792 break;
3793 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3794 == TYPE_PRECISION (TREE_TYPE (op0))
3795 && TREE_CODE (op0) == SSA_NAME)
3797 /* Cast from signed to unsigned or vice versa. Retry
3798 with the op0 as new rhs2. */
3799 rhs2 = op0;
3800 continue;
3803 else if (is_gimple_assign (g)
3804 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3805 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3806 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3807 /* Masking with INTEGER_CST with MSB clear is always ok
3808 too. */
3809 break;
3810 rhs2 = NULL_TREE;
3812 break;
3814 if (rhs2 == NULL_TREE)
3815 continue;
3817 wide_int nz = get_nonzero_bits (rhs2);
3818 if (wi::neg_p (nz))
3819 continue;
3821 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3822 and RHS2 is known to be >= 0. */
3823 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3825 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3826 if ((ranges[*idx].strict_overflow_p
3827 || ranges[i].strict_overflow_p)
3828 && issue_strict_overflow_warning (wc))
3829 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3830 "assuming signed overflow does not occur "
3831 "when simplifying range test");
3833 if (dump_file && (dump_flags & TDF_DETAILS))
3835 struct range_entry *r = &ranges[*idx];
3836 fprintf (dump_file, "Optimizing range test ");
3837 print_generic_expr (dump_file, r->exp);
3838 fprintf (dump_file, " +[");
3839 print_generic_expr (dump_file, r->low);
3840 fprintf (dump_file, ", ");
3841 print_generic_expr (dump_file, r->high);
3842 fprintf (dump_file, "] and comparison ");
3843 print_generic_expr (dump_file, rhs1);
3844 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3845 print_generic_expr (dump_file, rhs2);
3846 fprintf (dump_file, "\n into (");
3847 print_generic_expr (dump_file, utype);
3848 fprintf (dump_file, ") ");
3849 print_generic_expr (dump_file, rhs1);
3850 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3851 print_generic_expr (dump_file, utype);
3852 fprintf (dump_file, ") ");
3853 print_generic_expr (dump_file, rhs2);
3854 fprintf (dump_file, "\n");
3857 operand_entry *oe = (*ops)[ranges[i].idx];
3858 ranges[i].in_p = 0;
3859 if (opcode == BIT_IOR_EXPR
3860 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3862 ranges[i].in_p = 1;
3863 ccode = invert_tree_comparison (ccode, false);
3866 unsigned int uid = gimple_uid (stmt);
3867 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3868 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3869 gimple_set_uid (g, uid);
3870 rhs1 = gimple_assign_lhs (g);
3871 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3872 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3874 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3875 gimple_set_uid (g, uid);
3876 rhs2 = gimple_assign_lhs (g);
3877 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3879 if (tree_swap_operands_p (rhs1, rhs2))
3881 std::swap (rhs1, rhs2);
3882 ccode = swap_tree_comparison (ccode);
3884 if (gimple_code (stmt) == GIMPLE_COND)
3886 gcond *c = as_a <gcond *> (stmt);
3887 gimple_cond_set_code (c, ccode);
3888 gimple_cond_set_lhs (c, rhs1);
3889 gimple_cond_set_rhs (c, rhs2);
3890 update_stmt (stmt);
3892 else
3894 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3895 if (!INTEGRAL_TYPE_P (ctype)
3896 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3897 && TYPE_PRECISION (ctype) != 1))
3898 ctype = boolean_type_node;
3899 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3900 gimple_set_uid (g, uid);
3901 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3902 if (oe->op && ctype != TREE_TYPE (oe->op))
3904 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3905 NOP_EXPR, gimple_assign_lhs (g));
3906 gimple_set_uid (g, uid);
3907 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3909 ranges[i].exp = gimple_assign_lhs (g);
3910 oe->op = ranges[i].exp;
3911 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3912 ranges[i].high = ranges[i].low;
3914 ranges[i].strict_overflow_p = false;
3915 oe = (*ops)[ranges[*idx].idx];
3916 /* Now change all the other range test immediate uses, so that
3917 those tests will be optimized away. */
3918 if (opcode == ERROR_MARK)
3920 if (oe->op)
3921 oe->op = build_int_cst (TREE_TYPE (oe->op),
3922 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3923 else
3924 oe->op = (oe->rank == BIT_IOR_EXPR
3925 ? boolean_false_node : boolean_true_node);
3927 else
3928 oe->op = error_mark_node;
3929 ranges[*idx].exp = NULL_TREE;
3930 ranges[*idx].low = NULL_TREE;
3931 ranges[*idx].high = NULL_TREE;
3932 any_changes = true;
3935 delete map;
3936 return any_changes;
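/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  The transformation above turns a
   signed comparison guarded by a nonnegativity range test into a single
   unsigned comparison, roughly:

     if (x >= 0 && x < y)   // with y known to be >= 0

   into

     if ((unsigned int) x < (unsigned int) y)

   which lets the separate x >= 0 range test be dropped.  */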
3939 /* Optimize range tests, similarly to how fold_range_test optimizes
3940 them on trees. The tree code for the binary
3941 operation between all the operands is OPCODE.
3942 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3943 maybe_optimize_range_tests for inter-bb range optimization.
3944 In that case, if oe->op is NULL, oe->id is the index of the bb whose
3945 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3946 the actual opcode.
3947 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3949 static bool
3950 optimize_range_tests (enum tree_code opcode,
3951 vec<operand_entry *> *ops, basic_block first_bb)
3953 unsigned int length = ops->length (), i, j, first;
3954 operand_entry *oe;
3955 struct range_entry *ranges;
3956 bool any_changes = false;
3958 if (length == 1)
3959 return false;
3961 ranges = XNEWVEC (struct range_entry, length);
3962 for (i = 0; i < length; i++)
3964 oe = (*ops)[i];
3965 ranges[i].idx = i;
3966 init_range_entry (ranges + i, oe->op,
3967 oe->op
3968 ? NULL
3969 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3970 /* For |, invert it now; we will invert it again before emitting
3971 the optimized expression. */
3972 if (opcode == BIT_IOR_EXPR
3973 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3974 ranges[i].in_p = !ranges[i].in_p;
3977 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3978 for (i = 0; i < length; i++)
3979 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3980 break;
3982 /* Try to merge ranges. */
3983 for (first = i; i < length; i++)
3985 tree low = ranges[i].low;
3986 tree high = ranges[i].high;
3987 int in_p = ranges[i].in_p;
3988 bool strict_overflow_p = ranges[i].strict_overflow_p;
3989 int update_fail_count = 0;
3991 for (j = i + 1; j < length; j++)
3993 if (ranges[i].exp != ranges[j].exp)
3994 break;
3995 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3996 ranges[j].in_p, ranges[j].low, ranges[j].high))
3997 break;
3998 strict_overflow_p |= ranges[j].strict_overflow_p;
4001 if (j == i + 1)
4002 continue;
4004 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4005 opcode, ops, ranges[i].exp, NULL, in_p,
4006 low, high, strict_overflow_p))
4008 i = j - 1;
4009 any_changes = true;
4011 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4012 while update_range_test would fail. */
4013 else if (update_fail_count == 64)
4014 i = j - 1;
4015 else
4016 ++update_fail_count;
4019 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4020 ops, ranges);
4022 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4023 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4024 ops, ranges);
4025 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4026 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4027 ops, ranges);
4028 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4029 ranges, first_bb);
4030 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4031 ops, ranges);
4033 if (any_changes && opcode != ERROR_MARK)
4035 j = 0;
4036 FOR_EACH_VEC_ELT (*ops, i, oe)
4038 if (oe->op == error_mark_node)
4039 continue;
4040 else if (i != j)
4041 (*ops)[j] = oe;
4042 j++;
4044 ops->truncate (j);
4047 XDELETEVEC (ranges);
4048 return any_changes;
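/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  optimize_range_tests merges
   comparisons of the same operand that were combined with && or ||,
   e.g. at the source level something like

     if (a == 1 || a == 2 || a == 3 || a == 4)

   can be rewritten as a single range check along the lines of

     if ((unsigned int) (a - 1) <= 3U)

   with the now-redundant tests folded away.  */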
4051 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4052 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4053 otherwise the comparison code. TYPE is a return value that is set
4054 to type of comparison. */
4056 static tree_code
4057 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4058 tree *lhs, tree *rhs, gassign **vcond)
4060 if (TREE_CODE (var) != SSA_NAME)
4061 return ERROR_MARK;
4063 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4064 if (stmt == NULL)
4065 return ERROR_MARK;
4066 if (vcond)
4067 *vcond = stmt;
4069 /* ??? If we start creating more COND_EXPR, we could perform
4070 this same optimization with them. For now, simplify. */
4071 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4072 return ERROR_MARK;
4074 tree cond = gimple_assign_rhs1 (stmt);
4075 tree_code cmp = TREE_CODE (cond);
4076 if (cmp != SSA_NAME)
4077 return ERROR_MARK;
4079 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4080 if (assign == NULL
4081 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4082 return ERROR_MARK;
4084 cmp = gimple_assign_rhs_code (assign);
4085 if (lhs)
4086 *lhs = gimple_assign_rhs1 (assign);
4087 if (rhs)
4088 *rhs = gimple_assign_rhs2 (assign);
4090 /* ??? For now, allow only canonical true and false result vectors.
4091 We could expand this to other constants should the need arise,
4092 but at the moment we don't create them. */
4093 tree t = gimple_assign_rhs2 (stmt);
4094 tree f = gimple_assign_rhs3 (stmt);
4095 bool inv;
4096 if (integer_all_onesp (t))
4097 inv = false;
4098 else if (integer_all_onesp (f))
4100 cmp = invert_tree_comparison (cmp, false);
4101 inv = true;
4103 else
4104 return ERROR_MARK;
4105 if (!integer_zerop (f))
4106 return ERROR_MARK;
4108 /* Success! */
4109 if (rets)
4110 *rets = assign;
4111 if (reti)
4112 *reti = inv;
4113 if (type)
4114 *type = TREE_TYPE (cond);
4115 return cmp;
4118 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4119 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4121 static bool
4122 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4124 unsigned int length = ops->length (), i, j;
4125 bool any_changes = false;
4127 if (length == 1)
4128 return false;
4130 for (i = 0; i < length; ++i)
4132 tree elt0 = (*ops)[i]->op;
4134 gassign *stmt0, *vcond0;
4135 bool invert;
4136 tree type, lhs0, rhs0;
4137 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4138 &rhs0, &vcond0);
4139 if (cmp0 == ERROR_MARK)
4140 continue;
4142 for (j = i + 1; j < length; ++j)
4144 tree &elt1 = (*ops)[j]->op;
4146 gassign *stmt1, *vcond1;
4147 tree lhs1, rhs1;
4148 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4149 &rhs1, &vcond1);
4150 if (cmp1 == ERROR_MARK)
4151 continue;
4153 tree comb;
4154 if (opcode == BIT_AND_EXPR)
4155 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4156 cmp1, lhs1, rhs1);
4157 else if (opcode == BIT_IOR_EXPR)
4158 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4159 cmp1, lhs1, rhs1);
4160 else
4161 gcc_unreachable ();
4162 if (comb == NULL)
4163 continue;
4165 /* Success! */
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4168 fprintf (dump_file, "Transforming ");
4169 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4170 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4171 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4172 fprintf (dump_file, " into ");
4173 print_generic_expr (dump_file, comb);
4174 fputc ('\n', dump_file);
4177 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4178 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4179 true, GSI_SAME_STMT);
4180 if (invert)
4181 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4182 gimple_assign_rhs3_ptr (vcond0));
4183 gimple_assign_set_rhs1 (vcond0, exp);
4184 update_stmt (vcond0);
4186 elt1 = error_mark_node;
4187 any_changes = true;
4191 if (any_changes)
4193 operand_entry *oe;
4194 j = 0;
4195 FOR_EACH_VEC_ELT (*ops, i, oe)
4197 if (oe->op == error_mark_node)
4198 continue;
4199 else if (i != j)
4200 (*ops)[j] = oe;
4201 j++;
4203 ops->truncate (j);
4206 return any_changes;
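/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  optimize_vec_cond_expr looks for
   two VEC_COND_EXPRs whose results are combined with & or |, e.g.

     c1_1 = a_2 < b_3;
     v1_4 = VEC_COND_EXPR <c1_1, { -1, ... }, { 0, ... }>;
     c2_5 = a_2 <= b_3;
     v2_6 = VEC_COND_EXPR <c2_5, { -1, ... }, { 0, ... }>;
     r_7 = v1_4 & v2_6;

   and, when the two comparisons fold (here a_2 < b_3 && a_2 <= b_3 is
   just a_2 < b_3), rewrites the first VEC_COND_EXPR to use the combined
   comparison and drops the second entry from the ops vector.  */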
4209 /* Return true if STMT is a cast like:
4210 <bb N>:
4212 _123 = (int) _234;
4214 <bb M>:
4215 # _345 = PHI <_123(N), 1(...), 1(...)>
4216 where _234 has bool type, _123 has a single use and
4217 bb N has a single successor M. This is commonly used in
4218 the last block of a range test.
4220 Also return true if STMT is a tcc_comparison like:
4221 <bb N>:
4223 _234 = a_2(D) == 2;
4225 <bb M>:
4226 # _345 = PHI <_234(N), 1(...), 1(...)>
4227 _346 = (int) _345;
4228 where _234 has bool type, a single use, and
4229 bb N has a single successor M. This is commonly used in
4230 the last block of a range test. */
4232 static bool
4233 final_range_test_p (gimple *stmt)
4235 basic_block bb, rhs_bb, lhs_bb;
4236 edge e;
4237 tree lhs, rhs;
4238 use_operand_p use_p;
4239 gimple *use_stmt;
4241 if (!gimple_assign_cast_p (stmt)
4242 && (!is_gimple_assign (stmt)
4243 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4244 != tcc_comparison)))
4245 return false;
4246 bb = gimple_bb (stmt);
4247 if (!single_succ_p (bb))
4248 return false;
4249 e = single_succ_edge (bb);
4250 if (e->flags & EDGE_COMPLEX)
4251 return false;
4253 lhs = gimple_assign_lhs (stmt);
4254 rhs = gimple_assign_rhs1 (stmt);
4255 if (gimple_assign_cast_p (stmt)
4256 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4257 || TREE_CODE (rhs) != SSA_NAME
4258 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4259 return false;
4261 if (!gimple_assign_cast_p (stmt)
4262 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4263 return false;
4265 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4266 if (!single_imm_use (lhs, &use_p, &use_stmt))
4267 return false;
4269 if (gimple_code (use_stmt) != GIMPLE_PHI
4270 || gimple_bb (use_stmt) != e->dest)
4271 return false;
4273 /* And that the rhs is defined in the same loop. */
4274 if (gimple_assign_cast_p (stmt))
4276 if (TREE_CODE (rhs) != SSA_NAME
4277 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4278 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4279 return false;
4281 else
4283 if (TREE_CODE (lhs) != SSA_NAME
4284 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4285 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4286 return false;
4289 return true;
4292 /* Return true if BB is suitable basic block for inter-bb range test
4293 optimization. If BACKWARD is true, BB should be the only predecessor
4294 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4295 or compared against to find a common basic block to which all conditions
4296 branch if true resp. false. If BACKWARD is false, TEST_BB should
4297 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4298 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4299 block points to an empty block that falls through into *OTHER_BB and
4300 the phi args match that path. */
4302 static bool
4303 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4304 bool *test_swapped_p, bool backward)
4306 edge_iterator ei, ei2;
4307 edge e, e2;
4308 gimple *stmt;
4309 gphi_iterator gsi;
4310 bool other_edge_seen = false;
4311 bool is_cond;
4313 if (test_bb == bb)
4314 return false;
4315 /* Check last stmt first. */
4316 stmt = last_stmt (bb);
4317 if (stmt == NULL
4318 || (gimple_code (stmt) != GIMPLE_COND
4319 && (backward || !final_range_test_p (stmt)))
4320 || gimple_visited_p (stmt)
4321 || stmt_could_throw_p (cfun, stmt)
4322 || *other_bb == bb)
4323 return false;
4324 is_cond = gimple_code (stmt) == GIMPLE_COND;
4325 if (is_cond)
4327 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4328 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4329 to *OTHER_BB (if not set yet, try to find it out). */
4330 if (EDGE_COUNT (bb->succs) != 2)
4331 return false;
4332 FOR_EACH_EDGE (e, ei, bb->succs)
4334 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4335 return false;
4336 if (e->dest == test_bb)
4338 if (backward)
4339 continue;
4340 else
4341 return false;
4343 if (e->dest == bb)
4344 return false;
4345 if (*other_bb == NULL)
4347 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4348 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4349 return false;
4350 else if (e->dest == e2->dest)
4351 *other_bb = e->dest;
4352 if (*other_bb == NULL)
4353 return false;
4355 if (e->dest == *other_bb)
4356 other_edge_seen = true;
4357 else if (backward)
4358 return false;
4360 if (*other_bb == NULL || !other_edge_seen)
4361 return false;
4363 else if (single_succ (bb) != *other_bb)
4364 return false;
4366 /* Now check all PHIs of *OTHER_BB. */
4367 e = find_edge (bb, *other_bb);
4368 e2 = find_edge (test_bb, *other_bb);
4369 retry:;
4370 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4372 gphi *phi = gsi.phi ();
4373 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4374 corresponding to BB and TEST_BB predecessor must be the same. */
4375 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4376 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4378 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4379 one of the PHIs should have the lhs of the last stmt in
4380 that block as PHI arg and that PHI should have 0 or 1
4381 corresponding to it in all other range test basic blocks
4382 considered. */
4383 if (!is_cond)
4385 if (gimple_phi_arg_def (phi, e->dest_idx)
4386 == gimple_assign_lhs (stmt)
4387 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4388 || integer_onep (gimple_phi_arg_def (phi,
4389 e2->dest_idx))))
4390 continue;
4392 else
4394 gimple *test_last = last_stmt (test_bb);
4395 if (gimple_code (test_last) == GIMPLE_COND)
4397 if (backward ? e2->src != test_bb : e->src != bb)
4398 return false;
4400 /* For last_bb, handle also:
4401 if (x_3(D) == 3)
4402 goto <bb 6>; [34.00%]
4403 else
4404 goto <bb 7>; [66.00%]
4406 <bb 6> [local count: 79512730]:
4408 <bb 7> [local count: 1073741824]:
4409 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4410 where bb 7 is *OTHER_BB, but the PHI values from the
4411 earlier bbs match the path through the empty bb
4412 in between. */
4413 edge e3;
4414 if (backward)
4415 e3 = EDGE_SUCC (test_bb,
4416 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4417 else
4418 e3 = EDGE_SUCC (bb,
4419 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4420 if (empty_block_p (e3->dest)
4421 && single_succ_p (e3->dest)
4422 && single_succ (e3->dest) == *other_bb
4423 && single_pred_p (e3->dest)
4424 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4426 if (backward)
4427 e2 = single_succ_edge (e3->dest);
4428 else
4429 e = single_succ_edge (e3->dest);
4430 if (test_swapped_p)
4431 *test_swapped_p = true;
4432 goto retry;
4435 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4436 == gimple_assign_lhs (test_last)
4437 && (integer_zerop (gimple_phi_arg_def (phi,
4438 e->dest_idx))
4439 || integer_onep (gimple_phi_arg_def (phi,
4440 e->dest_idx))))
4441 continue;
4444 return false;
4447 return true;
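/* Illustrative sketch (added for exposition, not part of the original
   sources; block numbers and names are hypothetical).  The CFG shape
   suitable_cond_bb accepts looks roughly like

     <bb 2>: if (a_1 == 1) goto <bb 5>; else goto <bb 3>;
     <bb 3>: if (a_1 == 3) goto <bb 5>; else goto <bb 4>;
     <bb 4>: _2 = a_1 == 5;
     <bb 5>: # ans_3 = PHI <1(2), 1(3), _2(4)>

   where bb 5 is *OTHER_BB: every conditional block branches to it
   directly (or through an empty fallthru block), and the PHI arguments
   on those edges are the constants 0/1 or the last statement's lhs.  */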
4450 /* Return true if BB doesn't have side-effects that would disallow
4451 range test optimization, all SSA_NAMEs set in the bb are consumed
4452 in the bb and there are no PHIs. */
4454 bool
4455 no_side_effect_bb (basic_block bb)
4457 gimple_stmt_iterator gsi;
4458 gimple *last;
4460 if (!gimple_seq_empty_p (phi_nodes (bb)))
4461 return false;
4462 last = last_stmt (bb);
4463 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4465 gimple *stmt = gsi_stmt (gsi);
4466 tree lhs;
4467 imm_use_iterator imm_iter;
4468 use_operand_p use_p;
4470 if (is_gimple_debug (stmt))
4471 continue;
4472 if (gimple_has_side_effects (stmt))
4473 return false;
4474 if (stmt == last)
4475 return true;
4476 if (!is_gimple_assign (stmt))
4477 return false;
4478 lhs = gimple_assign_lhs (stmt);
4479 if (TREE_CODE (lhs) != SSA_NAME)
4480 return false;
4481 if (gimple_assign_rhs_could_trap_p (stmt))
4482 return false;
4483 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4485 gimple *use_stmt = USE_STMT (use_p);
4486 if (is_gimple_debug (use_stmt))
4487 continue;
4488 if (gimple_bb (use_stmt) != bb)
4489 return false;
4492 return false;
4495 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4496 return true and fill in *OPS recursively. */
4498 static bool
4499 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4500 class loop *loop)
4502 gimple *stmt = SSA_NAME_DEF_STMT (var);
4503 tree rhs[2];
4504 int i;
4506 if (!is_reassociable_op (stmt, code, loop))
4507 return false;
4509 rhs[0] = gimple_assign_rhs1 (stmt);
4510 rhs[1] = gimple_assign_rhs2 (stmt);
4511 gimple_set_visited (stmt, true);
4512 for (i = 0; i < 2; i++)
4513 if (TREE_CODE (rhs[i]) == SSA_NAME
4514 && !get_ops (rhs[i], code, ops, loop)
4515 && has_single_use (rhs[i]))
4517 operand_entry *oe = operand_entry_pool.allocate ();
4519 oe->op = rhs[i];
4520 oe->rank = code;
4521 oe->id = 0;
4522 oe->count = 1;
4523 oe->stmt_to_insert = NULL;
4524 ops->safe_push (oe);
4526 return true;
4529 /* Find the ops that were added by get_ops starting from VAR, see if
4530 they were changed during update_range_test and if yes, create new
4531 stmts. */
4533 static tree
4534 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4535 unsigned int *pidx, class loop *loop)
4537 gimple *stmt = SSA_NAME_DEF_STMT (var);
4538 tree rhs[4];
4539 int i;
4541 if (!is_reassociable_op (stmt, code, loop))
4542 return NULL;
4544 rhs[0] = gimple_assign_rhs1 (stmt);
4545 rhs[1] = gimple_assign_rhs2 (stmt);
4546 rhs[2] = rhs[0];
4547 rhs[3] = rhs[1];
4548 for (i = 0; i < 2; i++)
4549 if (TREE_CODE (rhs[i]) == SSA_NAME)
4551 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4552 if (rhs[2 + i] == NULL_TREE)
4554 if (has_single_use (rhs[i]))
4555 rhs[2 + i] = ops[(*pidx)++]->op;
4556 else
4557 rhs[2 + i] = rhs[i];
4560 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4561 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4563 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4564 var = make_ssa_name (TREE_TYPE (var));
4565 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4566 rhs[2], rhs[3]);
4567 gimple_set_uid (g, gimple_uid (stmt));
4568 gimple_set_visited (g, true);
4569 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4571 return var;
4574 /* Structure to track the initial value passed to get_ops and
4575 the range in the ops vector for each basic block. */
4577 struct inter_bb_range_test_entry
4579 tree op;
4580 unsigned int first_idx, last_idx;
4583 /* Inter-bb range test optimization.
4585 Returns TRUE if a gimple conditional is optimized to a true/false
4586 constant, otherwise returns FALSE.
4588 This indicates to the caller that it should run a CFG cleanup pass
4589 once reassociation is completed. */
4591 static bool
4592 maybe_optimize_range_tests (gimple *stmt)
4594 basic_block first_bb = gimple_bb (stmt);
4595 basic_block last_bb = first_bb;
4596 basic_block other_bb = NULL;
4597 basic_block bb;
4598 edge_iterator ei;
4599 edge e;
4600 auto_vec<operand_entry *> ops;
4601 auto_vec<inter_bb_range_test_entry> bbinfo;
4602 bool any_changes = false;
4603 bool cfg_cleanup_needed = false;
4605 /* Consider only basic blocks that end with GIMPLE_COND or
4606 a cast statement satisfying final_range_test_p. All
4607 but the last bb in the first_bb .. last_bb range
4608 should end with GIMPLE_COND. */
4609 if (gimple_code (stmt) == GIMPLE_COND)
4611 if (EDGE_COUNT (first_bb->succs) != 2)
4612 return cfg_cleanup_needed;
4614 else if (final_range_test_p (stmt))
4615 other_bb = single_succ (first_bb);
4616 else
4617 return cfg_cleanup_needed;
4619 if (stmt_could_throw_p (cfun, stmt))
4620 return cfg_cleanup_needed;
4622 /* As relative ordering of post-dominator sons isn't fixed,
4623 maybe_optimize_range_tests can be called first on any
4624 bb in the range we want to optimize. So, start searching
4625 backwards, if first_bb can be set to a predecessor. */
4626 while (single_pred_p (first_bb))
4628 basic_block pred_bb = single_pred (first_bb);
4629 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4630 break;
4631 if (!no_side_effect_bb (first_bb))
4632 break;
4633 first_bb = pred_bb;
4635 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4636 Before starting forward search in last_bb successors, find
4637 out the other_bb. */
4638 if (first_bb == last_bb)
4640 other_bb = NULL;
4641 /* As non-GIMPLE_COND last stmt always terminates the range,
4642 if forward search didn't discover anything, just give up. */
4643 if (gimple_code (stmt) != GIMPLE_COND)
4644 return cfg_cleanup_needed;
4645 /* Look at both successors. Either it ends with a GIMPLE_COND
4646 and satisfies suitable_cond_bb, or ends with a cast and
4647 other_bb is that cast's successor. */
4648 FOR_EACH_EDGE (e, ei, first_bb->succs)
4649 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4650 || e->dest == first_bb)
4651 return cfg_cleanup_needed;
4652 else if (single_pred_p (e->dest))
4654 stmt = last_stmt (e->dest);
4655 if (stmt
4656 && gimple_code (stmt) == GIMPLE_COND
4657 && EDGE_COUNT (e->dest->succs) == 2)
4659 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4660 NULL, true))
4661 break;
4662 else
4663 other_bb = NULL;
4665 else if (stmt
4666 && final_range_test_p (stmt)
4667 && find_edge (first_bb, single_succ (e->dest)))
4669 other_bb = single_succ (e->dest);
4670 if (other_bb == first_bb)
4671 other_bb = NULL;
4674 if (other_bb == NULL)
4675 return cfg_cleanup_needed;
4677 /* Now do the forward search, moving last_bb to successor bbs
4678 that aren't other_bb. */
4679 while (EDGE_COUNT (last_bb->succs) == 2)
4681 FOR_EACH_EDGE (e, ei, last_bb->succs)
4682 if (e->dest != other_bb)
4683 break;
4684 if (e == NULL)
4685 break;
4686 if (!single_pred_p (e->dest))
4687 break;
4688 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4689 break;
4690 if (!no_side_effect_bb (e->dest))
4691 break;
4692 last_bb = e->dest;
4694 if (first_bb == last_bb)
4695 return cfg_cleanup_needed;
4696 /* Here basic blocks first_bb through last_bb's predecessor
4697 end with GIMPLE_COND, all of them have one of the edges to
4698 other_bb and another to another block in the range,
4699 all blocks except first_bb don't have side-effects and
4700 last_bb ends with either GIMPLE_COND, or cast satisfying
4701 final_range_test_p. */
4702 for (bb = last_bb; ; bb = single_pred (bb))
4704 enum tree_code code;
4705 tree lhs, rhs;
4706 inter_bb_range_test_entry bb_ent;
4708 bb_ent.op = NULL_TREE;
4709 bb_ent.first_idx = ops.length ();
4710 bb_ent.last_idx = bb_ent.first_idx;
4711 e = find_edge (bb, other_bb);
4712 stmt = last_stmt (bb);
4713 gimple_set_visited (stmt, true);
4714 if (gimple_code (stmt) != GIMPLE_COND)
4716 use_operand_p use_p;
4717 gimple *phi;
4718 edge e2;
4719 unsigned int d;
4721 lhs = gimple_assign_lhs (stmt);
4722 rhs = gimple_assign_rhs1 (stmt);
4723 gcc_assert (bb == last_bb);
4725 /* stmt is
4726 _123 = (int) _234;
4728 _234 = a_2(D) == 2;
4730 followed by:
4731 <bb M>:
4732 # _345 = PHI <_123(N), 1(...), 1(...)>
4734 or 0 instead of 1. If it is 0, the _234
4735 range test is anded together with all the
4736 other range tests; if it is 1, it is ored with
4737 them. */
4738 single_imm_use (lhs, &use_p, &phi);
4739 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4740 e2 = find_edge (first_bb, other_bb);
4741 d = e2->dest_idx;
4742 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4743 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4744 code = BIT_AND_EXPR;
4745 else
4747 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4748 code = BIT_IOR_EXPR;
4751 /* If _234's SSA_NAME_DEF_STMT is
4752 _234 = _567 | _789;
4753 (or &, corresponding to 1/0 in the phi arguments),
4754 push into ops the individual range test arguments
4755 of the bitwise or resp. and, recursively. */
4756 if (TREE_CODE (rhs) == SSA_NAME
4757 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4758 != tcc_comparison)
4759 && !get_ops (rhs, code, &ops,
4760 loop_containing_stmt (stmt))
4761 && has_single_use (rhs))
4763 /* Otherwise, push the _234 range test itself. */
4764 operand_entry *oe = operand_entry_pool.allocate ();
4766 oe->op = rhs;
4767 oe->rank = code;
4768 oe->id = 0;
4769 oe->count = 1;
4770 oe->stmt_to_insert = NULL;
4771 ops.safe_push (oe);
4772 bb_ent.last_idx++;
4773 bb_ent.op = rhs;
4775 else if (is_gimple_assign (stmt)
4776 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4777 == tcc_comparison)
4778 && !get_ops (lhs, code, &ops,
4779 loop_containing_stmt (stmt))
4780 && has_single_use (lhs))
4782 operand_entry *oe = operand_entry_pool.allocate ();
4783 oe->op = lhs;
4784 oe->rank = code;
4785 oe->id = 0;
4786 oe->count = 1;
4787 ops.safe_push (oe);
4788 bb_ent.last_idx++;
4789 bb_ent.op = lhs;
4791 else
4793 bb_ent.last_idx = ops.length ();
4794 bb_ent.op = rhs;
4796 bbinfo.safe_push (bb_ent);
4797 continue;
4799 else if (bb == last_bb)
4801 /* For last_bb, handle also:
4802 if (x_3(D) == 3)
4803 goto <bb 6>; [34.00%]
4804 else
4805 goto <bb 7>; [66.00%]
4807 <bb 6> [local count: 79512730]:
4809 <bb 7> [local count: 1073741824]:
4810 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4811 where bb 7 is OTHER_BB, but the PHI values from the
4812 earlier bbs match the path through the empty bb
4813 in between. */
4814 bool test_swapped_p = false;
4815 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4816 &other_bb, &test_swapped_p, true);
4817 gcc_assert (ok);
4818 if (test_swapped_p)
4819 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4821 /* Otherwise stmt is GIMPLE_COND. */
4822 code = gimple_cond_code (stmt);
4823 lhs = gimple_cond_lhs (stmt);
4824 rhs = gimple_cond_rhs (stmt);
4825 if (TREE_CODE (lhs) == SSA_NAME
4826 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4827 && ((code != EQ_EXPR && code != NE_EXPR)
4828 || rhs != boolean_false_node
4829 /* Either push into ops the individual bitwise
4830 or resp. and operands, depending on which
4831 edge is other_bb. */
4832 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4833 ^ (code == EQ_EXPR))
4834 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4835 loop_containing_stmt (stmt))))
4837 /* Or push the GIMPLE_COND stmt itself. */
4838 operand_entry *oe = operand_entry_pool.allocate ();
4840 oe->op = NULL;
4841 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4842 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4843 /* oe->op = NULL signals that there is no SSA_NAME
4844 for the range test, and oe->id instead is the
4845 number of the basic block at whose end the
4846 GIMPLE_COND is. */
4847 oe->id = bb->index;
4848 oe->count = 1;
4849 oe->stmt_to_insert = NULL;
4850 ops.safe_push (oe);
4851 bb_ent.op = NULL;
4852 bb_ent.last_idx++;
4854 else if (ops.length () > bb_ent.first_idx)
4856 bb_ent.op = lhs;
4857 bb_ent.last_idx = ops.length ();
4859 bbinfo.safe_push (bb_ent);
4860 if (bb == first_bb)
4861 break;
4863 if (ops.length () > 1)
4864 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4865 if (any_changes)
4867 unsigned int idx, max_idx = 0;
4868 /* update_ops relies on the has_single_use predicates returning the
4869 same values as they did during get_ops earlier. Additionally it
4870 never removes statements, only adds new ones, and it should walk
4871 from the single imm use and check the predicate already before
4872 making those changes.
4873 On the other hand, the handling of GIMPLE_COND directly can turn
4874 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4875 it needs to be done in a separate loop afterwards. */
4876 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4878 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4879 && bbinfo[idx].op != NULL_TREE)
4881 tree new_op;
4883 max_idx = idx;
4884 stmt = last_stmt (bb);
4885 new_op = update_ops (bbinfo[idx].op,
4886 (enum tree_code)
4887 ops[bbinfo[idx].first_idx]->rank,
4888 ops, &bbinfo[idx].first_idx,
4889 loop_containing_stmt (stmt));
4890 if (new_op == NULL_TREE)
4892 gcc_assert (bb == last_bb);
4893 new_op = ops[bbinfo[idx].first_idx++]->op;
4895 if (bbinfo[idx].op != new_op)
4897 imm_use_iterator iter;
4898 use_operand_p use_p;
4899 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4901 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4902 if (is_gimple_debug (use_stmt))
4903 continue;
4904 else if (gimple_code (use_stmt) == GIMPLE_COND
4905 || gimple_code (use_stmt) == GIMPLE_PHI)
4906 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4907 SET_USE (use_p, new_op);
4908 else if ((is_gimple_assign (use_stmt)
4909 && (TREE_CODE_CLASS
4910 (gimple_assign_rhs_code (use_stmt))
4911 == tcc_comparison)))
4912 cast_or_tcc_cmp_stmt = use_stmt;
4913 else if (gimple_assign_cast_p (use_stmt))
4914 cast_or_tcc_cmp_stmt = use_stmt;
4915 else
4916 gcc_unreachable ();
4918 if (cast_or_tcc_cmp_stmt)
4920 gcc_assert (bb == last_bb);
4921 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4922 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4923 enum tree_code rhs_code
4924 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4925 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4926 : CONVERT_EXPR;
4927 gassign *g;
4928 if (is_gimple_min_invariant (new_op))
4930 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4931 g = gimple_build_assign (new_lhs, new_op);
4933 else
4934 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4935 gimple_stmt_iterator gsi
4936 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4937 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4938 gimple_set_visited (g, true);
4939 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4940 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4941 if (is_gimple_debug (use_stmt))
4942 continue;
4943 else if (gimple_code (use_stmt) == GIMPLE_COND
4944 || gimple_code (use_stmt) == GIMPLE_PHI)
4945 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4946 SET_USE (use_p, new_lhs);
4947 else
4948 gcc_unreachable ();
4952 if (bb == first_bb)
4953 break;
4955 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4957 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4958 && bbinfo[idx].op == NULL_TREE
4959 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4961 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4963 if (idx > max_idx)
4964 max_idx = idx;
4966 /* If we collapse the conditional to a true/false
4967 condition, then bubble that knowledge up to our caller. */
4968 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4970 gimple_cond_make_false (cond_stmt);
4971 cfg_cleanup_needed = true;
4973 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4975 gimple_cond_make_true (cond_stmt);
4976 cfg_cleanup_needed = true;
4978 else
4980 gimple_cond_set_code (cond_stmt, NE_EXPR);
4981 gimple_cond_set_lhs (cond_stmt,
4982 ops[bbinfo[idx].first_idx]->op);
4983 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4985 update_stmt (cond_stmt);
4987 if (bb == first_bb)
4988 break;
4991 /* The above changes could result in basic blocks after the first
4992 modified one, up to and including last_bb, being executed even if
4993 they would not be in the original program. If the value ranges of
4994 assignment lhs' in those bbs were dependent on the conditions
4995 guarding those basic blocks which now can change, the VRs might
4996 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4997 are only used within the same bb, it should not be a big deal if
4998 we just reset all the VRs in those bbs. See PR68671. */
4999 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5000 reset_flow_sensitive_info_in_bb (bb);
5002 return cfg_cleanup_needed;
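/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  At the source level,
   maybe_optimize_range_tests targets short-circuited chains such as

     if (x == 0 || x == 1 || x == 2 || x == 3)
       foo ();

   where each subcondition ends up as a GIMPLE_COND in its own basic
   block.  After the ops are merged (here into something equivalent to
   (unsigned int) x <= 3U), the now-redundant GIMPLE_CONDs are turned
   into constant true/false conditions, which is why the caller is told
   to schedule a CFG cleanup.  */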
5005 /* Return true if OPERAND is defined by a PHI node which uses the LHS
5006 of STMT in its operands. This is also known as a "destructive
5007 update" operation. */
5009 static bool
5010 is_phi_for_stmt (gimple *stmt, tree operand)
5012 gimple *def_stmt;
5013 gphi *def_phi;
5014 tree lhs;
5015 use_operand_p arg_p;
5016 ssa_op_iter i;
5018 if (TREE_CODE (operand) != SSA_NAME)
5019 return false;
5021 lhs = gimple_assign_lhs (stmt);
5023 def_stmt = SSA_NAME_DEF_STMT (operand);
5024 def_phi = dyn_cast <gphi *> (def_stmt);
5025 if (!def_phi)
5026 return false;
5028 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
5029 if (lhs == USE_FROM_PTR (arg_p))
5030 return true;
5031 return false;
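/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  A "destructive update" is the
   accumulation pattern found in reduction loops, e.g.

     <bb loop>:
       # sum_1 = PHI <0(preheader), sum_2(loop)>
       ...
       sum_2 = x_3 + sum_1;

   Here is_phi_for_stmt (stmt = "sum_2 = x_3 + sum_1", operand = sum_1)
   returns true, because sum_1 is defined by a PHI that uses sum_2, the
   lhs of STMT.  Keeping sum_1 last preserves the shape the vectorizer
   recognizes as a sum reduction.  */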
5034 /* Remove def stmt of VAR if VAR has zero uses and recurse
5035 on rhs1 operand if so. */
5037 static void
5038 remove_visited_stmt_chain (tree var)
5040 gimple *stmt;
5041 gimple_stmt_iterator gsi;
5043 while (1)
5045 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5046 return;
5047 stmt = SSA_NAME_DEF_STMT (var);
5048 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5050 var = gimple_assign_rhs1 (stmt);
5051 gsi = gsi_for_stmt (stmt);
5052 reassoc_remove_stmt (&gsi);
5053 release_defs (stmt);
5055 else
5056 return;
5060 /* This function checks three consecutive operands in
5061 the passed operands vector OPS starting from OPINDEX and
5062 swaps two operands if it is profitable for the binary operation
5063 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5065 We pair ops with the same rank if possible.
5067 The alternative we try is to see if STMT is a destructive
5068 update style statement, which is like:
5069 b = phi (a, ...)
5070 a = c + b;
5071 In that case, we want to use the destructive update form to
5072 expose the possible vectorizer sum reduction opportunity.
5073 In that case, the third operand will be the phi node. This
5074 check is not performed if STMT is null.
5076 We could, of course, try to be better as noted above, and do a
5077 lot of work to try to find these opportunities in >3 operand
5078 cases, but it is unlikely to be worth it. */
5080 static void
5081 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5082 unsigned int opindex, gimple *stmt)
5084 operand_entry *oe1, *oe2, *oe3;
5086 oe1 = ops[opindex];
5087 oe2 = ops[opindex + 1];
5088 oe3 = ops[opindex + 2];
5090 if ((oe1->rank == oe2->rank
5091 && oe2->rank != oe3->rank)
5092 || (stmt && is_phi_for_stmt (stmt, oe3->op)
5093 && !is_phi_for_stmt (stmt, oe1->op)
5094 && !is_phi_for_stmt (stmt, oe2->op)))
5095 std::swap (*oe1, *oe3);
5096 else if ((oe1->rank == oe3->rank
5097 && oe2->rank != oe3->rank)
5098 || (stmt && is_phi_for_stmt (stmt, oe2->op)
5099 && !is_phi_for_stmt (stmt, oe1->op)
5100 && !is_phi_for_stmt (stmt, oe3->op)))
5101 std::swap (*oe1, *oe2);
5104 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5105 two definitions, otherwise return STMT. */
5107 static inline gimple *
5108 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
5110 if (TREE_CODE (rhs1) == SSA_NAME
5111 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5112 stmt = SSA_NAME_DEF_STMT (rhs1);
5113 if (TREE_CODE (rhs2) == SSA_NAME
5114 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5115 stmt = SSA_NAME_DEF_STMT (rhs2);
5116 return stmt;
5119 /* If the stmt that defines operand has to be inserted, insert it
5120 before the use. */
5121 static void
5122 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5124 gcc_assert (is_gimple_assign (stmt_to_insert));
5125 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5126 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5127 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
5128 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5129 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5131 /* If the insert point is not stmt, then insert_point would be
5132 the point where operand rhs1 or rhs2 is defined. In this case,
5133 stmt_to_insert has to be inserted afterwards. This would
5134 only happen when the stmt insertion point is flexible. */
5135 if (stmt == insert_point)
5136 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5137 else
5138 insert_stmt_after (stmt_to_insert, insert_point);
5142 /* Recursively rewrite our linearized statements so that the operators
5143 match those in OPS[OPINDEX], putting the computation in rank
5144 order. Return new lhs.
5145 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5146 the current stmt and during recursive invocations.
5147 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5148 recursive invocations. */
5150 static tree
5151 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5152 const vec<operand_entry *> &ops, bool changed,
5153 bool next_changed)
5155 tree rhs1 = gimple_assign_rhs1 (stmt);
5156 tree rhs2 = gimple_assign_rhs2 (stmt);
5157 tree lhs = gimple_assign_lhs (stmt);
5158 operand_entry *oe;
5160 /* The final recursion case for this function is that you have
5161 exactly two operations left.
5162 If we had exactly one op in the entire list to start with, we
5163 would have never called this function, and the tail recursion
5164 rewrites them one at a time. */
5165 if (opindex + 2 == ops.length ())
5167 operand_entry *oe1, *oe2;
5169 oe1 = ops[opindex];
5170 oe2 = ops[opindex + 1];
5172 if (rhs1 != oe1->op || rhs2 != oe2->op)
5174 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5175 unsigned int uid = gimple_uid (stmt);
5177 if (dump_file && (dump_flags & TDF_DETAILS))
5179 fprintf (dump_file, "Transforming ");
5180 print_gimple_stmt (dump_file, stmt, 0);
5183 /* If the stmt that defines operand has to be inserted, insert it
5184 before the use. */
5185 if (oe1->stmt_to_insert)
5186 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5187 if (oe2->stmt_to_insert)
5188 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5189 /* Even when changed is false, reassociation could have e.g. removed
5190 some redundant operations, so unless we are just swapping the
5191 arguments or unless there is no change at all (then we just
5192 return lhs), force creation of a new SSA_NAME. */
5193 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5195 gimple *insert_point
5196 = find_insert_point (stmt, oe1->op, oe2->op);
5197 lhs = make_ssa_name (TREE_TYPE (lhs));
5198 stmt
5199 = gimple_build_assign (lhs, rhs_code,
5200 oe1->op, oe2->op);
5201 gimple_set_uid (stmt, uid);
5202 gimple_set_visited (stmt, true);
5203 if (insert_point == gsi_stmt (gsi))
5204 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5205 else
5206 insert_stmt_after (stmt, insert_point);
5208 else
5210 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5211 == stmt);
5212 gimple_assign_set_rhs1 (stmt, oe1->op);
5213 gimple_assign_set_rhs2 (stmt, oe2->op);
5214 update_stmt (stmt);
5217 if (rhs1 != oe1->op && rhs1 != oe2->op)
5218 remove_visited_stmt_chain (rhs1);
5220 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, " into ");
5223 print_gimple_stmt (dump_file, stmt, 0);
5226 return lhs;
5229 /* If we hit here, we should have 3 or more ops left. */
5230 gcc_assert (opindex + 2 < ops.length ());
5232 /* Rewrite the next operator. */
5233 oe = ops[opindex];
5235 /* If the stmt that defines operand has to be inserted, insert it
5236 before the use. */
5237 if (oe->stmt_to_insert)
5238 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5240 /* Recurse on the LHS of the binary operator, which is guaranteed to
5241 be the non-leaf side. */
5242 tree new_rhs1
5243 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5244 changed || oe->op != rhs2 || next_changed,
5245 false);
5247 if (oe->op != rhs2 || new_rhs1 != rhs1)
5249 if (dump_file && (dump_flags & TDF_DETAILS))
5251 fprintf (dump_file, "Transforming ");
5252 print_gimple_stmt (dump_file, stmt, 0);
5255 /* If changed is false, this is either opindex == 0
5256 or all outer rhs2's were equal to corresponding oe->op,
5257 and powi_result is NULL.
5258 That means lhs is equivalent before and after reassociation.
5259 Otherwise ensure the old lhs SSA_NAME is not reused and
5260 create a new stmt as well, so that any debug stmts will be
5261 properly adjusted. */
5262 if (changed)
5264 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5265 unsigned int uid = gimple_uid (stmt);
5266 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5268 lhs = make_ssa_name (TREE_TYPE (lhs));
5269 stmt = gimple_build_assign (lhs, rhs_code,
5270 new_rhs1, oe->op);
5271 gimple_set_uid (stmt, uid);
5272 gimple_set_visited (stmt, true);
5273 if (insert_point == gsi_stmt (gsi))
5274 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5275 else
5276 insert_stmt_after (stmt, insert_point);
5278 else
5280 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5281 == stmt);
5282 gimple_assign_set_rhs1 (stmt, new_rhs1);
5283 gimple_assign_set_rhs2 (stmt, oe->op);
5284 update_stmt (stmt);
5287 if (dump_file && (dump_flags & TDF_DETAILS))
5289 fprintf (dump_file, " into ");
5290 print_gimple_stmt (dump_file, stmt, 0);
5293 return lhs;
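/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  Given the linearized chain

     _1 = x_4 + x_5;
     _2 = _1 + x_6;
     _3 = _2 + x_7;

   and a sorted ops vector {a, b, c, d}, rewrite_expr_tree recurses down
   the rhs1 chain and rewrites it so that the last two entries feed the
   innermost statement and ops[0] feeds the root:

     _8 = c + d;
     _9 = _8 + b;
     _3 = _9 + a;

   creating new SSA names when operands actually change so that debug
   statements can be adjusted.  */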
5296 /* Find out how many cycles we need to compute the statements chain.
5297 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
5298 maximum number of independent statements we may execute per cycle. */
5300 static int
5301 get_required_cycles (int ops_num, int cpu_width)
5303 int res;
5304 int elog;
5305 unsigned int rest;
5307 /* While we have more than 2 * cpu_width operands
5308 we may reduce the number of operands by cpu_width
5309 per cycle. */
5310 res = ops_num / (2 * cpu_width);
5312 /* The remaining operand count may be halved each cycle
5313 until we have only one operand. */
5314 rest = (unsigned)(ops_num - res * cpu_width);
5315 elog = exact_log2 (rest);
5316 if (elog >= 0)
5317 res += elog;
5318 else
5319 res += floor_log2 (rest) + 1;
5321 return res;
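/* Worked example (added for exposition, not part of the original
   sources).  With ops_num == 10 and cpu_width == 2:
     res  = 10 / (2 * 2) = 2   (two cycles shrinking by cpu_width each)
     rest = 10 - 2 * 2   = 6
   6 is not a power of two, so res += floor_log2 (6) + 1 = 3, giving a
   total of 5 cycles for the whole chain.  */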
5324 /* Returns an optimal number of registers to use for computation of
5325 given statements. */
5327 static int
5328 get_reassociation_width (int ops_num, enum tree_code opc,
5329 machine_mode mode)
5331 int param_width = param_tree_reassoc_width;
5332 int width;
5333 int width_min;
5334 int cycles_best;
5336 if (param_width > 0)
5337 width = param_width;
5338 else
5339 width = targetm.sched.reassociation_width (opc, mode);
5341 if (width == 1)
5342 return width;
5344 /* Get the minimal time required for sequence computation. */
5345 cycles_best = get_required_cycles (ops_num, width);
5347 /* Check if we may use a smaller width and still compute the sequence
5348 in the same time. That will allow us to reduce register usage.
5349 get_required_cycles is monotonically increasing with lower width
5350 so we can perform a binary search for the minimal width that still
5351 results in the optimal cycle count. */
5352 width_min = 1;
5353 while (width > width_min)
5355 int width_mid = (width + width_min) / 2;
5357 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5358 width = width_mid;
5359 else if (width_min < width_mid)
5360 width_min = width_mid;
5361 else
5362 break;
5365 return width;
5368 /* Recursively rewrite our linearized statements so that the operators
5369 match those in OPS[OPINDEX], putting the computation in rank
5370 order and trying to allow operations to be executed in
5371 parallel. */
5373 static void
5374 rewrite_expr_tree_parallel (gassign *stmt, int width,
5375 const vec<operand_entry *> &ops)
5377 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5378 int op_num = ops.length ();
5379 gcc_assert (op_num > 0);
5380 int stmt_num = op_num - 1;
5381 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5382 int op_index = op_num - 1;
5383 int stmt_index = 0;
5384 int ready_stmts_end = 0;
5385 int i = 0;
5386 gimple *stmt1 = NULL, *stmt2 = NULL;
5387 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5389 /* We start expression rewriting from the top statements.
5390 So, in this loop we create a full list of statements
5391 we will work with. */
5392 stmts[stmt_num - 1] = stmt;
5393 for (i = stmt_num - 2; i >= 0; i--)
5394 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5396 for (i = 0; i < stmt_num; i++)
5398 tree op1, op2;
5400 /* Determine whether we should use results of
5401 already handled statements or not. */
5402 if (ready_stmts_end == 0
5403 && (i - stmt_index >= width || op_index < 1))
5404 ready_stmts_end = i;
5406 /* Now we choose operands for the next statement. A nonzero
5407 value in ready_stmts_end means that we should use
5408 the result of already generated statements as a new operand. */
5409 if (ready_stmts_end > 0)
5411 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5412 if (ready_stmts_end > stmt_index)
5413 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5414 else if (op_index >= 0)
5416 operand_entry *oe = ops[op_index--];
5417 stmt2 = oe->stmt_to_insert;
5418 op2 = oe->op;
5420 else
5422 gcc_assert (stmt_index < i);
5423 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5426 if (stmt_index >= ready_stmts_end)
5427 ready_stmts_end = 0;
5429 else
5431 if (op_index > 1)
5432 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5433 operand_entry *oe2 = ops[op_index--];
5434 operand_entry *oe1 = ops[op_index--];
5435 op2 = oe2->op;
5436 stmt2 = oe2->stmt_to_insert;
5437 op1 = oe1->op;
5438 stmt1 = oe1->stmt_to_insert;
5441 /* If we emit the last statement then we should put
5442 operands into the last statement. It will also
5443 break the loop. */
5444 if (op_index < 0 && stmt_index == i)
5445 i = stmt_num - 1;
5447 if (dump_file && (dump_flags & TDF_DETAILS))
5449 fprintf (dump_file, "Transforming ");
5450 print_gimple_stmt (dump_file, stmts[i], 0);
5453 /* If the stmt that defines operand has to be inserted, insert it
5454 before the use. */
5455 if (stmt1)
5456 insert_stmt_before_use (stmts[i], stmt1);
5457 if (stmt2)
5458 insert_stmt_before_use (stmts[i], stmt2);
5459 stmt1 = stmt2 = NULL;
5461 /* We keep original statement only for the last one. All
5462 others are recreated. */
5463 if (i == stmt_num - 1)
5465 gimple_assign_set_rhs1 (stmts[i], op1);
5466 gimple_assign_set_rhs2 (stmts[i], op2);
5467 update_stmt (stmts[i]);
5469 else
5471 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5472 gimple_set_visited (stmts[i], true);
5474 if (dump_file && (dump_flags & TDF_DETAILS))
5476 fprintf (dump_file, " into ");
5477 print_gimple_stmt (dump_file, stmts[i], 0);
5481 remove_visited_stmt_chain (last_rhs1);
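/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  With width == 2 and four operands
   {a, b, c, d}, the parallel rewrite produces two independent partial
   sums that can issue in the same cycle, followed by their combination:

     _1 = a + b;
     _2 = c + d;
     _3 = _1 + _2;

   instead of the serial chain ((a + b) + c) + d, which has a longer
   dependency height.  */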
5484 /* Transform STMT, which is really (A + B) + (C + D), into the left
5485 linear form, ((A+B)+C)+D.
5486 Recurse on D if necessary. */
5488 static void
5489 linearize_expr (gimple *stmt)
5491 gimple_stmt_iterator gsi;
5492 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5493 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5494 gimple *oldbinrhs = binrhs;
5495 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5496 gimple *newbinrhs = NULL;
5497 class loop *loop = loop_containing_stmt (stmt);
5498 tree lhs = gimple_assign_lhs (stmt);
5500 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5501 && is_reassociable_op (binrhs, rhscode, loop));
5503 gsi = gsi_for_stmt (stmt);
5505 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5506 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5507 gimple_assign_rhs_code (binrhs),
5508 gimple_assign_lhs (binlhs),
5509 gimple_assign_rhs2 (binrhs));
5510 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5511 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5512 gimple_set_uid (binrhs, gimple_uid (stmt));
5514 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5515 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5517 if (dump_file && (dump_flags & TDF_DETAILS))
5519 fprintf (dump_file, "Linearized: ");
5520 print_gimple_stmt (dump_file, stmt, 0);
5523 reassociate_stats.linearized++;
5524 update_stmt (stmt);
5526 gsi = gsi_for_stmt (oldbinrhs);
5527 reassoc_remove_stmt (&gsi);
5528 release_defs (oldbinrhs);
5530 gimple_set_visited (stmt, true);
5531 gimple_set_visited (binlhs, true);
5532 gimple_set_visited (binrhs, true);
5534 /* Tail recurse on the new rhs if it still needs reassociation. */
5535 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5536 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5537 want to change the algorithm while converting to tuples. */
5538 linearize_expr (stmt);
5541 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5542 it. Otherwise, return NULL. */
5544 static gimple *
5545 get_single_immediate_use (tree lhs)
5547 use_operand_p immuse;
5548 gimple *immusestmt;
5550 if (TREE_CODE (lhs) == SSA_NAME
5551 && single_imm_use (lhs, &immuse, &immusestmt)
5552 && is_gimple_assign (immusestmt))
5553 return immusestmt;
5555 return NULL;
5558 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5559 representing the negated value. Insertions of any necessary
5560 instructions go before GSI.
5561 This function is recursive in that, if you hand it "a_5" as the
5562 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5563 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5565 static tree
5566 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5568 gimple *negatedefstmt = NULL;
5569 tree resultofnegate;
5570 gimple_stmt_iterator gsi;
5571 unsigned int uid;
5573 /* If we are trying to negate a name, defined by an add, negate the
5574 add operands instead. */
5575 if (TREE_CODE (tonegate) == SSA_NAME)
5576 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5577 if (TREE_CODE (tonegate) == SSA_NAME
5578 && is_gimple_assign (negatedefstmt)
5579 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5580 && has_single_use (gimple_assign_lhs (negatedefstmt))
5581 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5583 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5584 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5585 tree lhs = gimple_assign_lhs (negatedefstmt);
5586 gimple *g;
5588 gsi = gsi_for_stmt (negatedefstmt);
5589 rhs1 = negate_value (rhs1, &gsi);
5591 gsi = gsi_for_stmt (negatedefstmt);
5592 rhs2 = negate_value (rhs2, &gsi);
5594 gsi = gsi_for_stmt (negatedefstmt);
5595 lhs = make_ssa_name (TREE_TYPE (lhs));
5596 gimple_set_visited (negatedefstmt, true);
5597 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5598 gimple_set_uid (g, gimple_uid (negatedefstmt));
5599 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5600 return lhs;
5603 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5604 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5605 NULL_TREE, true, GSI_SAME_STMT);
5606 gsi = *gsip;
5607 uid = gimple_uid (gsi_stmt (gsi));
5608 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5610 gimple *stmt = gsi_stmt (gsi);
5611 if (gimple_uid (stmt) != 0)
5612 break;
5613 gimple_set_uid (stmt, uid);
5615 return resultofnegate;
5618 /* Return true if we should break up the subtract in STMT into an add
5619 with negate. This is true when the subtract operands are really
5620 adds, or the subtract itself is used in an add expression. In
5621 either case, breaking up the subtract into an add with negate
5622 exposes the adds to reassociation. */
5624 static bool
5625 should_break_up_subtract (gimple *stmt)
5627 tree lhs = gimple_assign_lhs (stmt);
5628 tree binlhs = gimple_assign_rhs1 (stmt);
5629 tree binrhs = gimple_assign_rhs2 (stmt);
5630 gimple *immusestmt;
5631 class loop *loop = loop_containing_stmt (stmt);
5633 if (TREE_CODE (binlhs) == SSA_NAME
5634 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5635 return true;
5637 if (TREE_CODE (binrhs) == SSA_NAME
5638 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5639 return true;
5641 if (TREE_CODE (lhs) == SSA_NAME
5642 && (immusestmt = get_single_immediate_use (lhs))
5643 && is_gimple_assign (immusestmt)
5644 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5645 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5646 && gimple_assign_rhs1 (immusestmt) == lhs)
5647 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5648 return true;
5649 return false;
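/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  Breaking up a subtract pays off
   when the result feeds further additions, e.g.

     x_1 = a_2 - b_3;
     y_4 = x_1 + c_5;

   becomes, after break_up_subtract,

     t_6 = -b_3;
     x_1 = a_2 + t_6;
     y_4 = x_1 + c_5;

   so the whole computation is a single reassociable PLUS chain over
   {a_2, -b_3, c_5}.  */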
5652 /* Transform STMT from A - B into A + -B. */
5654 static void
5655 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5657 tree rhs1 = gimple_assign_rhs1 (stmt);
5658 tree rhs2 = gimple_assign_rhs2 (stmt);
5660 if (dump_file && (dump_flags & TDF_DETAILS))
5662 fprintf (dump_file, "Breaking up subtract ");
5663 print_gimple_stmt (dump_file, stmt, 0);
5666 rhs2 = negate_value (rhs2, gsip);
5667 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5668 update_stmt (stmt);
5671 /* Determine whether STMT is a builtin call that raises an SSA name
5672 to an integer power and has only one use. If so, and this is early
5673 reassociation and unsafe math optimizations are permitted, place
5674 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5675 If any of these conditions does not hold, return FALSE. */
5677 static bool
5678 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5680 tree arg1;
5681 REAL_VALUE_TYPE c, cint;
5683 switch (gimple_call_combined_fn (stmt))
5685 CASE_CFN_POW:
5686 if (flag_errno_math)
5687 return false;
5689 *base = gimple_call_arg (stmt, 0);
5690 arg1 = gimple_call_arg (stmt, 1);
5692 if (TREE_CODE (arg1) != REAL_CST)
5693 return false;
5695 c = TREE_REAL_CST (arg1);
5697 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5698 return false;
5700 *exponent = real_to_integer (&c);
5701 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5702 if (!real_identical (&c, &cint))
5703 return false;
5705 break;
5707 CASE_CFN_POWI:
5708 *base = gimple_call_arg (stmt, 0);
5709 arg1 = gimple_call_arg (stmt, 1);
5711 if (!tree_fits_shwi_p (arg1))
5712 return false;
5714 *exponent = tree_to_shwi (arg1);
5715 break;
5717 default:
5718 return false;
5721 /* Expanding negative exponents is generally unproductive, so we don't
5722 complicate matters with those. Exponents of zero and one should
5723 have been handled by expression folding. */
5724 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5725 return false;
5727 return true;
5730 /* Try to derive and add operand entry for OP to *OPS. Return false if
5731 unsuccessful. */
5733 static bool
5734 try_special_add_to_ops (vec<operand_entry *> *ops,
5735 enum tree_code code,
5736 tree op, gimple* def_stmt)
5738 tree base = NULL_TREE;
5739 HOST_WIDE_INT exponent = 0;
5741 if (TREE_CODE (op) != SSA_NAME
5742 || ! has_single_use (op))
5743 return false;
5745 if (code == MULT_EXPR
5746 && reassoc_insert_powi_p
5747 && flag_unsafe_math_optimizations
5748 && is_gimple_call (def_stmt)
5749 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5751 add_repeat_to_ops_vec (ops, base, exponent);
5752 gimple_set_visited (def_stmt, true);
5753 return true;
5755 else if (code == MULT_EXPR
5756 && is_gimple_assign (def_stmt)
5757 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5758 && !HONOR_SNANS (TREE_TYPE (op))
5759 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5760 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5762 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5763 tree cst = build_minus_one_cst (TREE_TYPE (op));
5764 add_to_ops_vec (ops, rhs1);
5765 add_to_ops_vec (ops, cst);
5766 gimple_set_visited (def_stmt, true);
5767 return true;
5770 return false;
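/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  For the NEGATE_EXPR case above, a
   multiplication chain such as

     t_1 = -y_2;
     r_3 = x_4 * t_1;

   contributes the entries {y_2, -1} instead of {t_1}, so the ops vector
   for r_3 becomes {x_4, y_2, -1} and the sign can be folded with other
   negations or constants during reassociation.  */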
5773 /* Recursively linearize a binary expression that is the RHS of STMT.
5774 Place the operands of the expression tree in the vector named OPS. */
5776 static void
5777 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5778 bool is_associative, bool set_visited)
5780 tree binlhs = gimple_assign_rhs1 (stmt);
5781 tree binrhs = gimple_assign_rhs2 (stmt);
5782 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5783 bool binlhsisreassoc = false;
5784 bool binrhsisreassoc = false;
5785 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5786 class loop *loop = loop_containing_stmt (stmt);
5788 if (set_visited)
5789 gimple_set_visited (stmt, true);
5791 if (TREE_CODE (binlhs) == SSA_NAME)
5793 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5794 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5795 && !stmt_could_throw_p (cfun, binlhsdef));
5798 if (TREE_CODE (binrhs) == SSA_NAME)
5800 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5801 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5802 && !stmt_could_throw_p (cfun, binrhsdef));
5805 /* If the LHS is not reassociable, but the RHS is, we need to swap
5806 them. If neither is reassociable, there is nothing we can do, so
5807 just put them in the ops vector. If the LHS is reassociable,
5808 linearize it. If both are reassociable, then linearize the RHS
5809 and the LHS. */
5811 if (!binlhsisreassoc)
5813 /* If this is not an associative operation (e.g. it is a division), give up. */
5814 if (!is_associative)
5816 add_to_ops_vec (ops, binrhs);
5817 return;
5820 if (!binrhsisreassoc)
5822 bool swap = false;
5823 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5824 /* If we add ops for the rhs we expect to be able to recurse
5825    to it via the lhs during expression rewrite, so swap the
5826    operands. */
5827 swap = true;
5828 else
5829 add_to_ops_vec (ops, binrhs);
5831 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5832 add_to_ops_vec (ops, binlhs);
5834 if (!swap)
5835 return;
5838 if (dump_file && (dump_flags & TDF_DETAILS))
5840 fprintf (dump_file, "swapping operands of ");
5841 print_gimple_stmt (dump_file, stmt, 0);
5844 swap_ssa_operands (stmt,
5845 gimple_assign_rhs1_ptr (stmt),
5846 gimple_assign_rhs2_ptr (stmt));
5847 update_stmt (stmt);
5849 if (dump_file && (dump_flags & TDF_DETAILS))
5851 fprintf (dump_file, " is now ");
5852 print_gimple_stmt (dump_file, stmt, 0);
5854 if (!binrhsisreassoc)
5855 return;
5857 /* We want to make it so the lhs is always the reassociative op,
5858 so swap. */
5859 std::swap (binlhs, binrhs);
5861 else if (binrhsisreassoc)
5863 linearize_expr (stmt);
5864 binlhs = gimple_assign_rhs1 (stmt);
5865 binrhs = gimple_assign_rhs2 (stmt);
5868 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5869 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5870 rhscode, loop));
5871 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5872 is_associative, set_visited);
5874 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5875 add_to_ops_vec (ops, binrhs);
5878 /* Repropagate the negates back into subtracts, since no other pass
5879 currently does it. */
5881 static void
5882 repropagate_negates (void)
5884 unsigned int i = 0;
5885 tree negate;
5887 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5889 gimple *user = get_single_immediate_use (negate);
5891 if (!user || !is_gimple_assign (user))
5892 continue;
5894 /* The negate operand can be either operand of a PLUS_EXPR
5895 (it can be the LHS if the RHS is a constant for example).
5897 Force the negate operand to the RHS of the PLUS_EXPR, then
5898 transform the PLUS_EXPR into a MINUS_EXPR. */
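/* Illustrative example, not part of the original source:

     x = -a;
     y = x + b;

   is canonicalized by the swap below to y = b + x and then rewritten
   as y = b - a; the now-unused negation is left for DCE.  */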
5899 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5901 /* If the negated operand appears on the LHS of the
5902 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5903 to force the negated operand to the RHS of the PLUS_EXPR. */
5904 if (gimple_assign_rhs1 (user) == negate)
5906 swap_ssa_operands (user,
5907 gimple_assign_rhs1_ptr (user),
5908 gimple_assign_rhs2_ptr (user));
5911 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5912 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5913 if (gimple_assign_rhs2 (user) == negate)
5915 tree rhs1 = gimple_assign_rhs1 (user);
5916 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5917 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5918 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5919 update_stmt (user);
5922 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5924 if (gimple_assign_rhs1 (user) == negate)
5926 /* We have
5927 x = -a
5928 y = x - b
5929 which we transform into
5930 x = a + b
5931 y = -x .
5932 This pushes down the negate, which we may be able to merge
5933 into some other operation; hence insert it into the
5934 plus_negates vector. */
5935 gimple *feed = SSA_NAME_DEF_STMT (negate);
5936 tree a = gimple_assign_rhs1 (feed);
5937 tree b = gimple_assign_rhs2 (user);
5938 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5939 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5940 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5941 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5942 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5943 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5944 user = gsi_stmt (gsi2);
5945 update_stmt (user);
5946 reassoc_remove_stmt (&gsi);
5947 release_defs (feed);
5948 plus_negates.safe_push (gimple_assign_lhs (user));
5950 else
5952 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5953 rid of one operation. */
5954 gimple *feed = SSA_NAME_DEF_STMT (negate);
5955 tree a = gimple_assign_rhs1 (feed);
5956 tree rhs1 = gimple_assign_rhs1 (user);
5957 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5958 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5959 update_stmt (gsi_stmt (gsi));
5965 /* Break up subtract operations in block BB.
5967 We do this top down because we don't know whether the subtract is
5968 part of a possible chain of reassociation except at the top.
5970 IE given
5971 d = f + g
5972 c = a + e
5973 b = c - d
5974 q = b - r
5975 k = t - q
5977 we want to break up k = t - q, but we won't until we've transformed q
5978 = b - r, which won't be broken up until we transform b = c - d.
5980 En passant, clear the GIMPLE visited flag on every statement
5981 and set UIDs within each basic block. */
5983 static void
5984 break_up_subtract_bb (basic_block bb)
5986 gimple_stmt_iterator gsi;
5987 basic_block son;
5988 unsigned int uid = 1;
5990 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5992 gimple *stmt = gsi_stmt (gsi);
5993 gimple_set_visited (stmt, false);
5994 gimple_set_uid (stmt, uid++);
5996 if (!is_gimple_assign (stmt)
5997 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
5998 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
5999 continue;
6001 /* Look for simple gimple subtract operations. */
6002 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6004 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6005 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6006 continue;
6008 /* Check for a subtract used only in an addition. If this
6009 is the case, transform it into an add of a negate for better
6010 reassociation. IE transform C = A-B into C = A + -B if C
6011 is only used in an addition. */
6012 if (should_break_up_subtract (stmt))
6013 break_up_subtract (stmt, &gsi);
6015 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6016 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6017 plus_negates.safe_push (gimple_assign_lhs (stmt));
6019 for (son = first_dom_son (CDI_DOMINATORS, bb);
6020 son;
6021 son = next_dom_son (CDI_DOMINATORS, son))
6022 break_up_subtract_bb (son);
6025 /* Used for repeated factor analysis. */
6026 struct repeat_factor
6028 /* An SSA name that occurs in a multiply chain. */
6029 tree factor;
6031 /* Cached rank of the factor. */
6032 unsigned rank;
6034 /* Number of occurrences of the factor in the chain. */
6035 HOST_WIDE_INT count;
6037 /* An SSA name representing the product of this factor and
6038 all factors appearing later in the repeated factor vector. */
6039 tree repr;
6043 static vec<repeat_factor> repeat_factor_vec;
6045 /* Used for sorting the repeat factor vector. Sort primarily by
6046 ascending occurrence count, secondarily by descending rank. */
6048 static int
6049 compare_repeat_factors (const void *x1, const void *x2)
6051 const repeat_factor *rf1 = (const repeat_factor *) x1;
6052 const repeat_factor *rf2 = (const repeat_factor *) x2;
6054 if (rf1->count != rf2->count)
6055 return rf1->count - rf2->count;
6057 return rf2->rank - rf1->rank;
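/* Illustrative example, not part of the original source: factors

     a (count 2, rank 5), b (count 2, rank 9), c (count 3, rank 1)

   sort as b, a, c -- ascending occurrence count first, and within the
   equal-count pair the higher-ranked b before the lower-ranked a.  */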
6060 /* Look for repeated operands in OPS in the multiply tree rooted at
6061 STMT. Replace them with an optimal sequence of multiplies and powi
6062 builtin calls, and remove the used operands from OPS. Return an
6063 SSA name representing the value of the replacement sequence. */
6065 static tree
6066 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6068 unsigned i, j, vec_len;
6069 int ii;
6070 operand_entry *oe;
6071 repeat_factor *rf1, *rf2;
6072 repeat_factor rfnew;
6073 tree result = NULL_TREE;
6074 tree target_ssa, iter_result;
6075 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6076 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6077 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6078 gimple *mul_stmt, *pow_stmt;
6080 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6081 target, unless type is integral. */
6082 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6083 return NULL_TREE;
6085 /* Allocate the repeated factor vector. */
6086 repeat_factor_vec.create (10);
6088 /* Scan the OPS vector for all SSA names in the product and build
6089 up a vector of occurrence counts for each factor. */
6090 FOR_EACH_VEC_ELT (*ops, i, oe)
6092 if (TREE_CODE (oe->op) == SSA_NAME)
6094 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6096 if (rf1->factor == oe->op)
6098 rf1->count += oe->count;
6099 break;
6103 if (j >= repeat_factor_vec.length ())
6105 rfnew.factor = oe->op;
6106 rfnew.rank = oe->rank;
6107 rfnew.count = oe->count;
6108 rfnew.repr = NULL_TREE;
6109 repeat_factor_vec.safe_push (rfnew);
6114 /* Sort the repeated factor vector by (a) increasing occurrence count,
6115 and (b) decreasing rank. */
6116 repeat_factor_vec.qsort (compare_repeat_factors);
6118 /* It is generally best to combine as many base factors as possible
6119 into a product before applying __builtin_powi to the result.
6120 However, the sort order chosen for the repeated factor vector
6121 allows us to cache partial results for the product of the base
6122 factors for subsequent use. When we already have a cached partial
6123 result from a previous iteration, it is best to make use of it
6124 before looking for another __builtin_powi opportunity.
6126 As an example, consider x * x * y * y * y * z * z * z * z.
6127 We want to first compose the product x * y * z, raise it to the
6128 second power, then multiply this by y * z, and finally multiply
6129 by z. This can be done in 5 multiplies provided we cache y * z
6130 for use in both expressions:
6132 t1 = y * z
6133 t2 = t1 * x
6134 t3 = t2 * t2
6135 t4 = t1 * t3
6136 result = t4 * z
6138 If we instead ignored the cached y * z and first multiplied by
6139 the __builtin_powi opportunity z * z, we would get the inferior:
6141 t1 = y * z
6142 t2 = t1 * x
6143 t3 = t2 * t2
6144 t4 = z * z
6145 t5 = t3 * t4
6146 result = t5 * y */
6148 vec_len = repeat_factor_vec.length ();
6150 /* Repeatedly look for opportunities to create a builtin_powi call. */
6151 while (true)
6153 HOST_WIDE_INT power;
6155 /* First look for the largest cached product of factors from
6156 preceding iterations. If found, create a builtin_powi for
6157 it if the minimum occurrence count for its factors is at
6158 least 2, or just use this cached product as our next
6159 multiplicand if the minimum occurrence count is 1. */
6160 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6162 if (rf1->repr && rf1->count > 0)
6163 break;
6166 if (j < vec_len)
6168 power = rf1->count;
6170 if (power == 1)
6172 iter_result = rf1->repr;
6174 if (dump_file && (dump_flags & TDF_DETAILS))
6176 unsigned elt;
6177 repeat_factor *rf;
6178 fputs ("Multiplying by cached product ", dump_file);
6179 for (elt = j; elt < vec_len; elt++)
6181 rf = &repeat_factor_vec[elt];
6182 print_generic_expr (dump_file, rf->factor);
6183 if (elt < vec_len - 1)
6184 fputs (" * ", dump_file);
6186 fputs ("\n", dump_file);
6189 else
6191 if (INTEGRAL_TYPE_P (type))
6193 gcc_assert (power > 1);
6194 gimple_stmt_iterator gsip = gsi;
6195 gsi_prev (&gsip);
6196 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6197 rf1->repr, power);
6198 gimple_stmt_iterator gsic = gsi;
6199 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6201 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6202 gimple_set_visited (gsi_stmt (gsic), true);
6203 gsi_prev (&gsic);
6206 else
6208 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6209 pow_stmt
6210 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6211 build_int_cst (integer_type_node,
6212 power));
6213 gimple_call_set_lhs (pow_stmt, iter_result);
6214 gimple_set_location (pow_stmt, gimple_location (stmt));
6215 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6216 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6219 if (dump_file && (dump_flags & TDF_DETAILS))
6221 unsigned elt;
6222 repeat_factor *rf;
6223 fputs ("Building __builtin_pow call for cached product (",
6224 dump_file);
6225 for (elt = j; elt < vec_len; elt++)
6227 rf = &repeat_factor_vec[elt];
6228 print_generic_expr (dump_file, rf->factor);
6229 if (elt < vec_len - 1)
6230 fputs (" * ", dump_file);
6232 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6233 power);
6237 else
6239 /* Otherwise, find the first factor in the repeated factor
6240 vector whose occurrence count is at least 2. If no such
6241 factor exists, there are no builtin_powi opportunities
6242 remaining. */
6243 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6245 if (rf1->count >= 2)
6246 break;
6249 if (j >= vec_len)
6250 break;
6252 power = rf1->count;
6254 if (dump_file && (dump_flags & TDF_DETAILS))
6256 unsigned elt;
6257 repeat_factor *rf;
6258 fputs ("Building __builtin_pow call for (", dump_file);
6259 for (elt = j; elt < vec_len; elt++)
6261 rf = &repeat_factor_vec[elt];
6262 print_generic_expr (dump_file, rf->factor);
6263 if (elt < vec_len - 1)
6264 fputs (" * ", dump_file);
6266 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6269 reassociate_stats.pows_created++;
6271 /* Visit each element of the vector in reverse order (so that
6272 high-occurrence elements are visited first, and within the
6273 same occurrence count, lower-ranked elements are visited
6274 first). Form a linear product of all elements in this order
6275 whose occurrence count is at least that of element J.
6276 Record the SSA name representing the product of each element
6277 with all subsequent elements in the vector. */
6278 if (j == vec_len - 1)
6279 rf1->repr = rf1->factor;
6280 else
6282 for (ii = vec_len - 2; ii >= (int)j; ii--)
6284 tree op1, op2;
6286 rf1 = &repeat_factor_vec[ii];
6287 rf2 = &repeat_factor_vec[ii + 1];
6289 /* Init the last factor's representative to be itself. */
6290 if (!rf2->repr)
6291 rf2->repr = rf2->factor;
6293 op1 = rf1->factor;
6294 op2 = rf2->repr;
6296 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6297 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6298 op1, op2);
6299 gimple_set_location (mul_stmt, gimple_location (stmt));
6300 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6301 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6302 rf1->repr = target_ssa;
6304 /* Don't reprocess the multiply we just introduced. */
6305 gimple_set_visited (mul_stmt, true);
6309 /* Form a call to __builtin_powi for the maximum product
6310 just formed, raised to the power obtained earlier. */
6311 rf1 = &repeat_factor_vec[j];
6312 if (INTEGRAL_TYPE_P (type))
6314 gcc_assert (power > 1);
6315 gimple_stmt_iterator gsip = gsi;
6316 gsi_prev (&gsip);
6317 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6318 rf1->repr, power);
6319 gimple_stmt_iterator gsic = gsi;
6320 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6322 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6323 gimple_set_visited (gsi_stmt (gsic), true);
6324 gsi_prev (&gsic);
6327 else
6329 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6330 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6331 build_int_cst (integer_type_node,
6332 power));
6333 gimple_call_set_lhs (pow_stmt, iter_result);
6334 gimple_set_location (pow_stmt, gimple_location (stmt));
6335 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6336 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6340 /* If we previously formed at least one other builtin_powi call,
6341 form the product of this one and those others. */
6342 if (result)
6344 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6345 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6346 result, iter_result);
6347 gimple_set_location (mul_stmt, gimple_location (stmt));
6348 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6349 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6350 gimple_set_visited (mul_stmt, true);
6351 result = new_result;
6353 else
6354 result = iter_result;
6356 /* Decrement the occurrence count of each element in the product
6357 by the count found above, and remove this many copies of each
6358 factor from OPS. */
6359 for (i = j; i < vec_len; i++)
6361 unsigned k = power;
6362 unsigned n;
6364 rf1 = &repeat_factor_vec[i];
6365 rf1->count -= power;
6367 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6369 if (oe->op == rf1->factor)
6371 if (oe->count <= k)
6373 ops->ordered_remove (n);
6374 k -= oe->count;
6376 if (k == 0)
6377 break;
6379 else
6381 oe->count -= k;
6382 break;
6389 /* At this point all elements in the repeated factor vector have a
6390 remaining occurrence count of 0 or 1, and those with a count of 1
6391 don't have cached representatives. Re-sort the ops vector and
6392 clean up. */
6393 ops->qsort (sort_by_operand_rank);
6394 repeat_factor_vec.release ();
6396 /* Return the final product computed herein. Note that there may
6397 still be some elements with single occurrence count left in OPS;
6398 those will be handled by the normal reassociation logic. */
6399 return result;
6402 /* Attempt to optimize
6403 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6404 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
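/* Illustrative example, not part of the original source: for

     t = __builtin_copysign (3.0, y);
     r = 2.0 * t;

   the two constants fold and the multiplication is replaced by

     r = __builtin_copysign (6.0, y);

   a negative CST1 would additionally negate the new call's result.  */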
6406 static void
6407 attempt_builtin_copysign (vec<operand_entry *> *ops)
6409 operand_entry *oe;
6410 unsigned int i;
6411 unsigned int length = ops->length ();
6412 tree cst = ops->last ()->op;
6414 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6415 return;
6417 FOR_EACH_VEC_ELT (*ops, i, oe)
6419 if (TREE_CODE (oe->op) == SSA_NAME
6420 && has_single_use (oe->op))
6422 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6423 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6425 tree arg0, arg1;
6426 switch (gimple_call_combined_fn (old_call))
6428 CASE_CFN_COPYSIGN:
6429 CASE_CFN_COPYSIGN_FN:
6430 arg0 = gimple_call_arg (old_call, 0);
6431 arg1 = gimple_call_arg (old_call, 1);
6432 /* The first argument of copysign must be a constant,
6433 otherwise there's nothing to do. */
6434 if (TREE_CODE (arg0) == REAL_CST)
6436 tree type = TREE_TYPE (arg0);
6437 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6438 /* If we couldn't fold to a single constant, skip it.
6439 That happens e.g. for inexact multiplication when
6440 -frounding-math. */
6441 if (mul == NULL_TREE)
6442 break;
6443 /* Instead of adjusting OLD_CALL, let's build a new
6444 call to not leak the LHS and prevent keeping bogus
6445 debug statements. DCE will clean up the old call. */
6446 gcall *new_call;
6447 if (gimple_call_internal_p (old_call))
6448 new_call = gimple_build_call_internal
6449 (IFN_COPYSIGN, 2, mul, arg1);
6450 else
6451 new_call = gimple_build_call
6452 (gimple_call_fndecl (old_call), 2, mul, arg1);
6453 tree lhs = make_ssa_name (type);
6454 gimple_call_set_lhs (new_call, lhs);
6455 gimple_set_location (new_call,
6456 gimple_location (old_call));
6457 insert_stmt_after (new_call, old_call);
6458 /* We've used the constant, get rid of it. */
6459 ops->pop ();
6460 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6461 /* Handle the CST1 < 0 case by negating the result. */
6462 if (cst1_neg)
6464 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6465 gimple *negate_stmt
6466 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6467 insert_stmt_after (negate_stmt, new_call);
6468 oe->op = negrhs;
6470 else
6471 oe->op = lhs;
6472 if (dump_file && (dump_flags & TDF_DETAILS))
6474 fprintf (dump_file, "Optimizing copysign: ");
6475 print_generic_expr (dump_file, cst);
6476 fprintf (dump_file, " * COPYSIGN (");
6477 print_generic_expr (dump_file, arg0);
6478 fprintf (dump_file, ", ");
6479 print_generic_expr (dump_file, arg1);
6480 fprintf (dump_file, ") into %sCOPYSIGN (",
6481 cst1_neg ? "-" : "");
6482 print_generic_expr (dump_file, mul);
6483 fprintf (dump_file, ", ");
6484 print_generic_expr (dump_file, arg1);
6485 fprintf (dump_file, ")\n");
6487 return;
6489 break;
6490 default:
6491 break;
6498 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6500 static void
6501 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6503 tree rhs1;
6505 if (dump_file && (dump_flags & TDF_DETAILS))
6507 fprintf (dump_file, "Transforming ");
6508 print_gimple_stmt (dump_file, stmt, 0);
6511 rhs1 = gimple_assign_rhs1 (stmt);
6512 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6513 update_stmt (stmt);
6514 remove_visited_stmt_chain (rhs1);
6516 if (dump_file && (dump_flags & TDF_DETAILS))
6518 fprintf (dump_file, " into ");
6519 print_gimple_stmt (dump_file, stmt, 0);
6523 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6525 static void
6526 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6527 tree rhs1, tree rhs2)
6529 if (dump_file && (dump_flags & TDF_DETAILS))
6531 fprintf (dump_file, "Transforming ");
6532 print_gimple_stmt (dump_file, stmt, 0);
6535 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6536 update_stmt (gsi_stmt (*gsi));
6537 remove_visited_stmt_chain (rhs1);
6539 if (dump_file && (dump_flags & TDF_DETAILS))
6541 fprintf (dump_file, " into ");
6542 print_gimple_stmt (dump_file, stmt, 0);
6546 /* Reassociate expressions in basic block BB and its children in the
6547 post-dominator tree.
6549 Bubble up return status from maybe_optimize_range_tests. */
6551 static bool
6552 reassociate_bb (basic_block bb)
6554 gimple_stmt_iterator gsi;
6555 basic_block son;
6556 gimple *stmt = last_stmt (bb);
6557 bool cfg_cleanup_needed = false;
6559 if (stmt && !gimple_visited_p (stmt))
6560 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6562 bool do_prev = false;
6563 for (gsi = gsi_last_bb (bb);
6564 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6566 do_prev = true;
6567 stmt = gsi_stmt (gsi);
6569 if (is_gimple_assign (stmt)
6570 && !stmt_could_throw_p (cfun, stmt))
6572 tree lhs, rhs1, rhs2;
6573 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6575 /* If this was part of an already processed statement,
6576 we don't need to touch it again. */
6577 if (gimple_visited_p (stmt))
6579 /* This statement might have become dead because of previous
6580 reassociations. */
6581 if (has_zero_uses (gimple_get_lhs (stmt)))
6583 reassoc_remove_stmt (&gsi);
6584 release_defs (stmt);
6585 /* We might end up removing the last stmt above, which
6586 leaves the iterator at the end of the sequence.
6587 In that case reset it to the last stmt of the block
6588 and make sure we don't do gsi_prev afterwards. */
6589 if (gsi_end_p (gsi))
6591 gsi = gsi_last_bb (bb);
6592 do_prev = false;
6595 continue;
6598 /* If this is not a gimple binary expression, there is
6599 nothing for us to do with it. */
6600 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6601 continue;
6603 lhs = gimple_assign_lhs (stmt);
6604 rhs1 = gimple_assign_rhs1 (stmt);
6605 rhs2 = gimple_assign_rhs2 (stmt);
6607 /* For operations other than bitwise and min/max ones we cannot
6608 reassociate all types. Verify that here. */
6609 if ((rhs_code != BIT_IOR_EXPR
6610 && rhs_code != BIT_AND_EXPR
6611 && rhs_code != BIT_XOR_EXPR
6612 && rhs_code != MIN_EXPR
6613 && rhs_code != MAX_EXPR
6614 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6615 || !can_reassociate_op_p (rhs1)
6616 || !can_reassociate_op_p (rhs2))
6617 continue;
6619 if (associative_tree_code (rhs_code))
6621 auto_vec<operand_entry *> ops;
6622 tree powi_result = NULL_TREE;
6623 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6625 /* There may be no immediate uses left by the time we
6626 get here because we may have eliminated them all. */
6627 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6628 continue;
6630 gimple_set_visited (stmt, true);
6631 linearize_expr_tree (&ops, stmt, true, true);
6632 ops.qsort (sort_by_operand_rank);
6633 int orig_len = ops.length ();
6634 optimize_ops_list (rhs_code, &ops);
6635 if (undistribute_ops_list (rhs_code, &ops,
6636 loop_containing_stmt (stmt)))
6638 ops.qsort (sort_by_operand_rank);
6639 optimize_ops_list (rhs_code, &ops);
6641 if (undistribute_bitref_for_vector (rhs_code, &ops,
6642 loop_containing_stmt (stmt)))
6644 ops.qsort (sort_by_operand_rank);
6645 optimize_ops_list (rhs_code, &ops);
6647 if (rhs_code == PLUS_EXPR
6648 && transform_add_to_multiply (&ops))
6649 ops.qsort (sort_by_operand_rank);
6651 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6653 if (is_vector)
6654 optimize_vec_cond_expr (rhs_code, &ops);
6655 else
6656 optimize_range_tests (rhs_code, &ops, NULL);
6659 if (rhs_code == MULT_EXPR && !is_vector)
6661 attempt_builtin_copysign (&ops);
6663 if (reassoc_insert_powi_p
6664 && (flag_unsafe_math_optimizations
6665 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6666 powi_result = attempt_builtin_powi (stmt, &ops);
6669 operand_entry *last;
6670 bool negate_result = false;
6671 if (ops.length () > 1
6672 && rhs_code == MULT_EXPR)
6674 last = ops.last ();
6675 if ((integer_minus_onep (last->op)
6676 || real_minus_onep (last->op))
6677 && !HONOR_SNANS (TREE_TYPE (lhs))
6678 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6679 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6681 ops.pop ();
6682 negate_result = true;
6686 tree new_lhs = lhs;
6687 /* If the operand vector is now empty, all operands were
6688 consumed by the __builtin_powi optimization. */
6689 if (ops.length () == 0)
6690 transform_stmt_to_copy (&gsi, stmt, powi_result);
6691 else if (ops.length () == 1)
6693 tree last_op = ops.last ()->op;
6695 /* If the stmt that defines the operand has to be inserted, insert it
6696 before the use. */
6697 if (ops.last ()->stmt_to_insert)
6698 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6699 if (powi_result)
6700 transform_stmt_to_multiply (&gsi, stmt, last_op,
6701 powi_result);
6702 else
6703 transform_stmt_to_copy (&gsi, stmt, last_op);
6705 else
6707 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6708 int ops_num = ops.length ();
6709 int width;
6711 /* For binary bit operations, if there are at least 3
6712 operands and the last operand in OPS is a constant,
6713 move it to the front. This helps ensure that we generate
6714 (X & Y) & C rather than (X & C) & Y. The former will
6715 often match a canonical bit test when we get to RTL. */
6716 if (ops.length () > 2
6717 && (rhs_code == BIT_AND_EXPR
6718 || rhs_code == BIT_IOR_EXPR
6719 || rhs_code == BIT_XOR_EXPR)
6720 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6721 std::swap (*ops[0], *ops[ops_num - 1]);
6723 /* Only rewrite the expression tree into a parallel form in the
6724 last reassoc pass, to avoid useless back-and-forth work
6725 with the initial linearization. */
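/* Illustrative example, not part of the original source: with a
   reassociation width of 2, the chain a + b + c + d is emitted as

     t1 = a + b;  t2 = c + d;  r = t1 + t2;

   rather than the serial (((a + b) + c) + d), exposing independent
   additions to the instruction scheduler.  */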
6726 if (!reassoc_insert_powi_p
6727 && ops.length () > 3
6728 && (width = get_reassociation_width (ops_num, rhs_code,
6729 mode)) > 1)
6731 if (dump_file && (dump_flags & TDF_DETAILS))
6732 fprintf (dump_file,
6733 "Width = %d was chosen for reassociation\n",
6734 width);
6735 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6736 width, ops);
6738 else
6740 /* When there are three operands left, we want
6741 to make sure the ones that get the double
6742 binary op are chosen wisely. */
6743 int len = ops.length ();
6744 if (len >= 3)
6745 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6747 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6748 powi_result != NULL
6749 || negate_result,
6750 len != orig_len);
6753 /* If we combined some repeated factors into a
6754 __builtin_powi call, multiply that result by the
6755 reassociated operands. */
6756 if (powi_result)
6758 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6759 tree type = TREE_TYPE (lhs);
6760 tree target_ssa = make_temp_ssa_name (type, NULL,
6761 "reassocpow");
6762 gimple_set_lhs (lhs_stmt, target_ssa);
6763 update_stmt (lhs_stmt);
6764 if (lhs != new_lhs)
6766 target_ssa = new_lhs;
6767 new_lhs = lhs;
6769 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6770 powi_result, target_ssa);
6771 gimple_set_location (mul_stmt, gimple_location (stmt));
6772 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6773 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6777 if (negate_result)
6779 stmt = SSA_NAME_DEF_STMT (lhs);
6780 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6781 gimple_set_lhs (stmt, tmp);
6782 if (lhs != new_lhs)
6783 tmp = new_lhs;
6784 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6785 tmp);
6786 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6787 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6788 update_stmt (stmt);
6793 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6794 son;
6795 son = next_dom_son (CDI_POST_DOMINATORS, son))
6796 cfg_cleanup_needed |= reassociate_bb (son);
6798 return cfg_cleanup_needed;
6801 /* Add jumps around shifts for range tests turned into bit tests.
6802 For each SSA_NAME VAR we have code like:
6803 VAR = ...; // final stmt of range comparison
6804 // bit test here...;
6805 OTHERVAR = ...; // final stmt of the bit test sequence
6806 RES = VAR | OTHERVAR;
6807 Turn the above into:
6808 VAR = ...;
6809 if (VAR != 0)
6810 goto <l3>;
6811 else
6812 goto <l2>;
6813 <l2>:
6814 // bit test here...;
6815 OTHERVAR = ...;
6816 <l3>:
6817 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6819 static void
6820 branch_fixup (void)
6822 tree var;
6823 unsigned int i;
6825 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6827 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6828 gimple *use_stmt;
6829 use_operand_p use;
6830 bool ok = single_imm_use (var, &use, &use_stmt);
6831 gcc_assert (ok
6832 && is_gimple_assign (use_stmt)
6833 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6834 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6836 basic_block cond_bb = gimple_bb (def_stmt);
6837 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6838 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6840 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6841 gimple *g = gimple_build_cond (NE_EXPR, var,
6842 build_zero_cst (TREE_TYPE (var)),
6843 NULL_TREE, NULL_TREE);
6844 location_t loc = gimple_location (use_stmt);
6845 gimple_set_location (g, loc);
6846 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6848 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6849 etrue->probability = profile_probability::even ();
6850 edge efalse = find_edge (cond_bb, then_bb);
6851 efalse->flags = EDGE_FALSE_VALUE;
6852 efalse->probability -= etrue->probability;
6853 then_bb->count -= etrue->count ();
6855 tree othervar = NULL_TREE;
6856 if (gimple_assign_rhs1 (use_stmt) == var)
6857 othervar = gimple_assign_rhs2 (use_stmt);
6858 else if (gimple_assign_rhs2 (use_stmt) == var)
6859 othervar = gimple_assign_rhs1 (use_stmt);
6860 else
6861 gcc_unreachable ();
6862 tree lhs = gimple_assign_lhs (use_stmt);
6863 gphi *phi = create_phi_node (lhs, merge_bb);
6864 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6865 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6866 gsi = gsi_for_stmt (use_stmt);
6867 gsi_remove (&gsi, true);
6869 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6870 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6872 reassoc_branch_fixups.release ();
6875 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6876 void debug_ops_vector (vec<operand_entry *> ops);
6878 /* Dump the operand entry vector OPS to FILE. */
6880 void
6881 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6883 operand_entry *oe;
6884 unsigned int i;
6886 FOR_EACH_VEC_ELT (ops, i, oe)
6888 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6889 print_generic_expr (file, oe->op);
6890 fprintf (file, "\n");
6894 /* Dump the operand entry vector OPS to STDERR. */
6896 DEBUG_FUNCTION void
6897 debug_ops_vector (vec<operand_entry *> ops)
6899 dump_ops_vector (stderr, ops);
6902 /* Bubble up return status from reassociate_bb. */
6904 static bool
6905 do_reassoc (void)
6907 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6908 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6911 /* Initialize the reassociation pass. */
6913 static void
6914 init_reassoc (void)
6916 int i;
6917 int64_t rank = 2;
6918 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6920 /* Find the loops, so that we can prevent moving calculations in
6921 them. */
6922 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6924 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6926 next_operand_entry_id = 0;
6928 /* A reverse post order (RPO) walk gives us an ordering in which
6929 deeper loops come later. */
6930 pre_and_rev_post_order_compute (NULL, bbs, false);
6931 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6932 operand_rank = new hash_map<tree, int64_t>;
6934 /* Give each default definition a distinct rank. This includes
6935 parameters and the static chain. Walk backwards over all
6936 SSA names so that we get proper rank ordering according
6937 to tree_swap_operands_p. */
6938 for (i = num_ssa_names - 1; i > 0; --i)
6940 tree name = ssa_name (i);
6941 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6942 insert_operand_rank (name, ++rank);
6945 /* Set up rank for each BB */
6946 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6947 bb_rank[bbs[i]] = ++rank << 16;
6949 free (bbs);
6950 calculate_dominance_info (CDI_POST_DOMINATORS);
6951 plus_negates = vNULL;
6954 /* Clean up after the reassociation pass, and print stats if
6955 requested. */
6957 static void
6958 fini_reassoc (void)
6960 statistics_counter_event (cfun, "Linearized",
6961 reassociate_stats.linearized);
6962 statistics_counter_event (cfun, "Constants eliminated",
6963 reassociate_stats.constants_eliminated);
6964 statistics_counter_event (cfun, "Ops eliminated",
6965 reassociate_stats.ops_eliminated);
6966 statistics_counter_event (cfun, "Statements rewritten",
6967 reassociate_stats.rewritten);
6968 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6969 reassociate_stats.pows_encountered);
6970 statistics_counter_event (cfun, "Built-in powi calls created",
6971 reassociate_stats.pows_created);
6973 delete operand_rank;
6974 bitmap_clear (biased_names);
6975 operand_entry_pool.release ();
6976 free (bb_rank);
6977 plus_negates.release ();
6978 free_dominance_info (CDI_POST_DOMINATORS);
6979 loop_optimizer_finalize ();
6982 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6983 insertion of __builtin_powi calls.
6985 Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
6986 optimization of a gimple conditional. Otherwise returns zero. */
6988 static unsigned int
6989 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
6991 reassoc_insert_powi_p = insert_powi_p;
6992 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
6994 init_reassoc ();
6996 bool cfg_cleanup_needed;
6997 cfg_cleanup_needed = do_reassoc ();
6998 repropagate_negates ();
6999 branch_fixup ();
7001 fini_reassoc ();
7002 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7005 namespace {
7007 const pass_data pass_data_reassoc =
7009 GIMPLE_PASS, /* type */
7010 "reassoc", /* name */
7011 OPTGROUP_NONE, /* optinfo_flags */
7012 TV_TREE_REASSOC, /* tv_id */
7013 ( PROP_cfg | PROP_ssa ), /* properties_required */
7014 0, /* properties_provided */
7015 0, /* properties_destroyed */
7016 0, /* todo_flags_start */
7017 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7020 class pass_reassoc : public gimple_opt_pass
7022 public:
7023 pass_reassoc (gcc::context *ctxt)
7024 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7027 /* opt_pass methods: */
7028 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
7029 void set_pass_param (unsigned int n, bool param)
7031 gcc_assert (n == 0);
7032 insert_powi_p = param;
7033 bias_loop_carried_phi_ranks_p = !param;
7035 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
7036 virtual unsigned int execute (function *)
7038 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7041 private:
7042 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7043 point 3a in the pass header comment. */
7044 bool insert_powi_p;
7045 bool bias_loop_carried_phi_ranks_p;
7046 }; // class pass_reassoc
7048 } // anon namespace
7050 gimple_opt_pass *
7051 make_pass_reassoc (gcc::context *ctxt)
7053 return new pass_reassoc (ctxt);