1 /* Reassociation for trees.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
62 It consists of five steps:
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
72 3. Optimization of the operand lists, eliminating things like a +
73 -a, a & a, etc.
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
82 5. Repropagate negates, as nothing else will clean it up ATM.
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
103 a (1), b (1), c (1), d (2), e (2)
106 We start with our merge worklist empty, and the ops list with all of
107 those on it.
109 You want to first merge all leaves of the same rank, as much as
110 possible.
112 So first build a binary op of
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
124 Then build a binary op of d + e
125 mergetmp2 = d + e
127 and put mergetmp2 on the merge worklist.
129 so merge worklist = {mergetmp, c, mergetmp2}
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
134 So we have
136 build binary op
137 mergetmp3 = mergetmp + c
139 worklist = {mergetmp2, mergetmp3}
141 mergetmp4 = mergetmp2 + mergetmp3
143 worklist = {mergetmp4}
145 because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
154 So why don't we do this?
156 Because it's expensive, and rarely will help. Most trees we are
157 reassociating have 3 or fewer ops. If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
162 mergetmp = op1 + op2
163 newstmt = mergetmp + op3
165 instead of
166 mergetmp = op2 + op3
167 newstmt = mergetmp + op1
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
171 can do.
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
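
/* As an illustrative sketch (hypothetical statements, not from a real
   dump): step 1 rewrites a subtract so its operand can join an addition
   chain, e.g.

     t1 = a - b;                  t1n = -b;
     t2 = t1 + c;      ==>        t1  = a + t1n;
                                  t2  = t1 + c;

   after which { a, t1n, c } form one linearized PLUS_EXPR operand list
   that steps 2-4 sort by rank and rewrite, and step 5 may later fold
   the explicit negate back into a subtract where that is cheaper.  */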
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p;
183 /* Statistics */
184 static struct
186 int linearized;
187 int constants_eliminated;
188 int ops_eliminated;
189 int rewritten;
190 int pows_encountered;
191 int pows_created;
192 } reassociate_stats;
195 static object_allocator<operand_entry> operand_entry_pool
196 ("operand entry pool");
198 /* This is used to assign a unique ID to each struct operand_entry
199 so that qsort results are identical on different hosts. */
200 static unsigned int next_operand_entry_id;
202 /* Starting rank number for a given basic block, so that we can rank
203 operations using unmovable instructions in that BB based on the bb
204 depth. */
205 static int64_t *bb_rank;
207 /* Operand->rank hashtable. */
208 static hash_map<tree, int64_t> *operand_rank;
210 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
211 all basic blocks the CFG should be adjusted - basic blocks
212 split right after that SSA_NAME's definition statement and before
213 the only use, which must be a bit ior. */
214 static vec<tree> reassoc_branch_fixups;
216 /* Forward decls. */
217 static int64_t get_rank (tree);
218 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
220 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
221 possibly added by gsi_remove. */
223 static bool
224 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
226 gimple *stmt = gsi_stmt (*gsi);
228 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
229 return gsi_remove (gsi, true);
231 gimple_stmt_iterator prev = *gsi;
232 gsi_prev (&prev);
233 unsigned uid = gimple_uid (stmt);
234 basic_block bb = gimple_bb (stmt);
235 bool ret = gsi_remove (gsi, true);
236 if (!gsi_end_p (prev))
237 gsi_next (&prev);
238 else
239 prev = gsi_start_bb (bb);
240 gimple *end_stmt = gsi_stmt (*gsi);
241 while ((stmt = gsi_stmt (prev)) != end_stmt)
243 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
244 gimple_set_uid (stmt, uid);
245 gsi_next (&prev);
247 return ret;
250 /* Bias amount for loop-carried phis. We want this to be larger than
251 the depth of any reassociation tree we can see, but not larger than
252 the rank difference between two blocks. */
253 #define PHI_LOOP_BIAS (1 << 15)
255 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
256 an innermost loop, and the phi has only a single use which is inside
257 the loop, then the rank is the block rank of the loop latch plus an
258 extra bias for the loop-carried dependence. This causes expressions
259 calculated into an accumulator variable to be independent for each
260 iteration of the loop. If STMT is some other phi, the rank is the
261 block rank of its containing block. */
262 static int64_t
263 phi_rank (gimple *stmt)
265 basic_block bb = gimple_bb (stmt);
266 class loop *father = bb->loop_father;
267 tree res;
268 unsigned i;
269 use_operand_p use;
270 gimple *use_stmt;
272 /* We only care about real loops (those with a latch). */
273 if (!father->latch)
274 return bb_rank[bb->index];
276 /* Interesting phis must be in headers of innermost loops. */
277 if (bb != father->header
278 || father->inner)
279 return bb_rank[bb->index];
281 /* Ignore virtual SSA_NAMEs. */
282 res = gimple_phi_result (stmt);
283 if (virtual_operand_p (res))
284 return bb_rank[bb->index];
286 /* The phi definition must have a single use, and that use must be
287 within the loop. Otherwise this isn't an accumulator pattern. */
288 if (!single_imm_use (res, &use, &use_stmt)
289 || gimple_bb (use_stmt)->loop_father != father)
290 return bb_rank[bb->index];
292 /* Look for phi arguments from within the loop. If found, bias this phi. */
293 for (i = 0; i < gimple_phi_num_args (stmt); i++)
295 tree arg = gimple_phi_arg_def (stmt, i);
296 if (TREE_CODE (arg) == SSA_NAME
297 && !SSA_NAME_IS_DEFAULT_DEF (arg))
299 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
300 if (gimple_bb (def_stmt)->loop_father == father)
301 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
305 /* Must be an uninteresting phi. */
306 return bb_rank[bb->index];
309 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
310 loop-carried dependence of an innermost loop, return TRUE; else
311 return FALSE. */
312 static bool
313 loop_carried_phi (tree exp)
315 gimple *phi_stmt;
316 int64_t block_rank;
318 if (TREE_CODE (exp) != SSA_NAME
319 || SSA_NAME_IS_DEFAULT_DEF (exp))
320 return false;
322 phi_stmt = SSA_NAME_DEF_STMT (exp);
324 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
325 return false;
327 /* Non-loop-carried phis have block rank. Loop-carried phis have
328 an additional bias added in. If this phi doesn't have block rank,
329 it's biased and should not be propagated. */
330 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
332 if (phi_rank (phi_stmt) != block_rank)
333 return true;
335 return false;
338 /* Return the maximum of RANK and the rank that should be propagated
339 from expression OP. For most operands, this is just the rank of OP.
340 For loop-carried phis, the value is zero to avoid undoing the bias
341 in favor of the phi. */
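
/* For example (illustrative): when ranking c_2 = b_1 + x_1 where x_1 is
   a biased loop-carried phi, propagate_rank ignores x_1's inflated rank
   and keeps the rank contributed by b_1, so c_2 does not inherit the
   PHI_LOOP_BIAS.  */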
342 static int64_t
343 propagate_rank (int64_t rank, tree op)
345 int64_t op_rank;
347 if (loop_carried_phi (op))
348 return rank;
350 op_rank = get_rank (op);
352 return MAX (rank, op_rank);
355 /* Look up the operand rank structure for expression E. */
357 static inline int64_t
358 find_operand_rank (tree e)
360 int64_t *slot = operand_rank->get (e);
361 return slot ? *slot : -1;
364 /* Insert {E,RANK} into the operand rank hashtable. */
366 static inline void
367 insert_operand_rank (tree e, int64_t rank)
369 gcc_assert (rank > 0);
370 gcc_assert (!operand_rank->put (e, rank));
373 /* Given an expression E, return the rank of the expression. */
375 static int64_t
376 get_rank (tree e)
378 /* SSA_NAME's have the rank of the expression they are the result
379 of.
380 For globals and uninitialized values, the rank is 0.
381 For function arguments, use the pre-setup rank.
382 For PHI nodes, stores, asm statements, etc, we use the rank of
383 the BB.
384 For simple operations, the rank is the maximum rank of any of
385 its operands, or the bb_rank, whichever is less.
386 I make no claims that this is optimal, however, it gives good
387 results. */
389 /* We make an exception to the normal ranking system to break
390 dependences of accumulator variables in loops. Suppose we
391 have a simple one-block loop containing:
393 x_1 = phi(x_0, x_2)
394 b = a + x_1
395 c = b + d
396 x_2 = c + e
398 As shown, each iteration of the calculation into x is fully
399 dependent upon the iteration before it. We would prefer to
400 see this in the form:
402 x_1 = phi(x_0, x_2)
403 b = a + d
404 c = b + e
405 x_2 = c + x_1
407 If the loop is unrolled, the calculations of b and c from
408 different iterations can be interleaved.
410 To obtain this result during reassociation, we bias the rank
411 of the phi definition x_1 upward, when it is recognized as an
412 accumulator pattern. The artificial rank causes it to be
413 added last, providing the desired independence. */
415 if (TREE_CODE (e) == SSA_NAME)
417 ssa_op_iter iter;
418 gimple *stmt;
419 int64_t rank;
420 tree op;
422 /* If we already have a rank for this expression, use that. */
423 rank = find_operand_rank (e);
424 if (rank != -1)
425 return rank;
427 stmt = SSA_NAME_DEF_STMT (e);
428 if (gimple_code (stmt) == GIMPLE_PHI)
429 rank = phi_rank (stmt);
431 else if (!is_gimple_assign (stmt))
432 rank = bb_rank[gimple_bb (stmt)->index];
434 else
436 /* Otherwise, find the maximum rank for the operands. As an
437 exception, remove the bias from loop-carried phis when propagating
438 the rank so that dependent operations are not also biased. */
439 /* Simply walk over all SSA uses - this takes advantage of the
440 fact that non-SSA operands are is_gimple_min_invariant and
441 thus have rank 0. */
442 rank = 0;
443 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
444 rank = propagate_rank (rank, op);
446 rank += 1;
449 if (dump_file && (dump_flags & TDF_DETAILS))
451 fprintf (dump_file, "Rank for ");
452 print_generic_expr (dump_file, e);
453 fprintf (dump_file, " is %" PRId64 "\n", rank);
456 /* Note the rank in the hashtable so we don't recompute it. */
457 insert_operand_rank (e, rank);
458 return rank;
461 /* Constants, globals, etc., are rank 0 */
462 return 0;
466 /* We want integer ones to end up last no matter what, since they are
467 the ones we can do the most with. */
468 #define INTEGER_CONST_TYPE 1 << 4
469 #define FLOAT_ONE_CONST_TYPE 1 << 3
470 #define FLOAT_CONST_TYPE 1 << 2
471 #define OTHER_CONST_TYPE 1 << 1
473 /* Classify an invariant tree into integer, float, or other, so that
474 we can sort them to be near other constants of the same type. */
475 static inline int
476 constant_type (tree t)
478 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
479 return INTEGER_CONST_TYPE;
480 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
482 /* Sort -1.0 and 1.0 constants last: while in some cases
483 const_binop can't optimize some inexact operations, multiplication
484 by -1.0 or 1.0 can always be merged with others. */
485 if (real_onep (t) || real_minus_onep (t))
486 return FLOAT_ONE_CONST_TYPE;
487 return FLOAT_CONST_TYPE;
489 else
490 return OTHER_CONST_TYPE;
493 /* qsort comparison function to sort operand entries PA and PB by rank
494 so that the sorted array is ordered by rank in decreasing order. */
495 static int
496 sort_by_operand_rank (const void *pa, const void *pb)
498 const operand_entry *oea = *(const operand_entry *const *)pa;
499 const operand_entry *oeb = *(const operand_entry *const *)pb;
501 if (oeb->rank != oea->rank)
502 return oeb->rank > oea->rank ? 1 : -1;
504 /* It's nicer for optimize_expression if constants that are likely
505 to fold when added/multiplied/whatever are put next to each
506 other. Since all constants have rank 0, order them by type. */
507 if (oea->rank == 0)
509 if (constant_type (oeb->op) != constant_type (oea->op))
510 return constant_type (oea->op) - constant_type (oeb->op);
511 else
512 /* To make sorting result stable, we use unique IDs to determine
513 order. */
514 return oeb->id > oea->id ? 1 : -1;
517 if (TREE_CODE (oea->op) != SSA_NAME)
519 if (TREE_CODE (oeb->op) != SSA_NAME)
520 return oeb->id > oea->id ? 1 : -1;
521 else
522 return 1;
524 else if (TREE_CODE (oeb->op) != SSA_NAME)
525 return -1;
527 /* Lastly, make sure the versions that are the same go next to each
528 other. */
529 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
531 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
532 versions of removed SSA_NAMEs, so if possible prefer to sort
533 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
534 See PR60418. */
535 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
536 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
537 basic_block bba = gimple_bb (stmta);
538 basic_block bbb = gimple_bb (stmtb);
539 if (bbb != bba)
541 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
542 but the other might not. */
543 if (!bba)
544 return 1;
545 if (!bbb)
546 return -1;
547 /* If neither is, compare bb_rank. */
548 if (bb_rank[bbb->index] != bb_rank[bba->index])
549 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
552 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
553 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
554 if (da != db)
555 return da ? 1 : -1;
557 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
560 return oeb->id > oea->id ? 1 : -1;
563 /* Add an operand entry to *OPS for the tree operand OP. */
565 static void
566 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
568 operand_entry *oe = operand_entry_pool.allocate ();
570 oe->op = op;
571 oe->rank = get_rank (op);
572 oe->id = next_operand_entry_id++;
573 oe->count = 1;
574 oe->stmt_to_insert = stmt_to_insert;
575 ops->safe_push (oe);
578 /* Add an operand entry to *OPS for the tree operand OP with repeat
579 count REPEAT. */
581 static void
582 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
583 HOST_WIDE_INT repeat)
585 operand_entry *oe = operand_entry_pool.allocate ();
587 oe->op = op;
588 oe->rank = get_rank (op);
589 oe->id = next_operand_entry_id++;
590 oe->count = repeat;
591 oe->stmt_to_insert = NULL;
592 ops->safe_push (oe);
594 reassociate_stats.pows_encountered++;
597 /* Returns true if we can associate the SSA def OP. */
599 static bool
600 can_reassociate_op_p (tree op)
602 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
603 return false;
604 /* Make sure asm goto outputs do not participate in reassociation since
605 we have no way to find an insertion place after asm goto. */
606 if (TREE_CODE (op) == SSA_NAME
607 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
608 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
609 return false;
610 return true;
613 /* Returns true if we can reassociate operations of TYPE.
614 That is for integral or non-saturating fixed-point types, and for
615 floating point type when associative-math is enabled. */
617 static bool
618 can_reassociate_type_p (tree type)
620 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
621 || NON_SAT_FIXED_POINT_TYPE_P (type)
622 || (flag_associative_math && FLOAT_TYPE_P (type)))
623 return true;
624 return false;
627 /* Return true if STMT is a reassociable operation containing a binary
628 operation with tree code CODE, and is inside LOOP. */
630 static bool
631 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
633 basic_block bb = gimple_bb (stmt);
635 if (gimple_bb (stmt) == NULL)
636 return false;
638 if (!flow_bb_inside_loop_p (loop, bb))
639 return false;
641 if (is_gimple_assign (stmt)
642 && gimple_assign_rhs_code (stmt) == code
643 && has_single_use (gimple_assign_lhs (stmt)))
645 tree rhs1 = gimple_assign_rhs1 (stmt);
646 tree rhs2 = gimple_assign_rhs2 (stmt);
647 if (!can_reassociate_op_p (rhs1)
648 || (rhs2 && !can_reassociate_op_p (rhs2)))
649 return false;
650 return true;
653 return false;
657 /* Return true if STMT is a nop-conversion. */
659 static bool
660 gimple_nop_conversion_p (gimple *stmt)
662 if (gassign *ass = dyn_cast <gassign *> (stmt))
664 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
665 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
666 TREE_TYPE (gimple_assign_rhs1 (ass))))
667 return true;
669 return false;
672 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
673 operand of that operation. Otherwise, return NULL. */
675 static tree
676 get_unary_op (tree name, enum tree_code opcode)
678 gimple *stmt = SSA_NAME_DEF_STMT (name);
680 /* Look through nop conversions (sign changes). */
681 if (gimple_nop_conversion_p (stmt)
682 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
683 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
685 if (!is_gimple_assign (stmt))
686 return NULL_TREE;
688 if (gimple_assign_rhs_code (stmt) == opcode)
689 return gimple_assign_rhs1 (stmt);
690 return NULL_TREE;
693 /* Return true if OP1 and OP2 have the same value if cast to either type. */
695 static bool
696 ops_equal_values_p (tree op1, tree op2)
698 if (op1 == op2)
699 return true;
701 tree orig_op1 = op1;
702 if (TREE_CODE (op1) == SSA_NAME)
704 gimple *stmt = SSA_NAME_DEF_STMT (op1);
705 if (gimple_nop_conversion_p (stmt))
707 op1 = gimple_assign_rhs1 (stmt);
708 if (op1 == op2)
709 return true;
713 if (TREE_CODE (op2) == SSA_NAME)
715 gimple *stmt = SSA_NAME_DEF_STMT (op2);
716 if (gimple_nop_conversion_p (stmt))
718 op2 = gimple_assign_rhs1 (stmt);
719 if (op1 == op2
720 || orig_op1 == op2)
721 return true;
725 return false;
729 /* If CURR and LAST are a pair of ops that OPCODE allows us to
730 eliminate through equivalences, do so, remove them from OPS, and
731 return true. Otherwise, return false. */
733 static bool
734 eliminate_duplicate_pair (enum tree_code opcode,
735 vec<operand_entry *> *ops,
736 bool *all_done,
737 unsigned int i,
738 operand_entry *curr,
739 operand_entry *last)
742 /* If we have two of the same op, and the opcode is &, |, min, or max,
743 we can eliminate one of them.
744 If we have two of the same op, and the opcode is ^, we can
745 eliminate both of them. */
747 if (last && last->op == curr->op)
749 switch (opcode)
751 case MAX_EXPR:
752 case MIN_EXPR:
753 case BIT_IOR_EXPR:
754 case BIT_AND_EXPR:
755 if (dump_file && (dump_flags & TDF_DETAILS))
757 fprintf (dump_file, "Equivalence: ");
758 print_generic_expr (dump_file, curr->op);
759 fprintf (dump_file, " [&|minmax] ");
760 print_generic_expr (dump_file, last->op);
761 fprintf (dump_file, " -> ");
762 print_generic_stmt (dump_file, last->op);
765 ops->ordered_remove (i);
766 reassociate_stats.ops_eliminated ++;
768 return true;
770 case BIT_XOR_EXPR:
771 if (dump_file && (dump_flags & TDF_DETAILS))
773 fprintf (dump_file, "Equivalence: ");
774 print_generic_expr (dump_file, curr->op);
775 fprintf (dump_file, " ^ ");
776 print_generic_expr (dump_file, last->op);
777 fprintf (dump_file, " -> nothing\n");
780 reassociate_stats.ops_eliminated += 2;
782 if (ops->length () == 2)
784 ops->truncate (0);
785 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
786 *all_done = true;
788 else
790 ops->ordered_remove (i-1);
791 ops->ordered_remove (i-1);
794 return true;
796 default:
797 break;
800 return false;
803 static vec<tree> plus_negates;
805 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
806 not expression, look in OPS for a corresponding positive operation to
807 cancel it out. If we find one, remove the other from OPS, replace
808 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
809 return false. */
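
/* For example (illustrative): with OPCODE PLUS_EXPR and an operand list
   { a, b, t } where t = -a, the pair a + (-a) cancels and the list
   becomes { b, 0 }; if instead t = ~a, the pair a + ~a folds to -1.  */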
811 static bool
812 eliminate_plus_minus_pair (enum tree_code opcode,
813 vec<operand_entry *> *ops,
814 unsigned int currindex,
815 operand_entry *curr)
817 tree negateop;
818 tree notop;
819 unsigned int i;
820 operand_entry *oe;
822 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
823 return false;
825 negateop = get_unary_op (curr->op, NEGATE_EXPR);
826 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
827 if (negateop == NULL_TREE && notop == NULL_TREE)
828 return false;
830 /* Any non-negated version will have a rank that is one less than
831 the current rank. So once we hit those ranks, if we don't find
832 one, we can stop. */
834 for (i = currindex + 1;
835 ops->iterate (i, &oe)
836 && oe->rank >= curr->rank - 1 ;
837 i++)
839 if (negateop
840 && ops_equal_values_p (oe->op, negateop))
842 if (dump_file && (dump_flags & TDF_DETAILS))
844 fprintf (dump_file, "Equivalence: ");
845 print_generic_expr (dump_file, negateop);
846 fprintf (dump_file, " + -");
847 print_generic_expr (dump_file, oe->op);
848 fprintf (dump_file, " -> 0\n");
851 ops->ordered_remove (i);
852 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
853 ops->ordered_remove (currindex);
854 reassociate_stats.ops_eliminated ++;
856 return true;
858 else if (notop
859 && ops_equal_values_p (oe->op, notop))
861 tree op_type = TREE_TYPE (oe->op);
863 if (dump_file && (dump_flags & TDF_DETAILS))
865 fprintf (dump_file, "Equivalence: ");
866 print_generic_expr (dump_file, notop);
867 fprintf (dump_file, " + ~");
868 print_generic_expr (dump_file, oe->op);
869 fprintf (dump_file, " -> -1\n");
872 ops->ordered_remove (i);
873 add_to_ops_vec (ops, build_all_ones_cst (op_type));
874 ops->ordered_remove (currindex);
875 reassociate_stats.ops_eliminated ++;
877 return true;
881 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
882 save it for later inspection in repropagate_negates(). */
883 if (negateop != NULL_TREE
884 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
885 plus_negates.safe_push (curr->op);
887 return false;
890 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
891 bitwise not expression, look in OPS for a corresponding operand to
892 cancel it out. If we find one, remove the other from OPS, replace
893 OPS[CURRINDEX] with 0, and return true. Otherwise, return
894 false. */
896 static bool
897 eliminate_not_pairs (enum tree_code opcode,
898 vec<operand_entry *> *ops,
899 unsigned int currindex,
900 operand_entry *curr)
902 tree notop;
903 unsigned int i;
904 operand_entry *oe;
906 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
907 || TREE_CODE (curr->op) != SSA_NAME)
908 return false;
910 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
911 if (notop == NULL_TREE)
912 return false;
914 /* Any non-not version will have a rank that is one less than
915 the current rank. So once we hit those ranks, if we don't find
916 one, we can stop. */
918 for (i = currindex + 1;
919 ops->iterate (i, &oe)
920 && oe->rank >= curr->rank - 1;
921 i++)
923 if (oe->op == notop)
925 if (dump_file && (dump_flags & TDF_DETAILS))
927 fprintf (dump_file, "Equivalence: ");
928 print_generic_expr (dump_file, notop);
929 if (opcode == BIT_AND_EXPR)
930 fprintf (dump_file, " & ~");
931 else if (opcode == BIT_IOR_EXPR)
932 fprintf (dump_file, " | ~");
933 print_generic_expr (dump_file, oe->op);
934 if (opcode == BIT_AND_EXPR)
935 fprintf (dump_file, " -> 0\n");
936 else if (opcode == BIT_IOR_EXPR)
937 fprintf (dump_file, " -> -1\n");
940 if (opcode == BIT_AND_EXPR)
941 oe->op = build_zero_cst (TREE_TYPE (oe->op));
942 else if (opcode == BIT_IOR_EXPR)
943 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
945 reassociate_stats.ops_eliminated += ops->length () - 1;
946 ops->truncate (0);
947 ops->quick_push (oe);
948 return true;
952 return false;
955 /* Use constant value that may be present in OPS to try to eliminate
956 operands. Note that this function is only really used when we've
957 eliminated ops for other reasons, or merged constants. Across
958 single statements, fold already does all of this, plus more. There
959 is little point in duplicating logic, so I've only included the
960 identities that I could ever construct testcases to trigger. */
962 static void
963 eliminate_using_constants (enum tree_code opcode,
964 vec<operand_entry *> *ops)
966 operand_entry *oelast = ops->last ();
967 tree type = TREE_TYPE (oelast->op);
969 if (oelast->rank == 0
970 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
972 switch (opcode)
974 case BIT_AND_EXPR:
975 if (integer_zerop (oelast->op))
977 if (ops->length () != 1)
979 if (dump_file && (dump_flags & TDF_DETAILS))
980 fprintf (dump_file, "Found & 0, removing all other ops\n");
982 reassociate_stats.ops_eliminated += ops->length () - 1;
984 ops->truncate (0);
985 ops->quick_push (oelast);
986 return;
989 else if (integer_all_onesp (oelast->op))
991 if (ops->length () != 1)
993 if (dump_file && (dump_flags & TDF_DETAILS))
994 fprintf (dump_file, "Found & -1, removing\n");
995 ops->pop ();
996 reassociate_stats.ops_eliminated++;
999 break;
1000 case BIT_IOR_EXPR:
1001 if (integer_all_onesp (oelast->op))
1003 if (ops->length () != 1)
1005 if (dump_file && (dump_flags & TDF_DETAILS))
1006 fprintf (dump_file, "Found | -1, removing all other ops\n");
1008 reassociate_stats.ops_eliminated += ops->length () - 1;
1010 ops->truncate (0);
1011 ops->quick_push (oelast);
1012 return;
1015 else if (integer_zerop (oelast->op))
1017 if (ops->length () != 1)
1019 if (dump_file && (dump_flags & TDF_DETAILS))
1020 fprintf (dump_file, "Found | 0, removing\n");
1021 ops->pop ();
1022 reassociate_stats.ops_eliminated++;
1025 break;
1026 case MULT_EXPR:
1027 if (integer_zerop (oelast->op)
1028 || (FLOAT_TYPE_P (type)
1029 && !HONOR_NANS (type)
1030 && !HONOR_SIGNED_ZEROS (type)
1031 && real_zerop (oelast->op)))
1033 if (ops->length () != 1)
1035 if (dump_file && (dump_flags & TDF_DETAILS))
1036 fprintf (dump_file, "Found * 0, removing all other ops\n");
1038 reassociate_stats.ops_eliminated += ops->length () - 1;
1039 ops->truncate (0);
1040 ops->quick_push (oelast);
1041 return;
1044 else if (integer_onep (oelast->op)
1045 || (FLOAT_TYPE_P (type)
1046 && !HONOR_SNANS (type)
1047 && real_onep (oelast->op)))
1049 if (ops->length () != 1)
1051 if (dump_file && (dump_flags & TDF_DETAILS))
1052 fprintf (dump_file, "Found * 1, removing\n");
1053 ops->pop ();
1054 reassociate_stats.ops_eliminated++;
1055 return;
1058 break;
1059 case BIT_XOR_EXPR:
1060 case PLUS_EXPR:
1061 case MINUS_EXPR:
1062 if (integer_zerop (oelast->op)
1063 || (FLOAT_TYPE_P (type)
1064 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1065 && fold_real_zero_addition_p (type, 0, oelast->op,
1066 opcode == MINUS_EXPR)))
1068 if (ops->length () != 1)
1070 if (dump_file && (dump_flags & TDF_DETAILS))
1071 fprintf (dump_file, "Found [|^+] 0, removing\n");
1072 ops->pop ();
1073 reassociate_stats.ops_eliminated++;
1074 return;
1077 break;
1078 default:
1079 break;
1085 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1086 bool, bool);
1088 /* Structure for tracking and counting operands. */
1089 struct oecount {
1090 unsigned int cnt;
1091 unsigned int id;
1092 enum tree_code oecode;
1093 tree op;
1097 /* The heap for the oecount hashtable and the sorted list of operands. */
1098 static vec<oecount> cvec;
1101 /* Oecount hashtable helpers. */
1103 struct oecount_hasher : int_hash <int, 0, 1>
1105 static inline hashval_t hash (int);
1106 static inline bool equal (int, int);
1109 /* Hash function for oecount. */
1111 inline hashval_t
1112 oecount_hasher::hash (int p)
1114 const oecount *c = &cvec[p - 42];
1115 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1118 /* Comparison function for oecount. */
1120 inline bool
1121 oecount_hasher::equal (int p1, int p2)
1123 const oecount *c1 = &cvec[p1 - 42];
1124 const oecount *c2 = &cvec[p2 - 42];
1125 return c1->oecode == c2->oecode && c1->op == c2->op;
1128 /* Comparison function for qsort sorting oecount elements by count. */
1130 static int
1131 oecount_cmp (const void *p1, const void *p2)
1133 const oecount *c1 = (const oecount *)p1;
1134 const oecount *c2 = (const oecount *)p2;
1135 if (c1->cnt != c2->cnt)
1136 return c1->cnt > c2->cnt ? 1 : -1;
1137 else
1138 /* If counts are identical, use unique IDs to stabilize qsort. */
1139 return c1->id > c2->id ? 1 : -1;
1142 /* Return TRUE iff STMT represents a builtin call that raises OP
1143 to some exponent. */
1145 static bool
1146 stmt_is_power_of_op (gimple *stmt, tree op)
1148 if (!is_gimple_call (stmt))
1149 return false;
1151 switch (gimple_call_combined_fn (stmt))
1153 CASE_CFN_POW:
1154 CASE_CFN_POWI:
1155 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1157 default:
1158 return false;
1162 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1163 in place and return the result. Assumes that stmt_is_power_of_op
1164 was previously called for STMT and returned TRUE. */
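
/* For example (illustrative): for a call __builtin_pow (x, 5.0) this
   rewrites it to __builtin_pow (x, 4.0) and returns 4; for
   __builtin_powi (x, 3) it rewrites it to __builtin_powi (x, 2) and
   returns 2.  */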
1166 static HOST_WIDE_INT
1167 decrement_power (gimple *stmt)
1169 REAL_VALUE_TYPE c, cint;
1170 HOST_WIDE_INT power;
1171 tree arg1;
1173 switch (gimple_call_combined_fn (stmt))
1175 CASE_CFN_POW:
1176 arg1 = gimple_call_arg (stmt, 1);
1177 c = TREE_REAL_CST (arg1);
1178 power = real_to_integer (&c) - 1;
1179 real_from_integer (&cint, VOIDmode, power, SIGNED);
1180 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1181 return power;
1183 CASE_CFN_POWI:
1184 arg1 = gimple_call_arg (stmt, 1);
1185 power = TREE_INT_CST_LOW (arg1) - 1;
1186 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1187 return power;
1189 default:
1190 gcc_unreachable ();
1194 /* Give the SSA name defined by STMT a new SSA name and replace all
1195 uses of the old one with it. Also return the new SSA name. */
1197 static tree
1198 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1200 gimple *use_stmt;
1201 use_operand_p use;
1202 imm_use_iterator iter;
1203 tree new_lhs, new_debug_lhs = NULL_TREE;
1204 tree lhs = gimple_get_lhs (stmt);
1206 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1207 gimple_set_lhs (stmt, new_lhs);
1209 /* Also need to update GIMPLE_DEBUGs. */
1210 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1212 tree repl = new_lhs;
1213 if (is_gimple_debug (use_stmt))
1215 if (new_debug_lhs == NULL_TREE)
1217 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1218 gdebug *def_temp
1219 = gimple_build_debug_bind (new_debug_lhs,
1220 build2 (opcode, TREE_TYPE (lhs),
1221 new_lhs, op),
1222 stmt);
1223 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1224 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1225 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1226 gimple_set_uid (def_temp, gimple_uid (stmt));
1227 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1228 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1230 repl = new_debug_lhs;
1232 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1233 SET_USE (use, repl);
1234 update_stmt (use_stmt);
1236 return new_lhs;
1239 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1240 uses with new SSA names. Also do this for the stmt that defines DEF
1241 if *DEF is not OP. */
1243 static void
1244 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1245 vec<gimple *> &stmts_to_fix)
1247 unsigned i;
1248 gimple *stmt;
1250 if (*def != op
1251 && TREE_CODE (*def) == SSA_NAME
1252 && (stmt = SSA_NAME_DEF_STMT (*def))
1253 && gimple_code (stmt) != GIMPLE_NOP)
1254 *def = make_new_ssa_for_def (stmt, opcode, op);
1256 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1257 make_new_ssa_for_def (stmt, opcode, op);
1260 /* Find the single immediate use of STMT's LHS, and replace it
1261 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1262 replace *DEF with OP as well. */
1264 static void
1265 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1267 tree lhs;
1268 gimple *use_stmt;
1269 use_operand_p use;
1270 gimple_stmt_iterator gsi;
1272 if (is_gimple_call (stmt))
1273 lhs = gimple_call_lhs (stmt);
1274 else
1275 lhs = gimple_assign_lhs (stmt);
1277 gcc_assert (has_single_use (lhs));
1278 single_imm_use (lhs, &use, &use_stmt);
1279 if (lhs == *def)
1280 *def = op;
1281 SET_USE (use, op);
1282 if (TREE_CODE (op) != SSA_NAME)
1283 update_stmt (use_stmt);
1284 gsi = gsi_for_stmt (stmt);
1285 unlink_stmt_vdef (stmt);
1286 reassoc_remove_stmt (&gsi);
1287 release_defs (stmt);
1290 /* Walks the linear chain with result *DEF searching for an operation
1291 with operand OP and code OPCODE removing that from the chain. *DEF
1292 is updated if there is only one operand but no operation left. */
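
/* A rough illustration (hypothetical chain, not from a dump): with
   *DEF = t3 and the chain t3 = t2 * c, t2 = t1 * b, t1 = a * x,
   calling zero_one_operation (&t3, MULT_EXPR, x) strips the factor x,
   leaving a chain that computes a * b * c; the touched statements get
   fresh SSA names via make_new_ssa_for_all_defs.  */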
1294 static void
1295 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1297 tree orig_def = *def;
1298 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1299 /* PR72835 - Record the stmt chain that has to be updated such that
1300 we don't use the same LHS when the values computed are different. */
1301 auto_vec<gimple *, 64> stmts_to_fix;
1305 tree name;
1307 if (opcode == MULT_EXPR)
1309 if (stmt_is_power_of_op (stmt, op))
1311 if (decrement_power (stmt) == 1)
1313 if (stmts_to_fix.length () > 0)
1314 stmts_to_fix.pop ();
1315 propagate_op_to_single_use (op, stmt, def);
1317 break;
1319 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1321 if (gimple_assign_rhs1 (stmt) == op)
1323 tree cst = build_minus_one_cst (TREE_TYPE (op));
1324 if (stmts_to_fix.length () > 0)
1325 stmts_to_fix.pop ();
1326 propagate_op_to_single_use (cst, stmt, def);
1327 break;
1329 else if (integer_minus_onep (op)
1330 || real_minus_onep (op))
1332 gimple_assign_set_rhs_code
1333 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1334 break;
1339 name = gimple_assign_rhs1 (stmt);
1341 /* If this is the operation we look for and one of the operands
1342 is ours, simply propagate the other operand into the stmt's
1343 single use. */
1344 if (gimple_assign_rhs_code (stmt) == opcode
1345 && (name == op
1346 || gimple_assign_rhs2 (stmt) == op))
1348 if (name == op)
1349 name = gimple_assign_rhs2 (stmt);
1350 if (stmts_to_fix.length () > 0)
1351 stmts_to_fix.pop ();
1352 propagate_op_to_single_use (name, stmt, def);
1353 break;
1356 /* We might have a multiply of two __builtin_pow* calls, and
1357 the operand might be hiding in the rightmost one. Likewise
1358 this can happen for a negate. */
1359 if (opcode == MULT_EXPR
1360 && gimple_assign_rhs_code (stmt) == opcode
1361 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1362 && has_single_use (gimple_assign_rhs2 (stmt)))
1364 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1365 if (stmt_is_power_of_op (stmt2, op))
1367 if (decrement_power (stmt2) == 1)
1368 propagate_op_to_single_use (op, stmt2, def);
1369 else
1370 stmts_to_fix.safe_push (stmt2);
1371 break;
1373 else if (is_gimple_assign (stmt2)
1374 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1376 if (gimple_assign_rhs1 (stmt2) == op)
1378 tree cst = build_minus_one_cst (TREE_TYPE (op));
1379 propagate_op_to_single_use (cst, stmt2, def);
1380 break;
1382 else if (integer_minus_onep (op)
1383 || real_minus_onep (op))
1385 stmts_to_fix.safe_push (stmt2);
1386 gimple_assign_set_rhs_code
1387 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1388 break;
1393 /* Continue walking the chain. */
1394 gcc_assert (name != op
1395 && TREE_CODE (name) == SSA_NAME);
1396 stmt = SSA_NAME_DEF_STMT (name);
1397 stmts_to_fix.safe_push (stmt);
1399 while (1);
1401 if (stmts_to_fix.length () > 0 || *def == orig_def)
1402 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1405 /* Returns true if statement S1 dominates statement S2. Like
1406 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1408 static bool
1409 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1411 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1413 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a (D)
1414 SSA_NAME. Assume it lives at the beginning of the function and
1415 thus dominates everything. */
1416 if (!bb1 || s1 == s2)
1417 return true;
1419 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1420 if (!bb2)
1421 return false;
1423 if (bb1 == bb2)
1425 /* PHIs in the same basic block are assumed to be
1426 executed all in parallel; if only one stmt is a PHI,
1427 it dominates the other stmt in the same basic block. */
1428 if (gimple_code (s1) == GIMPLE_PHI)
1429 return true;
1431 if (gimple_code (s2) == GIMPLE_PHI)
1432 return false;
1434 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1436 if (gimple_uid (s1) < gimple_uid (s2))
1437 return true;
1439 if (gimple_uid (s1) > gimple_uid (s2))
1440 return false;
1442 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1443 unsigned int uid = gimple_uid (s1);
1444 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1446 gimple *s = gsi_stmt (gsi);
1447 if (gimple_uid (s) != uid)
1448 break;
1449 if (s == s2)
1450 return true;
1453 return false;
1456 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1459 /* Insert STMT after INSERT_POINT. */
1461 static void
1462 insert_stmt_after (gimple *stmt, gimple *insert_point)
1464 gimple_stmt_iterator gsi;
1465 basic_block bb;
1467 if (gimple_code (insert_point) == GIMPLE_PHI)
1468 bb = gimple_bb (insert_point);
1469 else if (!stmt_ends_bb_p (insert_point))
1471 gsi = gsi_for_stmt (insert_point);
1472 gimple_set_uid (stmt, gimple_uid (insert_point));
1473 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1474 return;
1476 else if (gimple_code (insert_point) == GIMPLE_ASM)
1477 /* We have no idea where to insert - it depends on where the
1478 uses will be placed. */
1479 gcc_unreachable ();
1480 else
1481 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1482 thus if it must end a basic block, it should be a call that can
1483 throw, or some assignment that can throw. If it throws, the LHS
1484 of it will not be initialized though, so only valid places using
1485 the SSA_NAME should be dominated by the fallthru edge. */
1486 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1487 gsi = gsi_after_labels (bb);
1488 if (gsi_end_p (gsi))
1490 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1491 gimple_set_uid (stmt,
1492 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1494 else
1495 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1496 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1499 /* Builds one statement performing OP1 OPCODE OP2 with result type TYPE.
1500 Places the statement after the definition of either
1501 OP1 or OP2. Returns the new statement. */
1503 static gimple *
1504 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1506 gimple *op1def = NULL, *op2def = NULL;
1507 gimple_stmt_iterator gsi;
1508 tree op;
1509 gassign *sum;
1511 /* Create the addition statement. */
1512 op = make_ssa_name (type);
1513 sum = gimple_build_assign (op, opcode, op1, op2);
1515 /* Find an insertion place and insert. */
1516 if (TREE_CODE (op1) == SSA_NAME)
1517 op1def = SSA_NAME_DEF_STMT (op1);
1518 if (TREE_CODE (op2) == SSA_NAME)
1519 op2def = SSA_NAME_DEF_STMT (op2);
1520 if ((!op1def || gimple_nop_p (op1def))
1521 && (!op2def || gimple_nop_p (op2def)))
1523 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1524 if (gsi_end_p (gsi))
1526 gimple_stmt_iterator gsi2
1527 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1528 gimple_set_uid (sum,
1529 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1531 else
1532 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1533 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1535 else
1537 gimple *insert_point;
1538 if ((!op1def || gimple_nop_p (op1def))
1539 || (op2def && !gimple_nop_p (op2def)
1540 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1541 insert_point = op2def;
1542 else
1543 insert_point = op1def;
1544 insert_stmt_after (sum, insert_point);
1546 update_stmt (sum);
1548 return sum;
1551 /* Perform un-distribution of divisions and multiplications.
1552 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1553 to (A + B) / X for real X.
1555 The algorithm is organized as follows.
1557 - First we walk the addition chain *OPS looking for summands that
1558 are defined by a multiplication or a real division. This results
1559 in the candidates bitmap with relevant indices into *OPS.
1561 - Second we build the chains of multiplications or divisions for
1562 these candidates, counting the number of occurrences of (operand, code)
1563 pairs in all of the candidates chains.
1565 - Third we sort the (operand, code) pairs by number of occurrences and
1566 process them starting with the pair with the most uses.
1568 * For each such pair we walk the candidates again to build a
1569 second candidate bitmap noting all multiplication/division chains
1570 that have at least one occurrence of (operand, code).
1572 * We build an alternate addition chain only covering these
1573 candidates with one (operand, code) operation removed from their
1574 multiplication/division chain.
1576 * The first candidate gets replaced by the alternate addition chain
1577 multiplied/divided by the operand.
1579 * All candidate chains get disabled for further processing and
1580 processing of (operand, code) pairs continues.
1582 The alternate addition chains built are re-processed by the main
1583 reassociation algorithm which allows optimizing a * x * y + b * y * x
1584 to (a + b ) * x * y in one invocation of the reassociation pass. */
1586 static bool
1587 undistribute_ops_list (enum tree_code opcode,
1588 vec<operand_entry *> *ops, class loop *loop)
1590 unsigned int length = ops->length ();
1591 operand_entry *oe1;
1592 unsigned i, j;
1593 unsigned nr_candidates, nr_candidates2;
1594 sbitmap_iterator sbi0;
1595 vec<operand_entry *> *subops;
1596 bool changed = false;
1597 unsigned int next_oecount_id = 0;
1599 if (length <= 1
1600 || opcode != PLUS_EXPR)
1601 return false;
1603 /* Build a list of candidates to process. */
1604 auto_sbitmap candidates (length);
1605 bitmap_clear (candidates);
1606 nr_candidates = 0;
1607 FOR_EACH_VEC_ELT (*ops, i, oe1)
1609 enum tree_code dcode;
1610 gimple *oe1def;
1612 if (TREE_CODE (oe1->op) != SSA_NAME)
1613 continue;
1614 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1615 if (!is_gimple_assign (oe1def))
1616 continue;
1617 dcode = gimple_assign_rhs_code (oe1def);
1618 if ((dcode != MULT_EXPR
1619 && dcode != RDIV_EXPR)
1620 || !is_reassociable_op (oe1def, dcode, loop))
1621 continue;
1623 bitmap_set_bit (candidates, i);
1624 nr_candidates++;
1627 if (nr_candidates < 2)
1628 return false;
1630 if (dump_file && (dump_flags & TDF_DETAILS))
1632 fprintf (dump_file, "searching for un-distribute opportunities ");
1633 print_generic_expr (dump_file,
1634 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1635 fprintf (dump_file, " %d\n", nr_candidates);
1638 /* Build linearized sub-operand lists and the counting table. */
1639 cvec.create (0);
1641 hash_table<oecount_hasher> ctable (15);
1643 /* ??? Macro arguments cannot have multi-argument template types in
1644 them. This typedef is needed to work around that limitation. */
1645 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1646 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1647 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1649 gimple *oedef;
1650 enum tree_code oecode;
1651 unsigned j;
1653 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1654 oecode = gimple_assign_rhs_code (oedef);
1655 linearize_expr_tree (&subops[i], oedef,
1656 associative_tree_code (oecode), false);
1658 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1660 oecount c;
1661 int *slot;
1662 int idx;
1663 c.oecode = oecode;
1664 c.cnt = 1;
1665 c.id = next_oecount_id++;
1666 c.op = oe1->op;
1667 cvec.safe_push (c);
1668 idx = cvec.length () + 41;
1669 slot = ctable.find_slot (idx, INSERT);
1670 if (!*slot)
1672 *slot = idx;
1674 else
1676 cvec.pop ();
1677 cvec[*slot - 42].cnt++;
1682 /* Sort the counting table. */
1683 cvec.qsort (oecount_cmp);
1685 if (dump_file && (dump_flags & TDF_DETAILS))
1687 oecount *c;
1688 fprintf (dump_file, "Candidates:\n");
1689 FOR_EACH_VEC_ELT (cvec, j, c)
1691 fprintf (dump_file, " %u %s: ", c->cnt,
1692 c->oecode == MULT_EXPR
1693 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1694 print_generic_expr (dump_file, c->op);
1695 fprintf (dump_file, "\n");
1699 /* Process the (operand, code) pairs in order of most occurrence. */
1700 auto_sbitmap candidates2 (length);
1701 while (!cvec.is_empty ())
1703 oecount *c = &cvec.last ();
1704 if (c->cnt < 2)
1705 break;
1707 /* Now collect the operands in the outer chain that contain
1708 the common operand in their inner chain. */
1709 bitmap_clear (candidates2);
1710 nr_candidates2 = 0;
1711 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1713 gimple *oedef;
1714 enum tree_code oecode;
1715 unsigned j;
1716 tree op = (*ops)[i]->op;
1718 /* If we undistributed in this chain already this may be
1719 a constant. */
1720 if (TREE_CODE (op) != SSA_NAME)
1721 continue;
1723 oedef = SSA_NAME_DEF_STMT (op);
1724 oecode = gimple_assign_rhs_code (oedef);
1725 if (oecode != c->oecode)
1726 continue;
1728 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1730 if (oe1->op == c->op)
1732 bitmap_set_bit (candidates2, i);
1733 ++nr_candidates2;
1734 break;
1739 if (nr_candidates2 >= 2)
1741 operand_entry *oe1, *oe2;
1742 gimple *prod;
1743 int first = bitmap_first_set_bit (candidates2);
1745 /* Build the new addition chain. */
1746 oe1 = (*ops)[first];
1747 if (dump_file && (dump_flags & TDF_DETAILS))
1749 fprintf (dump_file, "Building (");
1750 print_generic_expr (dump_file, oe1->op);
1752 zero_one_operation (&oe1->op, c->oecode, c->op);
1753 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1755 gimple *sum;
1756 oe2 = (*ops)[i];
1757 if (dump_file && (dump_flags & TDF_DETAILS))
1759 fprintf (dump_file, " + ");
1760 print_generic_expr (dump_file, oe2->op);
1762 zero_one_operation (&oe2->op, c->oecode, c->op);
1763 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1764 oe1->op, oe2->op, opcode);
1765 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1766 oe2->rank = 0;
1767 oe1->op = gimple_get_lhs (sum);
1770 /* Apply the multiplication/division. */
1771 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1772 oe1->op, c->op, c->oecode);
1773 if (dump_file && (dump_flags & TDF_DETAILS))
1775 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1776 print_generic_expr (dump_file, c->op);
1777 fprintf (dump_file, "\n");
1780 /* Record it in the addition chain and disable further
1781 undistribution with this op. */
1782 oe1->op = gimple_assign_lhs (prod);
1783 oe1->rank = get_rank (oe1->op);
1784 subops[first].release ();
1786 changed = true;
1789 cvec.pop ();
1792 for (i = 0; i < ops->length (); ++i)
1793 subops[i].release ();
1794 free (subops);
1795 cvec.release ();
1797 return changed;
1800 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1801 first: element index for each relevant BIT_FIELD_REF.
1802 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1803 typedef std::pair<unsigned, unsigned> v_info_elem;
1804 struct v_info {
1805 tree vec_type;
1806 auto_vec<v_info_elem, 32> vec;
1808 typedef v_info *v_info_ptr;
1810 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1811 static int
1812 sort_by_mach_mode (const void *p_i, const void *p_j)
1814 const tree tr1 = *((const tree *) p_i);
1815 const tree tr2 = *((const tree *) p_j);
1816 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1817 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1818 if (mode1 > mode2)
1819 return 1;
1820 else if (mode1 < mode2)
1821 return -1;
1822 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1823 return -1;
1824 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1825 return 1;
1826 return 0;
1829 /* Cleanup hash map for VECTOR information. */
1830 static void
1831 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1833 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1834 it != info_map.end (); ++it)
1836 v_info_ptr info = (*it).second;
1837 delete info;
1838 (*it).second = NULL;
1842 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1843 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1844 is transformed to
1845 Vs = (V1 + V2 + ... + Vn)
1846 Vs[0] + Vs[1] + ... + Vs[k]
1848 The basic steps are listed below:
1850 1) Check the addition chain *OPS, looking for summands that come from a
1851 BIT_FIELD_REF on a VECTOR type. Put the information into v_info_map
1852 for each such summand, using the VECTOR SSA_NAME as the key.
1854 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1855 contiguous and cover the whole VECTOR without any holes.
1856 Obtain one VECTOR list which contains the candidates to be transformed.
1858 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1859 group of candidates with the same mode, build the addition statements
1860 for them and generate BIT_FIELD_REFs accordingly.
1862 TODO:
1863 The current implementation requires each VECTOR to be fully covered.
1864 It could be extended to support partial coverage (adjacent elements
1865 that do not fill the whole vector), but that would likely need a cost
1866 model to decide when the transformation is worthwhile. */
1868 static bool
1869 undistribute_bitref_for_vector (enum tree_code opcode,
1870 vec<operand_entry *> *ops, struct loop *loop)
1872 if (ops->length () <= 1)
1873 return false;
1875 if (opcode != PLUS_EXPR
1876 && opcode != MULT_EXPR
1877 && opcode != BIT_XOR_EXPR
1878 && opcode != BIT_IOR_EXPR
1879 && opcode != BIT_AND_EXPR)
1880 return false;
1882 hash_map<tree, v_info_ptr> v_info_map;
1883 operand_entry *oe1;
1884 unsigned i;
1886 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1887 information into map. */
1888 FOR_EACH_VEC_ELT (*ops, i, oe1)
1890 enum tree_code dcode;
1891 gimple *oe1def;
1893 if (TREE_CODE (oe1->op) != SSA_NAME)
1894 continue;
1895 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1896 if (!is_gimple_assign (oe1def))
1897 continue;
1898 dcode = gimple_assign_rhs_code (oe1def);
1899 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1900 continue;
1902 tree rhs = gimple_assign_rhs1 (oe1def);
1903 tree vec = TREE_OPERAND (rhs, 0);
1904 tree vec_type = TREE_TYPE (vec);
1906 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1907 continue;
1909 /* Ignore it if target machine can't support this VECTOR type. */
1910 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1911 continue;
1913 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1914 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1915 continue;
1917 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1918 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1919 continue;
1921 /* The type of BIT_FIELD_REF might not be equal to the element type of
1922 the vector. We want to use a vector type with element type the
1923 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1924 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1926 machine_mode simd_mode;
1927 unsigned HOST_WIDE_INT size, nunits;
1928 unsigned HOST_WIDE_INT elem_size
1929 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1930 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1931 continue;
1932 if (size <= elem_size || (size % elem_size) != 0)
1933 continue;
1934 nunits = size / elem_size;
1935 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1936 nunits).exists (&simd_mode))
1937 continue;
1938 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1940 /* Ignore it if target machine can't support this VECTOR type. */
1941 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1942 continue;
1944 /* Check const vector type, constrain BIT_FIELD_REF offset and
1945 size. */
1946 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1947 continue;
1949 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1950 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1951 continue;
1954 tree elem_type = TREE_TYPE (vec_type);
1955 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1956 if (maybe_ne (bit_field_size (rhs), elem_size))
1957 continue;
1959 unsigned idx;
1960 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1961 continue;
1963 /* Ignore it if target machine can't support this type of VECTOR
1964 operation. */
1965 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1966 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1967 continue;
1969 bool existed;
1970 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1971 if (!existed)
1973 info = new v_info;
1974 info->vec_type = vec_type;
1976 else if (!types_compatible_p (vec_type, info->vec_type))
1977 continue;
1978 info->vec.safe_push (std::make_pair (idx, i));
1981 /* We need at least two VECTORs to combine. */
1982 if (v_info_map.elements () <= 1)
1984 cleanup_vinfo_map (v_info_map);
1985 return false;
1988 /* Verify all VECTOR candidates by checking two conditions:
1989 1) sorted offsets are adjacent, no holes.
1990 2) can fill the whole VECTOR perfectly.
1991 And add the valid candidates to a vector for further handling. */
1992 auto_vec<tree> valid_vecs (v_info_map.elements ());
1993 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1994 it != v_info_map.end (); ++it)
1996 tree cand_vec = (*it).first;
1997 v_info_ptr cand_info = (*it).second;
1998 unsigned int num_elems
1999 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2000 if (cand_info->vec.length () != num_elems)
2001 continue;
2002 sbitmap holes = sbitmap_alloc (num_elems);
2003 bitmap_ones (holes);
2004 bool valid = true;
2005 v_info_elem *curr;
2006 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2008 if (!bitmap_bit_p (holes, curr->first))
2010 valid = false;
2011 break;
2013 else
2014 bitmap_clear_bit (holes, curr->first);
2016 if (valid && bitmap_empty_p (holes))
2017 valid_vecs.quick_push (cand_vec);
2018 sbitmap_free (holes);
2021 /* We need at least two VECTORs to combine. */
2022 if (valid_vecs.length () <= 1)
2024 cleanup_vinfo_map (v_info_map);
2025 return false;
2028 valid_vecs.qsort (sort_by_mach_mode);
2029 /* Go through all candidates in machine mode order, combining the
2030 candidates that share a mode and skipping modes with a single one. */
2031 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2033 tree tvec = valid_vecs[i];
2034 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2036 /* Skip modes with only a single candidate. */
2037 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2038 continue;
2040 unsigned int idx, j;
2041 gimple *sum = NULL;
2042 tree sum_vec = tvec;
2043 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2044 v_info_elem *elem;
2045 tree vec_type = info_ptr->vec_type;
2047 /* Build the sum for all candidates with the same mode. */
2050 sum = build_and_add_sum (vec_type, sum_vec,
2051 valid_vecs[i + 1], opcode);
2052 if (!useless_type_conversion_p (vec_type,
2053 TREE_TYPE (valid_vecs[i + 1])))
2055 /* Update the operands only after build_and_add_sum,
2056 so that we don't have to repeat the placement algorithm
2057 of build_and_add_sum. */
2058 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2059 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2060 valid_vecs[i + 1]);
2061 tree lhs = make_ssa_name (vec_type);
2062 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2063 gimple_set_uid (g, gimple_uid (sum));
2064 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2065 gimple_assign_set_rhs2 (sum, lhs);
2066 if (sum_vec == tvec)
2068 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2069 lhs = make_ssa_name (vec_type);
2070 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2071 gimple_set_uid (g, gimple_uid (sum));
2072 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2073 gimple_assign_set_rhs1 (sum, lhs);
2075 update_stmt (sum);
2077 sum_vec = gimple_get_lhs (sum);
2078 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2079 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2080 /* Update those related ops of current candidate VECTOR. */
2081 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2083 idx = elem->second;
2084 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2086 /* Set this so that the op definition will get DCEd later. */
2086 gimple_set_visited (def, true);
2087 if (opcode == PLUS_EXPR
2088 || opcode == BIT_XOR_EXPR
2089 || opcode == BIT_IOR_EXPR)
2090 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2091 else if (opcode == MULT_EXPR)
2092 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2093 else
2095 gcc_assert (opcode == BIT_AND_EXPR);
2096 (*ops)[idx]->op
2097 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2099 (*ops)[idx]->rank = 0;
2101 if (dump_file && (dump_flags & TDF_DETAILS))
2103 fprintf (dump_file, "Generating addition -> ");
2104 print_gimple_stmt (dump_file, sum, 0);
2106 i++;
2108 while ((i < valid_vecs.length () - 1)
2109 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2111 /* Referring to the first valid VECTOR with this mode, generate the
2112 BIT_FIELD_REF statements accordingly. */
2113 info_ptr = *(v_info_map.get (tvec));
2114 gcc_assert (sum);
2115 tree elem_type = TREE_TYPE (vec_type);
2116 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2118 idx = elem->second;
2119 tree dst = make_ssa_name (elem_type);
2120 tree pos = bitsize_int (elem->first
2121 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2122 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2123 TYPE_SIZE (elem_type), pos);
2124 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2125 insert_stmt_after (gs, sum);
2126 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2128 /* Set this so that the op definition will get DCEd later. */
2128 gimple_set_visited (def, true);
2129 (*ops)[idx]->op = gimple_assign_lhs (gs);
2130 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2131 if (dump_file && (dump_flags & TDF_DETAILS))
2133 fprintf (dump_file, "Generating bit_field_ref -> ");
2134 print_gimple_stmt (dump_file, gs, 0);
2139 if (dump_file && (dump_flags & TDF_DETAILS))
2140 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2142 cleanup_vinfo_map (v_info_map);
2144 return true;
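/* Illustrative sketch (editor addition, not part of the pass; the type and
   function names are hypothetical and use the GNU vector extension): the
   source-level shape that undistribute_bitref_for_vector above rewrites.
   On a target with a supported 4 x int vector mode, the eight lane
   additions become a single vector add of A and B followed by four
   BIT_FIELD_REFs of the combined vector.  */
typedef int reassoc_v4si_sketch __attribute__ ((vector_size (16)));

static int
reassoc_bitref_sum_sketch (reassoc_v4si_sketch a, reassoc_v4si_sketch b)
{
  /* Each a[N] / b[N] below is a BIT_FIELD_REF of a vector SSA_NAME.  */
  return a[0] + b[0] + a[1] + b[1] + a[2] + b[2] + a[3] + b[3];
}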
2147 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2148 expression, examine the other OPS to see if any of them are comparisons
2149 of the same values, which we may be able to combine or eliminate.
2150 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2152 static bool
2153 eliminate_redundant_comparison (enum tree_code opcode,
2154 vec<operand_entry *> *ops,
2155 unsigned int currindex,
2156 operand_entry *curr)
2158 tree op1, op2;
2159 enum tree_code lcode, rcode;
2160 gimple *def1, *def2;
2161 int i;
2162 operand_entry *oe;
2164 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2165 return false;
2167 /* Check that CURR is a comparison. */
2168 if (TREE_CODE (curr->op) != SSA_NAME)
2169 return false;
2170 def1 = SSA_NAME_DEF_STMT (curr->op);
2171 if (!is_gimple_assign (def1))
2172 return false;
2173 lcode = gimple_assign_rhs_code (def1);
2174 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2175 return false;
2176 op1 = gimple_assign_rhs1 (def1);
2177 op2 = gimple_assign_rhs2 (def1);
2179 /* Now look for a similar comparison in the remaining OPS. */
2180 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2182 tree t;
2184 if (TREE_CODE (oe->op) != SSA_NAME)
2185 continue;
2186 def2 = SSA_NAME_DEF_STMT (oe->op);
2187 if (!is_gimple_assign (def2))
2188 continue;
2189 rcode = gimple_assign_rhs_code (def2);
2190 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2191 continue;
2193 /* If we got here, we have a match. See if we can combine the
2194 two comparisons. */
2195 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2196 if (opcode == BIT_IOR_EXPR)
2197 t = maybe_fold_or_comparisons (type,
2198 lcode, op1, op2,
2199 rcode, gimple_assign_rhs1 (def2),
2200 gimple_assign_rhs2 (def2));
2201 else
2202 t = maybe_fold_and_comparisons (type,
2203 lcode, op1, op2,
2204 rcode, gimple_assign_rhs1 (def2),
2205 gimple_assign_rhs2 (def2));
2206 if (!t)
2207 continue;
2209 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2210 always give us a boolean_type_node value back. If the original
2211 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2212 we need to convert. */
2213 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2214 t = fold_convert (TREE_TYPE (curr->op), t);
2216 if (TREE_CODE (t) != INTEGER_CST
2217 && !operand_equal_p (t, curr->op, 0))
2219 enum tree_code subcode;
2220 tree newop1, newop2;
2221 if (!COMPARISON_CLASS_P (t))
2222 continue;
2223 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2224 STRIP_USELESS_TYPE_CONVERSION (newop1);
2225 STRIP_USELESS_TYPE_CONVERSION (newop2);
2226 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2227 continue;
2230 if (dump_file && (dump_flags & TDF_DETAILS))
2232 fprintf (dump_file, "Equivalence: ");
2233 print_generic_expr (dump_file, curr->op);
2234 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2235 print_generic_expr (dump_file, oe->op);
2236 fprintf (dump_file, " -> ");
2237 print_generic_expr (dump_file, t);
2238 fprintf (dump_file, "\n");
2241 /* Now we can delete oe, as it has been subsumed by the new combined
2242 expression t. */
2243 ops->ordered_remove (i);
2244 reassociate_stats.ops_eliminated ++;
2246 /* If t is the same as curr->op, we're done. Otherwise we must
2247 replace curr->op with t. Special case is if we got a constant
2248 back, in which case we add it to the end instead of in place of
2249 the current entry. */
2250 if (TREE_CODE (t) == INTEGER_CST)
2252 ops->ordered_remove (currindex);
2253 add_to_ops_vec (ops, t);
2255 else if (!operand_equal_p (t, curr->op, 0))
2257 gimple *sum;
2258 enum tree_code subcode;
2259 tree newop1;
2260 tree newop2;
2261 gcc_assert (COMPARISON_CLASS_P (t));
2262 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2263 STRIP_USELESS_TYPE_CONVERSION (newop1);
2264 STRIP_USELESS_TYPE_CONVERSION (newop2);
2265 gcc_checking_assert (is_gimple_val (newop1)
2266 && is_gimple_val (newop2));
2267 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2268 curr->op = gimple_get_lhs (sum);
2270 return true;
2273 return false;
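/* Illustrative sketch (editor addition, hypothetical name): the kind of
   redundancy eliminate_redundant_comparison above removes; the two
   comparisons are folded by maybe_fold_or_comparisons into a <= b.  */
static int
reassoc_redundant_cmp_sketch (int a, int b)
{
  return (a < b) | (a == b);
}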
2277 /* Transform repeated addition of the same value into a multiplication by
2278 a constant. */
2279 static bool
2280 transform_add_to_multiply (vec<operand_entry *> *ops)
2282 operand_entry *oe;
2283 tree op = NULL_TREE;
2284 int j;
2285 int i, start = -1, end = 0, count = 0;
2286 auto_vec<std::pair <int, int> > indxs;
2287 bool changed = false;
2289 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2290 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2291 || !flag_unsafe_math_optimizations))
2292 return false;
2294 /* Look for repeated operands. */
2295 FOR_EACH_VEC_ELT (*ops, i, oe)
2297 if (start == -1)
2299 count = 1;
2300 op = oe->op;
2301 start = i;
2303 else if (operand_equal_p (oe->op, op, 0))
2305 count++;
2306 end = i;
2308 else
2310 if (count > 1)
2311 indxs.safe_push (std::make_pair (start, end));
2312 count = 1;
2313 op = oe->op;
2314 start = i;
2318 if (count > 1)
2319 indxs.safe_push (std::make_pair (start, end));
2321 for (j = indxs.length () - 1; j >= 0; --j)
2323 /* Convert repeated operand addition to multiplication. */
2324 start = indxs[j].first;
2325 end = indxs[j].second;
2326 op = (*ops)[start]->op;
2327 count = end - start + 1;
2328 for (i = end; i >= start; --i)
2329 ops->unordered_remove (i);
2330 tree tmp = make_ssa_name (TREE_TYPE (op));
2331 tree cst = build_int_cst (integer_type_node, count);
2332 gassign *mul_stmt
2333 = gimple_build_assign (tmp, MULT_EXPR,
2334 op, fold_convert (TREE_TYPE (op), cst));
2335 gimple_set_visited (mul_stmt, true);
2336 add_to_ops_vec (ops, tmp, mul_stmt);
2337 changed = true;
2340 return changed;
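/* Illustrative sketch (editor addition, hypothetical name): the repeated
   operand that transform_add_to_multiply above replaces with a multiply by
   a constant; for floating point this is only done with
   -funsafe-math-optimizations.  */
static int
reassoc_add_to_mul_sketch (int x, int y)
{
  return x + x + y + x + x;	/* The four copies of x become x * 4.  */
}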
2344 /* Perform various identities and other optimizations on the list of
2345 operand entries, stored in OPS. The tree code for the binary
2346 operation between all the operands is OPCODE. */
2348 static void
2349 optimize_ops_list (enum tree_code opcode,
2350 vec<operand_entry *> *ops)
2352 unsigned int length = ops->length ();
2353 unsigned int i;
2354 operand_entry *oe;
2355 operand_entry *oelast = NULL;
2356 bool iterate = false;
2358 if (length == 1)
2359 return;
2361 oelast = ops->last ();
2363 /* If the last two are constants, pop the constants off, merge them
2364 and try the next two. */
2365 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2367 operand_entry *oelm1 = (*ops)[length - 2];
2369 if (oelm1->rank == 0
2370 && is_gimple_min_invariant (oelm1->op)
2371 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2372 TREE_TYPE (oelast->op)))
2374 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2375 oelm1->op, oelast->op);
2377 if (folded && is_gimple_min_invariant (folded))
2379 if (dump_file && (dump_flags & TDF_DETAILS))
2380 fprintf (dump_file, "Merging constants\n");
2382 ops->pop ();
2383 ops->pop ();
2385 add_to_ops_vec (ops, folded);
2386 reassociate_stats.constants_eliminated++;
2388 optimize_ops_list (opcode, ops);
2389 return;
2394 eliminate_using_constants (opcode, ops);
2395 oelast = NULL;
2397 for (i = 0; ops->iterate (i, &oe);)
2399 bool done = false;
2401 if (eliminate_not_pairs (opcode, ops, i, oe))
2402 return;
2403 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2404 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2405 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2407 if (done)
2408 return;
2409 iterate = true;
2410 oelast = NULL;
2411 continue;
2413 oelast = oe;
2414 i++;
2417 if (iterate)
2418 optimize_ops_list (opcode, ops);
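/* Illustrative sketch (editor addition, hypothetical name): once the
   addition below is linearized, the two constants end up last in the
   operand list and optimize_ops_list above folds them into a single 8.  */
static int
reassoc_merge_constants_sketch (int x, int y)
{
  return (x + 3) + (y + 5);	/* Reassociated to x + y + 8.  */
}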
2421 /* The following functions are subroutines to optimize_range_tests and allow
2422 it to try to change a logical combination of comparisons into a range
2423 test.
2425 For example, both
2426 X == 2 || X == 5 || X == 3 || X == 4
2427 and
2428 X >= 2 && X <= 5
2429 are converted to
2430 (unsigned) (X - 2) <= 3
2432 For more information see comments above fold_range_test in fold-const.c,
2433 this implementation is for GIMPLE. */
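/* Illustrative sketch (editor addition, hypothetical name): the chain of
   equality tests below is what the range-test machinery rewrites into the
   single check (unsigned) (x - 2) <= 3.  */
static int
reassoc_range_test_sketch (int x)
{
  return x == 2 || x == 5 || x == 3 || x == 4;
}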
2437 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2439 void
2440 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2442 if (!skip_exp)
2443 print_generic_expr (file, r->exp);
2444 fprintf (file, " %c[", r->in_p ? '+' : '-');
2445 print_generic_expr (file, r->low);
2446 fputs (", ", file);
2447 print_generic_expr (file, r->high);
2448 fputc (']', file);
2451 /* Dump the range entry R to STDERR. */
2453 DEBUG_FUNCTION void
2454 debug_range_entry (struct range_entry *r)
2456 dump_range_entry (stderr, r, false);
2457 fputc ('\n', stderr);
2460 /* This is similar to make_range in fold-const.c, but on top of
2461 GIMPLE instead of trees. If EXP is non-NULL, it should be
2462 an SSA_NAME and STMT argument is ignored, otherwise STMT
2463 argument should be a GIMPLE_COND. */
2465 void
2466 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2468 int in_p;
2469 tree low, high;
2470 bool is_bool, strict_overflow_p;
2472 r->exp = NULL_TREE;
2473 r->in_p = false;
2474 r->strict_overflow_p = false;
2475 r->low = NULL_TREE;
2476 r->high = NULL_TREE;
2477 if (exp != NULL_TREE
2478 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2479 return;
2481 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2482 and see if we can refine the range. Some of the cases below may not
2483 happen, but it doesn't seem worth worrying about this. We "continue"
2484 the outer loop when we've changed something; otherwise we "break"
2485 the switch, which will "break" the while. */
2486 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2487 high = low;
2488 in_p = 0;
2489 strict_overflow_p = false;
2490 is_bool = false;
2491 if (exp == NULL_TREE)
2492 is_bool = true;
2493 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2495 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2496 is_bool = true;
2497 else
2498 return;
2500 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2501 is_bool = true;
2503 while (1)
2505 enum tree_code code;
2506 tree arg0, arg1, exp_type;
2507 tree nexp;
2508 location_t loc;
2510 if (exp != NULL_TREE)
2512 if (TREE_CODE (exp) != SSA_NAME
2513 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2514 break;
2516 stmt = SSA_NAME_DEF_STMT (exp);
2517 if (!is_gimple_assign (stmt))
2518 break;
2520 code = gimple_assign_rhs_code (stmt);
2521 arg0 = gimple_assign_rhs1 (stmt);
2522 arg1 = gimple_assign_rhs2 (stmt);
2523 exp_type = TREE_TYPE (exp);
2525 else
2527 code = gimple_cond_code (stmt);
2528 arg0 = gimple_cond_lhs (stmt);
2529 arg1 = gimple_cond_rhs (stmt);
2530 exp_type = boolean_type_node;
2533 if (TREE_CODE (arg0) != SSA_NAME
2534 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2535 break;
2536 loc = gimple_location (stmt);
2537 switch (code)
2539 case BIT_NOT_EXPR:
2540 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2541 /* Ensure the range is either +[-,0], +[0,0],
2542 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2543 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2544 or similar expression of unconditional true or
2545 false, it should not be negated. */
2546 && ((high && integer_zerop (high))
2547 || (low && integer_onep (low))))
2549 in_p = !in_p;
2550 exp = arg0;
2551 continue;
2553 break;
2554 case SSA_NAME:
2555 exp = arg0;
2556 continue;
2557 CASE_CONVERT:
2558 if (is_bool)
2560 if ((TYPE_PRECISION (exp_type) == 1
2561 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2562 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2563 return;
2565 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2567 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2568 is_bool = true;
2569 else
2570 return;
2572 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2573 is_bool = true;
2574 goto do_default;
2575 case EQ_EXPR:
2576 case NE_EXPR:
2577 case LT_EXPR:
2578 case LE_EXPR:
2579 case GE_EXPR:
2580 case GT_EXPR:
2581 is_bool = true;
2582 /* FALLTHRU */
2583 default:
2584 if (!is_bool)
2585 return;
2586 do_default:
2587 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2588 &low, &high, &in_p,
2589 &strict_overflow_p);
2590 if (nexp != NULL_TREE)
2592 exp = nexp;
2593 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2594 continue;
2596 break;
2598 break;
2600 if (is_bool)
2602 r->exp = exp;
2603 r->in_p = in_p;
2604 r->low = low;
2605 r->high = high;
2606 r->strict_overflow_p = strict_overflow_p;
2610 /* Comparison function for qsort. Sort entries
2611 without SSA_NAME exp first, then with SSA_NAMEs sorted
2612 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2613 by increasing ->low and if ->low is the same, by increasing
2614 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2615 maximum. */
2617 static int
2618 range_entry_cmp (const void *a, const void *b)
2620 const struct range_entry *p = (const struct range_entry *) a;
2621 const struct range_entry *q = (const struct range_entry *) b;
2623 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2625 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2627 /* Group range_entries for the same SSA_NAME together. */
2628 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2629 return -1;
2630 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2631 return 1;
2632 /* If ->low is different, NULL low goes first, then by
2633 ascending low. */
2634 if (p->low != NULL_TREE)
2636 if (q->low != NULL_TREE)
2638 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2639 p->low, q->low);
2640 if (tem && integer_onep (tem))
2641 return -1;
2642 tem = fold_binary (GT_EXPR, boolean_type_node,
2643 p->low, q->low);
2644 if (tem && integer_onep (tem))
2645 return 1;
2647 else
2648 return 1;
2650 else if (q->low != NULL_TREE)
2651 return -1;
2652 /* If ->high is different, NULL high goes last, before that by
2653 ascending high. */
2654 if (p->high != NULL_TREE)
2656 if (q->high != NULL_TREE)
2658 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2659 p->high, q->high);
2660 if (tem && integer_onep (tem))
2661 return -1;
2662 tem = fold_binary (GT_EXPR, boolean_type_node,
2663 p->high, q->high);
2664 if (tem && integer_onep (tem))
2665 return 1;
2667 else
2668 return -1;
2670 else if (q->high != NULL_TREE)
2671 return 1;
2672 /* If both ranges are the same, sort below by ascending idx. */
2674 else
2675 return 1;
2677 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2678 return -1;
2680 if (p->idx < q->idx)
2681 return -1;
2682 else
2684 gcc_checking_assert (p->idx > q->idx);
2685 return 1;
2689 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2690 insert needed statements BEFORE or after GSI. */
2692 static tree
2693 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2695 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2696 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2697 if (TREE_CODE (ret) != SSA_NAME)
2699 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2700 if (before)
2701 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2702 else
2703 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2704 ret = gimple_assign_lhs (g);
2706 return ret;
2709 /* Helper routine of optimize_range_tests.
2710 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2711 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2712 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2713 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2714 an array of COUNT pointers to other ranges. Return
2715 true if the range merge has been successful.
2716 If OPCODE is ERROR_MARK, this is called from within
2717 maybe_optimize_range_tests and is performing inter-bb range optimization.
2718 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2719 oe->rank. */
2721 static bool
2722 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2723 struct range_entry **otherrangep,
2724 unsigned int count, enum tree_code opcode,
2725 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2726 bool in_p, tree low, tree high, bool strict_overflow_p)
2728 operand_entry *oe = (*ops)[range->idx];
2729 tree op = oe->op;
2730 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2731 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2732 location_t loc = gimple_location (stmt);
2733 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2734 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2735 in_p, low, high);
2736 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2737 gimple_stmt_iterator gsi;
2738 unsigned int i, uid;
2740 if (tem == NULL_TREE)
2741 return false;
2743 /* If op is default def SSA_NAME, there is no place to insert the
2744 new comparison. Give up, unless we can use OP itself as the
2745 range test. */
2746 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2748 if (op == range->exp
2749 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2750 || TREE_CODE (optype) == BOOLEAN_TYPE)
2751 && (op == tem
2752 || (TREE_CODE (tem) == EQ_EXPR
2753 && TREE_OPERAND (tem, 0) == op
2754 && integer_onep (TREE_OPERAND (tem, 1))))
2755 && opcode != BIT_IOR_EXPR
2756 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2758 stmt = NULL;
2759 tem = op;
2761 else
2762 return false;
2765 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2766 warning_at (loc, OPT_Wstrict_overflow,
2767 "assuming signed overflow does not occur "
2768 "when simplifying range test");
2770 if (dump_file && (dump_flags & TDF_DETAILS))
2772 struct range_entry *r;
2773 fprintf (dump_file, "Optimizing range tests ");
2774 dump_range_entry (dump_file, range, false);
2775 for (i = 0; i < count; i++)
2777 if (otherrange)
2778 r = otherrange + i;
2779 else
2780 r = otherrangep[i];
2781 if (r->exp
2782 && r->exp != range->exp
2783 && TREE_CODE (r->exp) == SSA_NAME)
2785 fprintf (dump_file, " and ");
2786 dump_range_entry (dump_file, r, false);
2788 else
2790 fprintf (dump_file, " and");
2791 dump_range_entry (dump_file, r, true);
2794 fprintf (dump_file, "\n into ");
2795 print_generic_expr (dump_file, tem);
2796 fprintf (dump_file, "\n");
2799 if (opcode == BIT_IOR_EXPR
2800 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2801 tem = invert_truthvalue_loc (loc, tem);
2803 tem = fold_convert_loc (loc, optype, tem);
2804 if (stmt)
2806 gsi = gsi_for_stmt (stmt);
2807 uid = gimple_uid (stmt);
2809 else
2811 gsi = gsi_none ();
2812 uid = 0;
2814 if (stmt == NULL)
2815 gcc_checking_assert (tem == op);
2816 /* In rare cases range->exp can be equal to lhs of stmt.
2817 In that case we have to insert after the stmt rather than before
2818 it. If stmt is a PHI, insert it at the start of the basic block. */
2819 else if (op != range->exp)
2821 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2822 tem = force_into_ssa_name (&gsi, tem, true);
2823 gsi_prev (&gsi);
2825 else if (gimple_code (stmt) != GIMPLE_PHI)
2827 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2828 tem = force_into_ssa_name (&gsi, tem, false);
2830 else
2832 gsi = gsi_after_labels (gimple_bb (stmt));
2833 if (!gsi_end_p (gsi))
2834 uid = gimple_uid (gsi_stmt (gsi));
2835 else
2837 gsi = gsi_start_bb (gimple_bb (stmt));
2838 uid = 1;
2839 while (!gsi_end_p (gsi))
2841 uid = gimple_uid (gsi_stmt (gsi));
2842 gsi_next (&gsi);
2845 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2846 tem = force_into_ssa_name (&gsi, tem, true);
2847 if (gsi_end_p (gsi))
2848 gsi = gsi_last_bb (gimple_bb (stmt));
2849 else
2850 gsi_prev (&gsi);
2852 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2853 if (gimple_uid (gsi_stmt (gsi)))
2854 break;
2855 else
2856 gimple_set_uid (gsi_stmt (gsi), uid);
2858 oe->op = tem;
2859 range->exp = exp;
2860 range->low = low;
2861 range->high = high;
2862 range->in_p = in_p;
2863 range->strict_overflow_p = false;
2865 for (i = 0; i < count; i++)
2867 if (otherrange)
2868 range = otherrange + i;
2869 else
2870 range = otherrangep[i];
2871 oe = (*ops)[range->idx];
2872 /* Now change all the other range test immediate uses, so that
2873 those tests will be optimized away. */
2874 if (opcode == ERROR_MARK)
2876 if (oe->op)
2877 oe->op = build_int_cst (TREE_TYPE (oe->op),
2878 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2879 else
2880 oe->op = (oe->rank == BIT_IOR_EXPR
2881 ? boolean_false_node : boolean_true_node);
2883 else
2884 oe->op = error_mark_node;
2885 range->exp = NULL_TREE;
2886 range->low = NULL_TREE;
2887 range->high = NULL_TREE;
2889 return true;
2892 /* Optimize X == CST1 || X == CST2
2893 if popcount (CST1 ^ CST2) == 1 into
2894 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2895 Similarly for ranges. E.g.
2896 X != 2 && X != 3 && X != 10 && X != 11
2897 will be transformed by the previous optimization into
2898 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2899 and this loop can transform that into
2900 !(((X & ~8) - 2U) <= 1U). */
2902 static bool
2903 optimize_range_tests_xor (enum tree_code opcode, tree type,
2904 tree lowi, tree lowj, tree highi, tree highj,
2905 vec<operand_entry *> *ops,
2906 struct range_entry *rangei,
2907 struct range_entry *rangej)
2909 tree lowxor, highxor, tem, exp;
2910 /* Check lowi ^ lowj == highi ^ highj and
2911 popcount (lowi ^ lowj) == 1. */
2912 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2913 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2914 return false;
2915 if (!integer_pow2p (lowxor))
2916 return false;
2917 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2918 if (!tree_int_cst_equal (lowxor, highxor))
2919 return false;
2921 exp = rangei->exp;
2922 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2923 int prec = GET_MODE_PRECISION (mode);
2924 if (TYPE_PRECISION (type) < prec
2925 || (wi::to_wide (TYPE_MIN_VALUE (type))
2926 != wi::min_value (prec, TYPE_SIGN (type)))
2927 || (wi::to_wide (TYPE_MAX_VALUE (type))
2928 != wi::max_value (prec, TYPE_SIGN (type))))
2930 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2931 exp = fold_convert (type, exp);
2932 lowxor = fold_convert (type, lowxor);
2933 lowi = fold_convert (type, lowi);
2934 highi = fold_convert (type, highi);
2936 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2937 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2938 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2939 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2940 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2941 NULL, rangei->in_p, lowj, highj,
2942 rangei->strict_overflow_p
2943 || rangej->strict_overflow_p))
2944 return true;
2945 return false;
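/* Illustrative sketch (editor addition, hypothetical name): 43 ^ 47 is 4,
   a single bit, so optimize_range_tests_xor above can rewrite the two
   equality tests as (x & ~4U) == 43.  */
static int
reassoc_xor_range_sketch (unsigned int x)
{
  return x == 43 || x == 47;
}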
2948 /* Optimize X == CST1 || X == CST2
2949 if popcount (CST2 - CST1) == 1 into
2950 ((X - CST1) & ~(CST2 - CST1)) == 0.
2951 Similarly for ranges. E.g.
2952 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2953 || X == 75 || X == 45
2954 will be transformed by the previous optimization into
2955 (X - 43U) <= 3U || (X - 75U) <= 3U
2956 and this loop can transform that into
2957 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2958 static bool
2959 optimize_range_tests_diff (enum tree_code opcode, tree type,
2960 tree lowi, tree lowj, tree highi, tree highj,
2961 vec<operand_entry *> *ops,
2962 struct range_entry *rangei,
2963 struct range_entry *rangej)
2965 tree tem1, tem2, mask;
2966 /* Check highi - lowi == highj - lowj. */
2967 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2968 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2969 return false;
2970 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2971 if (!tree_int_cst_equal (tem1, tem2))
2972 return false;
2973 /* Check popcount (lowj - lowi) == 1. */
2974 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2975 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2976 return false;
2977 if (!integer_pow2p (tem1))
2978 return false;
2980 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2981 int prec = GET_MODE_PRECISION (mode);
2982 if (TYPE_PRECISION (type) < prec
2983 || (wi::to_wide (TYPE_MIN_VALUE (type))
2984 != wi::min_value (prec, TYPE_SIGN (type)))
2985 || (wi::to_wide (TYPE_MAX_VALUE (type))
2986 != wi::max_value (prec, TYPE_SIGN (type))))
2987 type = build_nonstandard_integer_type (prec, 1);
2988 else
2989 type = unsigned_type_for (type);
2990 tem1 = fold_convert (type, tem1);
2991 tem2 = fold_convert (type, tem2);
2992 lowi = fold_convert (type, lowi);
2993 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2994 tem1 = fold_build2 (MINUS_EXPR, type,
2995 fold_convert (type, rangei->exp), lowi);
2996 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2997 lowj = build_int_cst (type, 0);
2998 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2999 NULL, rangei->in_p, lowj, tem2,
3000 rangei->strict_overflow_p
3001 || rangej->strict_overflow_p))
3002 return true;
3003 return false;
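/* Illustrative sketch (editor addition, hypothetical name): 75 - 43 is 32,
   a single bit, so optimize_range_tests_diff above can rewrite the two
   equality tests as ((x - 43U) & ~32U) == 0.  */
static int
reassoc_diff_range_sketch (unsigned int x)
{
  return x == 43 || x == 75;
}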
3006 /* Perform the checks common to optimize_range_tests_xor and
3007 optimize_range_tests_diff.
3008 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
3009 otherwise call optimize_range_tests_diff. */
3011 static bool
3012 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3013 bool optimize_xor, vec<operand_entry *> *ops,
3014 struct range_entry *ranges)
3016 int i, j;
3017 bool any_changes = false;
3018 for (i = first; i < length; i++)
3020 tree lowi, highi, lowj, highj, type, tem;
3022 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3023 continue;
3024 type = TREE_TYPE (ranges[i].exp);
3025 if (!INTEGRAL_TYPE_P (type))
3026 continue;
3027 lowi = ranges[i].low;
3028 if (lowi == NULL_TREE)
3029 lowi = TYPE_MIN_VALUE (type);
3030 highi = ranges[i].high;
3031 if (highi == NULL_TREE)
3032 continue;
3033 for (j = i + 1; j < length && j < i + 64; j++)
3035 bool changes;
3036 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3037 continue;
3038 lowj = ranges[j].low;
3039 if (lowj == NULL_TREE)
3040 continue;
3041 highj = ranges[j].high;
3042 if (highj == NULL_TREE)
3043 highj = TYPE_MAX_VALUE (type);
3044 /* Check lowj > highi. */
3045 tem = fold_binary (GT_EXPR, boolean_type_node,
3046 lowj, highi);
3047 if (tem == NULL_TREE || !integer_onep (tem))
3048 continue;
3049 if (optimize_xor)
3050 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3051 highi, highj, ops,
3052 ranges + i, ranges + j);
3053 else
3054 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3055 highi, highj, ops,
3056 ranges + i, ranges + j);
3057 if (changes)
3059 any_changes = true;
3060 break;
3064 return any_changes;
3067 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3068 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3069 EXP on success, NULL otherwise. */
3071 static tree
3072 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3073 wide_int *mask, tree *totallowp)
3075 tree tem = int_const_binop (MINUS_EXPR, high, low);
3076 if (tem == NULL_TREE
3077 || TREE_CODE (tem) != INTEGER_CST
3078 || TREE_OVERFLOW (tem)
3079 || tree_int_cst_sgn (tem) == -1
3080 || compare_tree_int (tem, prec) != -1)
3081 return NULL_TREE;
3083 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3084 *mask = wi::shifted_mask (0, max, false, prec);
3085 if (TREE_CODE (exp) == BIT_AND_EXPR
3086 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3088 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3089 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3090 if (wi::popcount (msk) == 1
3091 && wi::ltu_p (msk, prec - max))
3093 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3094 max += msk.to_uhwi ();
3095 exp = TREE_OPERAND (exp, 0);
3096 if (integer_zerop (low)
3097 && TREE_CODE (exp) == PLUS_EXPR
3098 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3100 tree ret = TREE_OPERAND (exp, 0);
3101 STRIP_NOPS (ret);
3102 widest_int bias
3103 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3104 TYPE_PRECISION (TREE_TYPE (low))));
3105 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3106 if (totallowp)
3108 *totallowp = tbias;
3109 return ret;
3111 else if (!tree_int_cst_lt (totallow, tbias))
3112 return NULL_TREE;
3113 bias = wi::to_widest (tbias);
3114 bias -= wi::to_widest (totallow);
3115 if (bias >= 0 && bias < prec - max)
3117 *mask = wi::lshift (*mask, bias);
3118 return ret;
3123 if (totallowp)
3124 return exp;
3125 if (!tree_int_cst_lt (totallow, low))
3126 return exp;
3127 tem = int_const_binop (MINUS_EXPR, low, totallow);
3128 if (tem == NULL_TREE
3129 || TREE_CODE (tem) != INTEGER_CST
3130 || TREE_OVERFLOW (tem)
3131 || compare_tree_int (tem, prec - max) == 1)
3132 return NULL_TREE;
3134 *mask = wi::lshift (*mask, wi::to_widest (tem));
3135 return exp;
3138 /* Attempt to optimize small range tests using bit test.
3139 E.g.
3140 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3141 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3142 has been optimized by earlier optimizations into:
3143 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3144 As the whole 43 through 82 range spans fewer than 64 numbers,
3145 for 64-bit word targets optimize that into:
3146 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3148 static bool
3149 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3150 vec<operand_entry *> *ops,
3151 struct range_entry *ranges)
3153 int i, j;
3154 bool any_changes = false;
3155 int prec = GET_MODE_BITSIZE (word_mode);
3156 auto_vec<struct range_entry *, 64> candidates;
3158 for (i = first; i < length - 1; i++)
3160 tree lowi, highi, lowj, highj, type;
3162 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3163 continue;
3164 type = TREE_TYPE (ranges[i].exp);
3165 if (!INTEGRAL_TYPE_P (type))
3166 continue;
3167 lowi = ranges[i].low;
3168 if (lowi == NULL_TREE)
3169 lowi = TYPE_MIN_VALUE (type);
3170 highi = ranges[i].high;
3171 if (highi == NULL_TREE)
3172 continue;
3173 wide_int mask;
3174 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3175 highi, &mask, &lowi);
3176 if (exp == NULL_TREE)
3177 continue;
3178 bool strict_overflow_p = ranges[i].strict_overflow_p;
3179 candidates.truncate (0);
3180 int end = MIN (i + 64, length);
3181 for (j = i + 1; j < end; j++)
3183 tree exp2;
3184 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3185 continue;
3186 if (ranges[j].exp == exp)
3188 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3190 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3191 if (exp2 == exp)
3193 else if (TREE_CODE (exp2) == PLUS_EXPR)
3195 exp2 = TREE_OPERAND (exp2, 0);
3196 STRIP_NOPS (exp2);
3197 if (exp2 != exp)
3198 continue;
3200 else
3201 continue;
3203 else
3204 continue;
3205 lowj = ranges[j].low;
3206 if (lowj == NULL_TREE)
3207 continue;
3208 highj = ranges[j].high;
3209 if (highj == NULL_TREE)
3210 highj = TYPE_MAX_VALUE (type);
3211 wide_int mask2;
3212 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3213 highj, &mask2, NULL);
3214 if (exp2 != exp)
3215 continue;
3216 mask |= mask2;
3217 strict_overflow_p |= ranges[j].strict_overflow_p;
3218 candidates.safe_push (&ranges[j]);
3221 /* If every possible relative value of the expression is a valid shift
3222 amount, then we can merge the entry test into the bit test. In that
3223 case, use the bit test if we would otherwise need 2 or more
3224 comparisons; in the other cases, the threshold is 3 comparisons. */
3225 bool entry_test_needed;
3226 value_range r;
3227 if (TREE_CODE (exp) == SSA_NAME
3228 && get_range_query (cfun)->range_of_expr (r, exp)
3229 && r.kind () == VR_RANGE
3230 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3232 wide_int min = r.lower_bound ();
3233 wide_int ilowi = wi::to_wide (lowi);
3234 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3236 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3237 mask = wi::lshift (mask, ilowi - min);
3239 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3241 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3242 mask = wi::lrshift (mask, min - ilowi);
3244 entry_test_needed = false;
3246 else
3247 entry_test_needed = true;
3248 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3250 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3251 wi::to_widest (lowi)
3252 + prec - 1 - wi::clz (mask));
3253 operand_entry *oe = (*ops)[ranges[i].idx];
3254 tree op = oe->op;
3255 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3256 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3257 location_t loc = gimple_location (stmt);
3258 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3260 /* See if it isn't cheaper to pretend the minimum value of the
3261 range is 0, if the maximum value is small enough.
3262 We can then avoid the subtraction of the minimum value, but the
3263 mask constant could perhaps be more expensive. */
3264 if (compare_tree_int (lowi, 0) > 0
3265 && compare_tree_int (high, prec) < 0)
3267 int cost_diff;
3268 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3269 rtx reg = gen_raw_REG (word_mode, 10000);
3270 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3271 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3272 GEN_INT (-m)),
3273 word_mode, speed_p);
3274 rtx r = immed_wide_int_const (mask, word_mode);
3275 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3276 word_mode, speed_p);
3277 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3278 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3279 word_mode, speed_p);
3280 if (cost_diff > 0)
3282 mask = wi::lshift (mask, m);
3283 lowi = build_zero_cst (TREE_TYPE (lowi));
3287 tree tem;
3288 if (entry_test_needed)
3290 tem = build_range_check (loc, optype, unshare_expr (exp),
3291 false, lowi, high);
3292 if (tem == NULL_TREE || is_gimple_val (tem))
3293 continue;
3295 else
3296 tem = NULL_TREE;
3297 tree etype = unsigned_type_for (TREE_TYPE (exp));
3298 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3299 fold_convert_loc (loc, etype, exp),
3300 fold_convert_loc (loc, etype, lowi));
3301 exp = fold_convert_loc (loc, integer_type_node, exp);
3302 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3303 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3304 build_int_cst (word_type, 1), exp);
3305 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3306 wide_int_to_tree (word_type, mask));
3307 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3308 build_zero_cst (word_type));
3309 if (is_gimple_val (exp))
3310 continue;
3312 /* The shift might have undefined behavior if TEM is true,
3313 but reassociate_bb isn't prepared to have basic blocks
3314 split when it is running. So, temporarily emit code
3315 with BIT_IOR_EXPR instead of &&, and fix it up in
3316 branch_fixup. */
3317 gimple_seq seq = NULL;
3318 if (tem)
3320 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3321 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3322 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3324 gimple_seq seq2;
3325 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3326 gimple_seq_add_seq_without_update (&seq, seq2);
3327 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3328 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3329 if (tem)
3331 gimple *g = gimple_build_assign (make_ssa_name (optype),
3332 BIT_IOR_EXPR, tem, exp);
3333 gimple_set_location (g, loc);
3334 gimple_seq_add_stmt_without_update (&seq, g);
3335 exp = gimple_assign_lhs (g);
3337 tree val = build_zero_cst (optype);
3338 if (update_range_test (&ranges[i], NULL, candidates.address (),
3339 candidates.length (), opcode, ops, exp,
3340 seq, false, val, val, strict_overflow_p))
3342 any_changes = true;
3343 if (tem)
3344 reassoc_branch_fixups.safe_push (tem);
3346 else
3347 gimple_seq_discard (seq);
3350 return any_changes;
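/* Illustrative sketch (editor addition, hypothetical name and constants):
   a hand-written equivalent of the bit test that
   optimize_range_tests_to_bit_test above emits for this chain; the mask
   0x4d has bits 0, 2, 3 and 6 set, matching the offsets of 3, 5, 6 and 9
   from the low bound 3.  */
static int
reassoc_bit_test_sketch (unsigned int x)
{
  /* Original form: x == 3 || x == 5 || x == 6 || x == 9.  */
  return x - 3U <= 6U && ((1ULL << (x - 3U)) & 0x4d) != 0;
}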
3353 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3354 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3355 Also, handle x < C && y < C && z < C where C is power of two as
3356 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3357 as (x | y | z) < 0. */
3359 static bool
3360 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3361 vec<operand_entry *> *ops,
3362 struct range_entry *ranges)
3364 int i;
3365 unsigned int b;
3366 bool any_changes = false;
3367 auto_vec<int, 128> buckets;
3368 auto_vec<int, 32> chains;
3369 auto_vec<struct range_entry *, 32> candidates;
3371 for (i = first; i < length; i++)
3373 int idx;
3375 if (ranges[i].exp == NULL_TREE
3376 || TREE_CODE (ranges[i].exp) != SSA_NAME
3377 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3378 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3379 continue;
3381 if (ranges[i].low != NULL_TREE
3382 && ranges[i].high != NULL_TREE
3383 && ranges[i].in_p
3384 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3386 idx = !integer_zerop (ranges[i].low);
3387 if (idx && !integer_all_onesp (ranges[i].low))
3388 continue;
3390 else if (ranges[i].high != NULL_TREE
3391 && TREE_CODE (ranges[i].high) == INTEGER_CST
3392 && ranges[i].in_p)
3394 wide_int w = wi::to_wide (ranges[i].high);
3395 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3396 int l = wi::clz (w);
3397 idx = 2;
3398 if (l <= 0
3399 || l >= prec
3400 || w != wi::mask (prec - l, false, prec))
3401 continue;
3402 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3403 && ranges[i].low == NULL_TREE)
3404 || (ranges[i].low
3405 && integer_zerop (ranges[i].low))))
3406 continue;
3408 else if (ranges[i].high == NULL_TREE
3409 && ranges[i].low != NULL_TREE
3410 /* Perform this optimization only in the last
3411 reassoc pass, as it interferes with the reassociation
3412 itself and could also interfere with VRP etc., which might
3413 not be able to virtually undo the optimization. */
3414 && !reassoc_insert_powi_p
3415 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3416 && integer_zerop (ranges[i].low))
3417 idx = 3;
3418 else
3419 continue;
3421 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3422 if (buckets.length () <= b)
3423 buckets.safe_grow_cleared (b + 1, true);
3424 if (chains.length () <= (unsigned) i)
3425 chains.safe_grow (i + 1, true);
3426 chains[i] = buckets[b];
3427 buckets[b] = i + 1;
3430 FOR_EACH_VEC_ELT (buckets, b, i)
3431 if (i && chains[i - 1])
3433 int j, k = i;
3434 if ((b % 4) == 2)
3436 /* When ranges[X - 1].high + 1 is a power of two,
3437 we need to process the same bucket up to
3438 precision - 1 times, each time splitting the entries
3439 with the same high bound into one chain and the
3440 rest into another one to be processed later. */
3441 int this_prev = i;
3442 int other_prev = 0;
3443 for (j = chains[i - 1]; j; j = chains[j - 1])
3445 if (tree_int_cst_equal (ranges[i - 1].high,
3446 ranges[j - 1].high))
3448 chains[this_prev - 1] = j;
3449 this_prev = j;
3451 else if (other_prev == 0)
3453 buckets[b] = j;
3454 other_prev = j;
3456 else
3458 chains[other_prev - 1] = j;
3459 other_prev = j;
3462 chains[this_prev - 1] = 0;
3463 if (other_prev)
3464 chains[other_prev - 1] = 0;
3465 if (chains[i - 1] == 0)
3467 if (other_prev)
3468 b--;
3469 continue;
3472 for (j = chains[i - 1]; j; j = chains[j - 1])
3474 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3475 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3476 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3477 k = j;
3479 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3480 tree type2 = NULL_TREE;
3481 bool strict_overflow_p = false;
3482 candidates.truncate (0);
3483 for (j = i; j; j = chains[j - 1])
3485 tree type = TREE_TYPE (ranges[j - 1].exp);
3486 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3487 if ((b % 4) == 3)
3489 /* For the signed < 0 cases, the types should be
3490 really compatible (all signed with the same precision);
3491 push the ranges that have a different in_p from
3492 k first, the rest later. */
3493 if (!useless_type_conversion_p (type1, type))
3494 continue;
3495 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3496 candidates.safe_push (&ranges[j - 1]);
3497 type2 = type1;
3498 continue;
3500 if (j == k
3501 || useless_type_conversion_p (type1, type))
3503 else if (type2 == NULL_TREE
3504 || useless_type_conversion_p (type2, type))
3506 if (type2 == NULL_TREE)
3507 type2 = type;
3508 candidates.safe_push (&ranges[j - 1]);
3511 unsigned l = candidates.length ();
3512 for (j = i; j; j = chains[j - 1])
3514 tree type = TREE_TYPE (ranges[j - 1].exp);
3515 if (j == k)
3516 continue;
3517 if ((b % 4) == 3)
3519 if (!useless_type_conversion_p (type1, type))
3520 continue;
3521 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3522 candidates.safe_push (&ranges[j - 1]);
3523 continue;
3525 if (useless_type_conversion_p (type1, type))
3527 else if (type2 == NULL_TREE
3528 || useless_type_conversion_p (type2, type))
3529 continue;
3530 candidates.safe_push (&ranges[j - 1]);
3532 gimple_seq seq = NULL;
3533 tree op = NULL_TREE;
3534 unsigned int id;
3535 struct range_entry *r;
3536 candidates.safe_push (&ranges[k - 1]);
3537 FOR_EACH_VEC_ELT (candidates, id, r)
3539 gimple *g;
3540 enum tree_code code;
3541 if (id == 0)
3543 op = r->exp;
3544 continue;
3546 if (id == l)
3548 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3549 g = gimple_build_assign (make_ssa_name (type1), code, op);
3550 gimple_seq_add_stmt_without_update (&seq, g);
3551 op = gimple_assign_lhs (g);
3553 tree type = TREE_TYPE (r->exp);
3554 tree exp = r->exp;
3555 if (id >= l && !useless_type_conversion_p (type1, type))
3557 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3558 gimple_seq_add_stmt_without_update (&seq, g);
3559 exp = gimple_assign_lhs (g);
3561 if ((b % 4) == 3)
3562 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3563 else
3564 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3565 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3566 code, op, exp);
3567 gimple_seq_add_stmt_without_update (&seq, g);
3568 op = gimple_assign_lhs (g);
3570 candidates.pop ();
3571 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3572 candidates.length (), opcode, ops, op,
3573 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3574 ranges[k - 1].high, strict_overflow_p))
3575 any_changes = true;
3576 else
3577 gimple_seq_discard (seq);
3578 if ((b % 4) == 2 && buckets[b] != i)
3579 /* There is more work to do for this bucket. */
3580 b--;
3583 return any_changes;
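/* Illustrative sketch (editor addition, hypothetical name): with the bound
   8, a power of two, optimize_range_tests_cmp_bitwise above merges the
   three unsigned comparisons into the single test (x | y | z) < 8.  */
static int
reassoc_cmp_bitwise_sketch (unsigned int x, unsigned int y, unsigned int z)
{
  return x < 8 && y < 8 && z < 8;
}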
3586 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3587 a >= 0 && a < b into (unsigned) a < (unsigned) b
3588 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3590 static bool
3591 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3592 vec<operand_entry *> *ops,
3593 struct range_entry *ranges,
3594 basic_block first_bb)
3596 int i;
3597 bool any_changes = false;
3598 hash_map<tree, int> *map = NULL;
3600 for (i = first; i < length; i++)
3602 if (ranges[i].exp == NULL_TREE
3603 || TREE_CODE (ranges[i].exp) != SSA_NAME
3604 || !ranges[i].in_p)
3605 continue;
3607 tree type = TREE_TYPE (ranges[i].exp);
3608 if (!INTEGRAL_TYPE_P (type)
3609 || TYPE_UNSIGNED (type)
3610 || ranges[i].low == NULL_TREE
3611 || !integer_zerop (ranges[i].low)
3612 || ranges[i].high != NULL_TREE)
3613 continue;
3614 /* EXP >= 0 here. */
3615 if (map == NULL)
3616 map = new hash_map <tree, int>;
3617 map->put (ranges[i].exp, i);
3620 if (map == NULL)
3621 return false;
3623 for (i = 0; i < length; i++)
3625 bool in_p = ranges[i].in_p;
3626 if (ranges[i].low == NULL_TREE
3627 || ranges[i].high == NULL_TREE)
3628 continue;
3629 if (!integer_zerop (ranges[i].low)
3630 || !integer_zerop (ranges[i].high))
3632 if (ranges[i].exp
3633 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3634 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3635 && integer_onep (ranges[i].low)
3636 && integer_onep (ranges[i].high))
3637 in_p = !in_p;
3638 else
3639 continue;
3642 gimple *stmt;
3643 tree_code ccode;
3644 tree rhs1, rhs2;
3645 if (ranges[i].exp)
3647 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3648 continue;
3649 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3650 if (!is_gimple_assign (stmt))
3651 continue;
3652 ccode = gimple_assign_rhs_code (stmt);
3653 rhs1 = gimple_assign_rhs1 (stmt);
3654 rhs2 = gimple_assign_rhs2 (stmt);
3656 else
3658 operand_entry *oe = (*ops)[ranges[i].idx];
3659 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3660 if (gimple_code (stmt) != GIMPLE_COND)
3661 continue;
3662 ccode = gimple_cond_code (stmt);
3663 rhs1 = gimple_cond_lhs (stmt);
3664 rhs2 = gimple_cond_rhs (stmt);
3667 if (TREE_CODE (rhs1) != SSA_NAME
3668 || rhs2 == NULL_TREE
3669 || TREE_CODE (rhs2) != SSA_NAME)
3670 continue;
3672 switch (ccode)
3674 case GT_EXPR:
3675 case GE_EXPR:
3676 case LT_EXPR:
3677 case LE_EXPR:
3678 break;
3679 default:
3680 continue;
3682 if (in_p)
3683 ccode = invert_tree_comparison (ccode, false);
3684 switch (ccode)
3686 case GT_EXPR:
3687 case GE_EXPR:
3688 std::swap (rhs1, rhs2);
3689 ccode = swap_tree_comparison (ccode);
3690 break;
3691 case LT_EXPR:
3692 case LE_EXPR:
3693 break;
3694 default:
3695 gcc_unreachable ();
3698 int *idx = map->get (rhs1);
3699 if (idx == NULL)
3700 continue;
3702 /* maybe_optimize_range_tests allows statements without side-effects
3703 in the basic blocks as long as they are consumed in the same bb.
3704 Make sure rhs2's def stmt is not among them, otherwise we can't
3705 safely use get_nonzero_bits on it. E.g. in:
3706 # RANGE [-83, 1] NONZERO 173
3707 # k_32 = PHI <k_47(13), k_12(9)>
3709 if (k_32 >= 0)
3710 goto <bb 5>; [26.46%]
3711 else
3712 goto <bb 9>; [73.54%]
3714 <bb 5> [local count: 140323371]:
3715 # RANGE [0, 1] NONZERO 1
3716 _5 = (int) k_32;
3717 # RANGE [0, 4] NONZERO 4
3718 _21 = _5 << 2;
3719 # RANGE [0, 4] NONZERO 4
3720 iftmp.0_44 = (char) _21;
3721 if (k_32 < iftmp.0_44)
3722 goto <bb 6>; [84.48%]
3723 else
3724 goto <bb 9>; [15.52%]
3725 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3726 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3727 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3728 those stmts even for negative k_32 and the value ranges would be no
3729 longer guaranteed and so the optimization would be invalid. */
3730 while (opcode == ERROR_MARK)
3732 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3733 basic_block bb2 = gimple_bb (g);
3734 if (bb2
3735 && bb2 != first_bb
3736 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3738 /* As an exception, handle a few common cases. */
3739 if (gimple_assign_cast_p (g)
3740 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3742 tree op0 = gimple_assign_rhs1 (g);
3743 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3744 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3745 > TYPE_PRECISION (TREE_TYPE (op0))))
3746 /* Zero-extension is always ok. */
3747 break;
3748 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3749 == TYPE_PRECISION (TREE_TYPE (op0))
3750 && TREE_CODE (op0) == SSA_NAME)
3752 /* Cast from signed to unsigned or vice versa. Retry
3753 with the op0 as new rhs2. */
3754 rhs2 = op0;
3755 continue;
3758 else if (is_gimple_assign (g)
3759 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3760 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3761 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3762 /* Masking with INTEGER_CST with MSB clear is always ok
3763 too. */
3764 break;
3765 rhs2 = NULL_TREE;
3767 break;
3769 if (rhs2 == NULL_TREE)
3770 continue;
3772 wide_int nz = get_nonzero_bits (rhs2);
3773 if (wi::neg_p (nz))
3774 continue;
3776 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3777 and RHS2 is known to be RHS2 >= 0. */
3778 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3780 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3781 if ((ranges[*idx].strict_overflow_p
3782 || ranges[i].strict_overflow_p)
3783 && issue_strict_overflow_warning (wc))
3784 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3785 "assuming signed overflow does not occur "
3786 "when simplifying range test");
3788 if (dump_file && (dump_flags & TDF_DETAILS))
3790 struct range_entry *r = &ranges[*idx];
3791 fprintf (dump_file, "Optimizing range test ");
3792 print_generic_expr (dump_file, r->exp);
3793 fprintf (dump_file, " +[");
3794 print_generic_expr (dump_file, r->low);
3795 fprintf (dump_file, ", ");
3796 print_generic_expr (dump_file, r->high);
3797 fprintf (dump_file, "] and comparison ");
3798 print_generic_expr (dump_file, rhs1);
3799 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3800 print_generic_expr (dump_file, rhs2);
3801 fprintf (dump_file, "\n into (");
3802 print_generic_expr (dump_file, utype);
3803 fprintf (dump_file, ") ");
3804 print_generic_expr (dump_file, rhs1);
3805 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3806 print_generic_expr (dump_file, utype);
3807 fprintf (dump_file, ") ");
3808 print_generic_expr (dump_file, rhs2);
3809 fprintf (dump_file, "\n");
3812 operand_entry *oe = (*ops)[ranges[i].idx];
3813 ranges[i].in_p = 0;
3814 if (opcode == BIT_IOR_EXPR
3815 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3817 ranges[i].in_p = 1;
3818 ccode = invert_tree_comparison (ccode, false);
3821 unsigned int uid = gimple_uid (stmt);
3822 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3823 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3824 gimple_set_uid (g, uid);
3825 rhs1 = gimple_assign_lhs (g);
3826 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3827 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3829 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3830 gimple_set_uid (g, uid);
3831 rhs2 = gimple_assign_lhs (g);
3832 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3834 if (tree_swap_operands_p (rhs1, rhs2))
3836 std::swap (rhs1, rhs2);
3837 ccode = swap_tree_comparison (ccode);
3839 if (gimple_code (stmt) == GIMPLE_COND)
3841 gcond *c = as_a <gcond *> (stmt);
3842 gimple_cond_set_code (c, ccode);
3843 gimple_cond_set_lhs (c, rhs1);
3844 gimple_cond_set_rhs (c, rhs2);
3845 update_stmt (stmt);
3847 else
3849 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3850 if (!INTEGRAL_TYPE_P (ctype)
3851 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3852 && TYPE_PRECISION (ctype) != 1))
3853 ctype = boolean_type_node;
3854 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3855 gimple_set_uid (g, uid);
3856 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3857 if (oe->op && ctype != TREE_TYPE (oe->op))
3859 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3860 NOP_EXPR, gimple_assign_lhs (g));
3861 gimple_set_uid (g, uid);
3862 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3864 ranges[i].exp = gimple_assign_lhs (g);
3865 oe->op = ranges[i].exp;
3866 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3867 ranges[i].high = ranges[i].low;
3869 ranges[i].strict_overflow_p = false;
3870 oe = (*ops)[ranges[*idx].idx];
3871 /* Now change all the other range test immediate uses, so that
3872 those tests will be optimized away. */
3873 if (opcode == ERROR_MARK)
3875 if (oe->op)
3876 oe->op = build_int_cst (TREE_TYPE (oe->op),
3877 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3878 else
3879 oe->op = (oe->rank == BIT_IOR_EXPR
3880 ? boolean_false_node : boolean_true_node);
3882 else
3883 oe->op = error_mark_node;
3884 ranges[*idx].exp = NULL_TREE;
3885 ranges[*idx].low = NULL_TREE;
3886 ranges[*idx].high = NULL_TREE;
3887 any_changes = true;
3890 delete map;
3891 return any_changes;
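/* Illustrative sketch (editor addition, hypothetical name): B is masked
   with a constant whose MSB is clear, so it is known to be non-negative
   and optimize_range_tests_var_bound above can rewrite the pair of tests
   as (unsigned) a < (unsigned) b.  */
static int
reassoc_var_bound_sketch (int a, int b)
{
  b &= 0x7f;
  return a >= 0 && a < b;
}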
3894 /* Optimize range tests, similarly to how fold_range_test optimizes
3895 it on trees. The tree code for the binary
3896 operation between all the operands is OPCODE.
3897 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3898 maybe_optimize_range_tests for inter-bb range optimization.
3899 In that case, if oe->op is NULL, oe->id is the index of the bb whose
3900 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
3901 the actual opcode.
3902 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
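/* Illustrative sketch only (not taken from any particular testcase):
   on source like

     if (a == 3 || a == 4 || a == 5)
       foo ();

   the three tests merge into the single range [3, 5], which
   update_range_test then emits roughly as

     if ((unsigned) a - 3 <= 2)
       foo ();

   i.e. one unsigned comparison instead of three separate tests.  */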
3904 static bool
3905 optimize_range_tests (enum tree_code opcode,
3906 vec<operand_entry *> *ops, basic_block first_bb)
3908 unsigned int length = ops->length (), i, j, first;
3909 operand_entry *oe;
3910 struct range_entry *ranges;
3911 bool any_changes = false;
3913 if (length == 1)
3914 return false;
3916 ranges = XNEWVEC (struct range_entry, length);
3917 for (i = 0; i < length; i++)
3919 oe = (*ops)[i];
3920 ranges[i].idx = i;
3921 init_range_entry (ranges + i, oe->op,
3922 oe->op
3923 ? NULL
3924 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3925 /* For |, invert it now; we will invert it again before emitting
3926 the optimized expression. */
3927 if (opcode == BIT_IOR_EXPR
3928 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3929 ranges[i].in_p = !ranges[i].in_p;
3932 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3933 for (i = 0; i < length; i++)
3934 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3935 break;
3937 /* Try to merge ranges. */
3938 for (first = i; i < length; i++)
3940 tree low = ranges[i].low;
3941 tree high = ranges[i].high;
3942 int in_p = ranges[i].in_p;
3943 bool strict_overflow_p = ranges[i].strict_overflow_p;
3944 int update_fail_count = 0;
3946 for (j = i + 1; j < length; j++)
3948 if (ranges[i].exp != ranges[j].exp)
3949 break;
3950 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3951 ranges[j].in_p, ranges[j].low, ranges[j].high))
3952 break;
3953 strict_overflow_p |= ranges[j].strict_overflow_p;
3956 if (j == i + 1)
3957 continue;
3959 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3960 opcode, ops, ranges[i].exp, NULL, in_p,
3961 low, high, strict_overflow_p))
3963 i = j - 1;
3964 any_changes = true;
3966 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3967 while update_range_test would fail. */
3968 else if (update_fail_count == 64)
3969 i = j - 1;
3970 else
3971 ++update_fail_count;
3974 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3975 ops, ranges);
3977 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3978 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3979 ops, ranges);
3980 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3981 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3982 ops, ranges);
3983 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3984 ranges, first_bb);
3985 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3986 ops, ranges);
3988 if (any_changes && opcode != ERROR_MARK)
3990 j = 0;
3991 FOR_EACH_VEC_ELT (*ops, i, oe)
3993 if (oe->op == error_mark_node)
3994 continue;
3995 else if (i != j)
3996 (*ops)[j] = oe;
3997 j++;
3999 ops->truncate (j);
4002 XDELETEVEC (ranges);
4003 return any_changes;
4006 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4007 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4008 otherwise the comparison code. TYPE is a return value that is set
4009 to type of comparison. */
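/* Hedged illustration (SSA names invented) of the only shape currently
   accepted: a VEC_COND_EXPR whose condition is defined by a vector
   comparison and whose arms are the canonical all-ones/all-zeros
   constants, e.g.

     _1 = a_2 < b_3;
     x_4 = VEC_COND_EXPR <_1, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>;

   for which LT_EXPR is returned and a_2/b_3 come back through *LHS
   and *RHS.  */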
4011 static tree_code
4012 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4013 tree *lhs, tree *rhs, gassign **vcond)
4015 if (TREE_CODE (var) != SSA_NAME)
4016 return ERROR_MARK;
4018 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4019 if (stmt == NULL)
4020 return ERROR_MARK;
4021 if (vcond)
4022 *vcond = stmt;
4024 /* ??? If we start creating more COND_EXPR, we could perform
4025 this same optimization with them. For now, simplify. */
4026 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4027 return ERROR_MARK;
4029 tree cond = gimple_assign_rhs1 (stmt);
4030 tree_code cmp = TREE_CODE (cond);
4031 if (cmp != SSA_NAME)
4032 return ERROR_MARK;
4034 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4035 if (assign == NULL
4036 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4037 return ERROR_MARK;
4039 cmp = gimple_assign_rhs_code (assign);
4040 if (lhs)
4041 *lhs = gimple_assign_rhs1 (assign);
4042 if (rhs)
4043 *rhs = gimple_assign_rhs2 (assign);
4045 /* ??? For now, allow only canonical true and false result vectors.
4046 We could expand this to other constants should the need arise,
4047 but at the moment we don't create them. */
4048 tree t = gimple_assign_rhs2 (stmt);
4049 tree f = gimple_assign_rhs3 (stmt);
4050 bool inv;
4051 if (integer_all_onesp (t))
4052 inv = false;
4053 else if (integer_all_onesp (f))
4055 cmp = invert_tree_comparison (cmp, false);
4056 inv = true;
4058 else
4059 return ERROR_MARK;
4060 if (!integer_zerop (f))
4061 return ERROR_MARK;
4063 /* Success! */
4064 if (rets)
4065 *rets = assign;
4066 if (reti)
4067 *reti = inv;
4068 if (type)
4069 *type = TREE_TYPE (cond);
4070 return cmp;
4073 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4074 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
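/* Purely as a sketch of the intent (names invented): given two entries
   such as

     _1 = a_2 <= b_3;
     x_4 = VEC_COND_EXPR <_1, { -1, ... }, { 0, ... }>;
     _5 = a_2 >= b_3;
     y_6 = VEC_COND_EXPR <_5, { -1, ... }, { 0, ... }>;
     z_7 = x_4 & y_6;

   maybe_fold_and_comparisons may combine the two comparisons into
   a_2 == b_3; the first VEC_COND_EXPR is then rewritten to use the
   combined condition and the second operand entry is dropped from
   OPS.  */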
4076 static bool
4077 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4079 unsigned int length = ops->length (), i, j;
4080 bool any_changes = false;
4082 if (length == 1)
4083 return false;
4085 for (i = 0; i < length; ++i)
4087 tree elt0 = (*ops)[i]->op;
4089 gassign *stmt0, *vcond0;
4090 bool invert;
4091 tree type, lhs0, rhs0;
4092 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4093 &rhs0, &vcond0);
4094 if (cmp0 == ERROR_MARK)
4095 continue;
4097 for (j = i + 1; j < length; ++j)
4099 tree &elt1 = (*ops)[j]->op;
4101 gassign *stmt1, *vcond1;
4102 tree lhs1, rhs1;
4103 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4104 &rhs1, &vcond1);
4105 if (cmp1 == ERROR_MARK)
4106 continue;
4108 tree comb;
4109 if (opcode == BIT_AND_EXPR)
4110 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4111 cmp1, lhs1, rhs1);
4112 else if (opcode == BIT_IOR_EXPR)
4113 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4114 cmp1, lhs1, rhs1);
4115 else
4116 gcc_unreachable ();
4117 if (comb == NULL)
4118 continue;
4120 /* Success! */
4121 if (dump_file && (dump_flags & TDF_DETAILS))
4123 fprintf (dump_file, "Transforming ");
4124 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4125 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4126 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4127 fprintf (dump_file, " into ");
4128 print_generic_expr (dump_file, comb);
4129 fputc ('\n', dump_file);
4132 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4133 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4134 true, GSI_SAME_STMT);
4135 if (invert)
4136 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4137 gimple_assign_rhs3_ptr (vcond0));
4138 gimple_assign_set_rhs1 (vcond0, exp);
4139 update_stmt (vcond0);
4141 elt1 = error_mark_node;
4142 any_changes = true;
4146 if (any_changes)
4148 operand_entry *oe;
4149 j = 0;
4150 FOR_EACH_VEC_ELT (*ops, i, oe)
4152 if (oe->op == error_mark_node)
4153 continue;
4154 else if (i != j)
4155 (*ops)[j] = oe;
4156 j++;
4158 ops->truncate (j);
4161 return any_changes;
4164 /* Return true if STMT is a cast like:
4165 <bb N>:
4167 _123 = (int) _234;
4169 <bb M>:
4170 # _345 = PHI <_123(N), 1(...), 1(...)>
4171 where _234 has bool type, _123 has a single use and
4172 bb N has a single successor M. This is commonly used in
4173 the last block of a range test.
4175 Also return true if STMT is a tcc_comparison like:
4176 <bb N>:
4178 _234 = a_2(D) == 2;
4180 <bb M>:
4181 # _345 = PHI <_234(N), 1(...), 1(...)>
4182 _346 = (int) _345;
4183 where _234 has bool type, a single use and
4184 bb N has a single successor M. This is commonly used in
4185 the last block of a range test. */
4187 static bool
4188 final_range_test_p (gimple *stmt)
4190 basic_block bb, rhs_bb, lhs_bb;
4191 edge e;
4192 tree lhs, rhs;
4193 use_operand_p use_p;
4194 gimple *use_stmt;
4196 if (!gimple_assign_cast_p (stmt)
4197 && (!is_gimple_assign (stmt)
4198 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4199 != tcc_comparison)))
4200 return false;
4201 bb = gimple_bb (stmt);
4202 if (!single_succ_p (bb))
4203 return false;
4204 e = single_succ_edge (bb);
4205 if (e->flags & EDGE_COMPLEX)
4206 return false;
4208 lhs = gimple_assign_lhs (stmt);
4209 rhs = gimple_assign_rhs1 (stmt);
4210 if (gimple_assign_cast_p (stmt)
4211 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4212 || TREE_CODE (rhs) != SSA_NAME
4213 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4214 return false;
4216 if (!gimple_assign_cast_p (stmt)
4217 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4218 return false;
4220 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4221 if (!single_imm_use (lhs, &use_p, &use_stmt))
4222 return false;
4224 if (gimple_code (use_stmt) != GIMPLE_PHI
4225 || gimple_bb (use_stmt) != e->dest)
4226 return false;
4228 /* And that the rhs is defined in the same loop. */
4229 if (gimple_assign_cast_p (stmt))
4231 if (TREE_CODE (rhs) != SSA_NAME
4232 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4233 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4234 return false;
4236 else
4238 if (TREE_CODE (lhs) != SSA_NAME
4239 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4240 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4241 return false;
4244 return true;
4247 /* Return true if BB is suitable basic block for inter-bb range test
4248 optimization. If BACKWARD is true, BB should be the only predecessor
4249 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4250 or compared with to find a common basic block to which all conditions
4251 branch if true resp. false. If BACKWARD is false, TEST_BB should
4252 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4253 TEST_BB is a bb ending in a condition where the edge to the non-*OTHER_BB
4254 block points to an empty block that falls through into *OTHER_BB and
4255 the phi args match that path. */
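/* A hedged sketch of the CFG shape being checked (block numbers are
   arbitrary):

     <bb 2>: if (a_1 < 10) goto <bb 3>; else goto <bb 5>;
     <bb 3>: if (b_2 != 0) goto <bb 4>; else goto <bb 5>;
     <bb 5>: # tem_3 = PHI <0(2), 0(3), ...>

   With BB = bb 2 and TEST_BB = bb 3 (BACKWARD set), *OTHER_BB ends up
   as bb 5, the common destination of both conditions, and the PHI
   arguments coming from bb 2 and bb 3 must be equal.  */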
4257 static bool
4258 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4259 bool *test_swapped_p, bool backward)
4261 edge_iterator ei, ei2;
4262 edge e, e2;
4263 gimple *stmt;
4264 gphi_iterator gsi;
4265 bool other_edge_seen = false;
4266 bool is_cond;
4268 if (test_bb == bb)
4269 return false;
4270 /* Check last stmt first. */
4271 stmt = last_stmt (bb);
4272 if (stmt == NULL
4273 || (gimple_code (stmt) != GIMPLE_COND
4274 && (backward || !final_range_test_p (stmt)))
4275 || gimple_visited_p (stmt)
4276 || stmt_could_throw_p (cfun, stmt)
4277 || *other_bb == bb)
4278 return false;
4279 is_cond = gimple_code (stmt) == GIMPLE_COND;
4280 if (is_cond)
4282 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4283 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4284 to *OTHER_BB (if not set yet, try to find it out). */
4285 if (EDGE_COUNT (bb->succs) != 2)
4286 return false;
4287 FOR_EACH_EDGE (e, ei, bb->succs)
4289 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4290 return false;
4291 if (e->dest == test_bb)
4293 if (backward)
4294 continue;
4295 else
4296 return false;
4298 if (e->dest == bb)
4299 return false;
4300 if (*other_bb == NULL)
4302 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4303 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4304 return false;
4305 else if (e->dest == e2->dest)
4306 *other_bb = e->dest;
4307 if (*other_bb == NULL)
4308 return false;
4310 if (e->dest == *other_bb)
4311 other_edge_seen = true;
4312 else if (backward)
4313 return false;
4315 if (*other_bb == NULL || !other_edge_seen)
4316 return false;
4318 else if (single_succ (bb) != *other_bb)
4319 return false;
4321 /* Now check all PHIs of *OTHER_BB. */
4322 e = find_edge (bb, *other_bb);
4323 e2 = find_edge (test_bb, *other_bb);
4324 retry:;
4325 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4327 gphi *phi = gsi.phi ();
4328 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4329 corresponding to BB and TEST_BB predecessor must be the same. */
4330 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4331 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4333 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4334 one of the PHIs should have the lhs of the last stmt in
4335 that block as PHI arg and that PHI should have 0 or 1
4336 corresponding to it in all other range test basic blocks
4337 considered. */
4338 if (!is_cond)
4340 if (gimple_phi_arg_def (phi, e->dest_idx)
4341 == gimple_assign_lhs (stmt)
4342 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4343 || integer_onep (gimple_phi_arg_def (phi,
4344 e2->dest_idx))))
4345 continue;
4347 else
4349 gimple *test_last = last_stmt (test_bb);
4350 if (gimple_code (test_last) == GIMPLE_COND)
4352 if (backward ? e2->src != test_bb : e->src != bb)
4353 return false;
4355 /* For last_bb, handle also:
4356 if (x_3(D) == 3)
4357 goto <bb 6>; [34.00%]
4358 else
4359 goto <bb 7>; [66.00%]
4361 <bb 6> [local count: 79512730]:
4363 <bb 7> [local count: 1073741824]:
4364 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4365 where bb 7 is *OTHER_BB, but the PHI values from the
4366 earlier bbs match the path through the empty bb
4367 in between. */
4368 edge e3;
4369 if (backward)
4370 e3 = EDGE_SUCC (test_bb,
4371 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4372 else
4373 e3 = EDGE_SUCC (bb,
4374 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4375 if (empty_block_p (e3->dest)
4376 && single_succ_p (e3->dest)
4377 && single_succ (e3->dest) == *other_bb
4378 && single_pred_p (e3->dest)
4379 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4381 if (backward)
4382 e2 = single_succ_edge (e3->dest);
4383 else
4384 e = single_succ_edge (e3->dest);
4385 if (test_swapped_p)
4386 *test_swapped_p = true;
4387 goto retry;
4390 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4391 == gimple_assign_lhs (test_last)
4392 && (integer_zerop (gimple_phi_arg_def (phi,
4393 e->dest_idx))
4394 || integer_onep (gimple_phi_arg_def (phi,
4395 e->dest_idx))))
4396 continue;
4399 return false;
4402 return true;
4405 /* Return true if BB doesn't have side-effects that would disallow
4406 range test optimization, all SSA_NAMEs set in the bb are consumed
4407 in the bb and there are no PHIs. */
4409 bool
4410 no_side_effect_bb (basic_block bb)
4412 gimple_stmt_iterator gsi;
4413 gimple *last;
4415 if (!gimple_seq_empty_p (phi_nodes (bb)))
4416 return false;
4417 last = last_stmt (bb);
4418 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4420 gimple *stmt = gsi_stmt (gsi);
4421 tree lhs;
4422 imm_use_iterator imm_iter;
4423 use_operand_p use_p;
4425 if (is_gimple_debug (stmt))
4426 continue;
4427 if (gimple_has_side_effects (stmt))
4428 return false;
4429 if (stmt == last)
4430 return true;
4431 if (!is_gimple_assign (stmt))
4432 return false;
4433 lhs = gimple_assign_lhs (stmt);
4434 if (TREE_CODE (lhs) != SSA_NAME)
4435 return false;
4436 if (gimple_assign_rhs_could_trap_p (stmt))
4437 return false;
4438 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4440 gimple *use_stmt = USE_STMT (use_p);
4441 if (is_gimple_debug (use_stmt))
4442 continue;
4443 if (gimple_bb (use_stmt) != bb)
4444 return false;
4447 return false;
4450 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4451 return true and fill in *OPS recursively. */
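/* Illustrative sketch (invented SSA names): for

     _1 = a_2 & b_3;
     var_4 = _1 & c_5;

   get_ops (var_4, BIT_AND_EXPR, ...) recurses through _1 and pushes
   operand entries for a_2, b_3 and c_5, provided each leaf has a
   single use and _1 is consumed only here.  */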
4453 static bool
4454 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4455 class loop *loop)
4457 gimple *stmt = SSA_NAME_DEF_STMT (var);
4458 tree rhs[2];
4459 int i;
4461 if (!is_reassociable_op (stmt, code, loop))
4462 return false;
4464 rhs[0] = gimple_assign_rhs1 (stmt);
4465 rhs[1] = gimple_assign_rhs2 (stmt);
4466 gimple_set_visited (stmt, true);
4467 for (i = 0; i < 2; i++)
4468 if (TREE_CODE (rhs[i]) == SSA_NAME
4469 && !get_ops (rhs[i], code, ops, loop)
4470 && has_single_use (rhs[i]))
4472 operand_entry *oe = operand_entry_pool.allocate ();
4474 oe->op = rhs[i];
4475 oe->rank = code;
4476 oe->id = 0;
4477 oe->count = 1;
4478 oe->stmt_to_insert = NULL;
4479 ops->safe_push (oe);
4481 return true;
4484 /* Find the ops that were added by get_ops starting from VAR, see if
4485 they were changed during update_range_test and if yes, create new
4486 stmts. */
4488 static tree
4489 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4490 unsigned int *pidx, class loop *loop)
4492 gimple *stmt = SSA_NAME_DEF_STMT (var);
4493 tree rhs[4];
4494 int i;
4496 if (!is_reassociable_op (stmt, code, loop))
4497 return NULL;
4499 rhs[0] = gimple_assign_rhs1 (stmt);
4500 rhs[1] = gimple_assign_rhs2 (stmt);
4501 rhs[2] = rhs[0];
4502 rhs[3] = rhs[1];
4503 for (i = 0; i < 2; i++)
4504 if (TREE_CODE (rhs[i]) == SSA_NAME)
4506 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4507 if (rhs[2 + i] == NULL_TREE)
4509 if (has_single_use (rhs[i]))
4510 rhs[2 + i] = ops[(*pidx)++]->op;
4511 else
4512 rhs[2 + i] = rhs[i];
4515 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4516 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4518 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4519 var = make_ssa_name (TREE_TYPE (var));
4520 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4521 rhs[2], rhs[3]);
4522 gimple_set_uid (g, gimple_uid (stmt));
4523 gimple_set_visited (g, true);
4524 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4526 return var;
4529 /* Structure to track the initial value passed to get_ops and
4530 the range in the ops vector for each basic block. */
4532 struct inter_bb_range_test_entry
4534 tree op;
4535 unsigned int first_idx, last_idx;
4538 /* Inter-bb range test optimization.
4540 Returns TRUE if a gimple conditional is optimized to a true/false,
4541 otherwise returns FALSE.
4543 This indicates to the caller that it should run a CFG cleanup pass
4544 once reassociation is completed. */
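/* Rough illustration of the source shape targeted here (not from a
   testcase):

     if (a_1 == 3)
       goto out;
     if (a_1 == 4)
       goto out;
     if (a_1 == 5)
       goto out;

   Each condition ends its own basic block; the range tests collected
   from those GIMPLE_CONDs are then handed to optimize_range_tests just
   as if the source had said a == 3 || a == 4 || a == 5.  */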
4546 static bool
4547 maybe_optimize_range_tests (gimple *stmt)
4549 basic_block first_bb = gimple_bb (stmt);
4550 basic_block last_bb = first_bb;
4551 basic_block other_bb = NULL;
4552 basic_block bb;
4553 edge_iterator ei;
4554 edge e;
4555 auto_vec<operand_entry *> ops;
4556 auto_vec<inter_bb_range_test_entry> bbinfo;
4557 bool any_changes = false;
4558 bool cfg_cleanup_needed = false;
4560 /* Consider only basic blocks that end with GIMPLE_COND or
4561 a cast statement satisfying final_range_test_p. All
4562 but the last bb in the first_bb .. last_bb range
4563 should end with GIMPLE_COND. */
4564 if (gimple_code (stmt) == GIMPLE_COND)
4566 if (EDGE_COUNT (first_bb->succs) != 2)
4567 return cfg_cleanup_needed;
4569 else if (final_range_test_p (stmt))
4570 other_bb = single_succ (first_bb);
4571 else
4572 return cfg_cleanup_needed;
4574 if (stmt_could_throw_p (cfun, stmt))
4575 return cfg_cleanup_needed;
4577 /* As relative ordering of post-dominator sons isn't fixed,
4578 maybe_optimize_range_tests can be called first on any
4579 bb in the range we want to optimize. So, start searching
4580 backwards, if first_bb can be set to a predecessor. */
4581 while (single_pred_p (first_bb))
4583 basic_block pred_bb = single_pred (first_bb);
4584 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4585 break;
4586 if (!no_side_effect_bb (first_bb))
4587 break;
4588 first_bb = pred_bb;
4590 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4591 Before starting forward search in last_bb successors, find
4592 out the other_bb. */
4593 if (first_bb == last_bb)
4595 other_bb = NULL;
4596 /* As non-GIMPLE_COND last stmt always terminates the range,
4597 if forward search didn't discover anything, just give up. */
4598 if (gimple_code (stmt) != GIMPLE_COND)
4599 return cfg_cleanup_needed;
4600 /* Look at both successors. Either it ends with a GIMPLE_COND
4601 and satisfies suitable_cond_bb, or ends with a cast and
4602 other_bb is that cast's successor. */
4603 FOR_EACH_EDGE (e, ei, first_bb->succs)
4604 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4605 || e->dest == first_bb)
4606 return cfg_cleanup_needed;
4607 else if (single_pred_p (e->dest))
4609 stmt = last_stmt (e->dest);
4610 if (stmt
4611 && gimple_code (stmt) == GIMPLE_COND
4612 && EDGE_COUNT (e->dest->succs) == 2)
4614 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4615 NULL, true))
4616 break;
4617 else
4618 other_bb = NULL;
4620 else if (stmt
4621 && final_range_test_p (stmt)
4622 && find_edge (first_bb, single_succ (e->dest)))
4624 other_bb = single_succ (e->dest);
4625 if (other_bb == first_bb)
4626 other_bb = NULL;
4629 if (other_bb == NULL)
4630 return cfg_cleanup_needed;
4632 /* Now do the forward search, moving last_bb to successor bbs
4633 that aren't other_bb. */
4634 while (EDGE_COUNT (last_bb->succs) == 2)
4636 FOR_EACH_EDGE (e, ei, last_bb->succs)
4637 if (e->dest != other_bb)
4638 break;
4639 if (e == NULL)
4640 break;
4641 if (!single_pred_p (e->dest))
4642 break;
4643 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4644 break;
4645 if (!no_side_effect_bb (e->dest))
4646 break;
4647 last_bb = e->dest;
4649 if (first_bb == last_bb)
4650 return cfg_cleanup_needed;
4651 /* Here basic blocks first_bb through last_bb's predecessor
4652 end with GIMPLE_COND, all of them have one of the edges to
4653 other_bb and another to another block in the range,
4654 all blocks except first_bb don't have side-effects and
4655 last_bb ends with either GIMPLE_COND, or cast satisfying
4656 final_range_test_p. */
4657 for (bb = last_bb; ; bb = single_pred (bb))
4659 enum tree_code code;
4660 tree lhs, rhs;
4661 inter_bb_range_test_entry bb_ent;
4663 bb_ent.op = NULL_TREE;
4664 bb_ent.first_idx = ops.length ();
4665 bb_ent.last_idx = bb_ent.first_idx;
4666 e = find_edge (bb, other_bb);
4667 stmt = last_stmt (bb);
4668 gimple_set_visited (stmt, true);
4669 if (gimple_code (stmt) != GIMPLE_COND)
4671 use_operand_p use_p;
4672 gimple *phi;
4673 edge e2;
4674 unsigned int d;
4676 lhs = gimple_assign_lhs (stmt);
4677 rhs = gimple_assign_rhs1 (stmt);
4678 gcc_assert (bb == last_bb);
4680 /* stmt is
4681 _123 = (int) _234;
4683 _234 = a_2(D) == 2;
4685 followed by:
4686 <bb M>:
4687 # _345 = PHI <_123(N), 1(...), 1(...)>
4689 or 0 instead of 1. If it is 0, the _234
4690 range test is anded together with all the
4691 other range tests, if it is 1, it is ored with
4692 them. */
4693 single_imm_use (lhs, &use_p, &phi);
4694 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4695 e2 = find_edge (first_bb, other_bb);
4696 d = e2->dest_idx;
4697 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4698 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4699 code = BIT_AND_EXPR;
4700 else
4702 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4703 code = BIT_IOR_EXPR;
4706 /* If _234 SSA_NAME_DEF_STMT is
4707 _234 = _567 | _789;
4708 (or &, corresponding to 1/0 in the phi arguments),
4709 push into ops the individual range test arguments
4710 of the bitwise or resp. and, recursively. */
4711 if (TREE_CODE (rhs) == SSA_NAME
4712 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4713 != tcc_comparison)
4714 && !get_ops (rhs, code, &ops,
4715 loop_containing_stmt (stmt))
4716 && has_single_use (rhs))
4718 /* Otherwise, push the _234 range test itself. */
4719 operand_entry *oe = operand_entry_pool.allocate ();
4721 oe->op = rhs;
4722 oe->rank = code;
4723 oe->id = 0;
4724 oe->count = 1;
4725 oe->stmt_to_insert = NULL;
4726 ops.safe_push (oe);
4727 bb_ent.last_idx++;
4728 bb_ent.op = rhs;
4730 else if (is_gimple_assign (stmt)
4731 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4732 == tcc_comparison)
4733 && !get_ops (lhs, code, &ops,
4734 loop_containing_stmt (stmt))
4735 && has_single_use (lhs))
4737 operand_entry *oe = operand_entry_pool.allocate ();
4738 oe->op = lhs;
4739 oe->rank = code;
4740 oe->id = 0;
4741 oe->count = 1;
4742 ops.safe_push (oe);
4743 bb_ent.last_idx++;
4744 bb_ent.op = lhs;
4746 else
4748 bb_ent.last_idx = ops.length ();
4749 bb_ent.op = rhs;
4751 bbinfo.safe_push (bb_ent);
4752 continue;
4754 else if (bb == last_bb)
4756 /* For last_bb, handle also:
4757 if (x_3(D) == 3)
4758 goto <bb 6>; [34.00%]
4759 else
4760 goto <bb 7>; [66.00%]
4762 <bb 6> [local count: 79512730]:
4764 <bb 7> [local count: 1073741824]:
4765 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4766 where bb 7 is OTHER_BB, but the PHI values from the
4767 earlier bbs match the path through the empty bb
4768 in between. */
4769 bool test_swapped_p = false;
4770 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4771 &other_bb, &test_swapped_p, true);
4772 gcc_assert (ok);
4773 if (test_swapped_p)
4774 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4776 /* Otherwise stmt is GIMPLE_COND. */
4777 code = gimple_cond_code (stmt);
4778 lhs = gimple_cond_lhs (stmt);
4779 rhs = gimple_cond_rhs (stmt);
4780 if (TREE_CODE (lhs) == SSA_NAME
4781 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4782 && ((code != EQ_EXPR && code != NE_EXPR)
4783 || rhs != boolean_false_node
4784 /* Either push into ops the individual bitwise
4785 or resp. and operands, depending on which
4786 edge is other_bb. */
4787 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4788 ^ (code == EQ_EXPR))
4789 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4790 loop_containing_stmt (stmt))))
4792 /* Or push the GIMPLE_COND stmt itself. */
4793 operand_entry *oe = operand_entry_pool.allocate ();
4795 oe->op = NULL;
4796 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4797 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4798 /* oe->op = NULL signals that there is no SSA_NAME
4799 for the range test, and oe->id instead is the
4800 number of the basic block at whose end the
4801 GIMPLE_COND is. */
4802 oe->id = bb->index;
4803 oe->count = 1;
4804 oe->stmt_to_insert = NULL;
4805 ops.safe_push (oe);
4806 bb_ent.op = NULL;
4807 bb_ent.last_idx++;
4809 else if (ops.length () > bb_ent.first_idx)
4811 bb_ent.op = lhs;
4812 bb_ent.last_idx = ops.length ();
4814 bbinfo.safe_push (bb_ent);
4815 if (bb == first_bb)
4816 break;
4818 if (ops.length () > 1)
4819 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4820 if (any_changes)
4822 unsigned int idx, max_idx = 0;
4823 /* update_ops relies on has_single_use predicates returning the
4824 same values as they did during get_ops earlier. Additionally it
4825 never removes statements, only adds new ones and it should walk
4826 from the single imm use and check the predicate already before
4827 making those changes.
4828 On the other hand, the handling of GIMPLE_COND directly can turn
4829 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4830 it needs to be done in a separate loop afterwards. */
4831 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4833 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4834 && bbinfo[idx].op != NULL_TREE)
4836 tree new_op;
4838 max_idx = idx;
4839 stmt = last_stmt (bb);
4840 new_op = update_ops (bbinfo[idx].op,
4841 (enum tree_code)
4842 ops[bbinfo[idx].first_idx]->rank,
4843 ops, &bbinfo[idx].first_idx,
4844 loop_containing_stmt (stmt));
4845 if (new_op == NULL_TREE)
4847 gcc_assert (bb == last_bb);
4848 new_op = ops[bbinfo[idx].first_idx++]->op;
4850 if (bbinfo[idx].op != new_op)
4852 imm_use_iterator iter;
4853 use_operand_p use_p;
4854 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4856 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4857 if (is_gimple_debug (use_stmt))
4858 continue;
4859 else if (gimple_code (use_stmt) == GIMPLE_COND
4860 || gimple_code (use_stmt) == GIMPLE_PHI)
4861 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4862 SET_USE (use_p, new_op);
4863 else if ((is_gimple_assign (use_stmt)
4864 && (TREE_CODE_CLASS
4865 (gimple_assign_rhs_code (use_stmt))
4866 == tcc_comparison)))
4867 cast_or_tcc_cmp_stmt = use_stmt;
4868 else if (gimple_assign_cast_p (use_stmt))
4869 cast_or_tcc_cmp_stmt = use_stmt;
4870 else
4871 gcc_unreachable ();
4873 if (cast_or_tcc_cmp_stmt)
4875 gcc_assert (bb == last_bb);
4876 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4877 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4878 enum tree_code rhs_code
4879 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4880 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4881 : CONVERT_EXPR;
4882 gassign *g;
4883 if (is_gimple_min_invariant (new_op))
4885 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4886 g = gimple_build_assign (new_lhs, new_op);
4888 else
4889 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4890 gimple_stmt_iterator gsi
4891 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4892 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4893 gimple_set_visited (g, true);
4894 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4895 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4896 if (is_gimple_debug (use_stmt))
4897 continue;
4898 else if (gimple_code (use_stmt) == GIMPLE_COND
4899 || gimple_code (use_stmt) == GIMPLE_PHI)
4900 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4901 SET_USE (use_p, new_lhs);
4902 else
4903 gcc_unreachable ();
4907 if (bb == first_bb)
4908 break;
4910 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4912 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4913 && bbinfo[idx].op == NULL_TREE
4914 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4916 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4918 if (idx > max_idx)
4919 max_idx = idx;
4921 /* If we collapse the conditional to a true/false
4922 condition, then bubble that knowledge up to our caller. */
4923 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4925 gimple_cond_make_false (cond_stmt);
4926 cfg_cleanup_needed = true;
4928 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4930 gimple_cond_make_true (cond_stmt);
4931 cfg_cleanup_needed = true;
4933 else
4935 gimple_cond_set_code (cond_stmt, NE_EXPR);
4936 gimple_cond_set_lhs (cond_stmt,
4937 ops[bbinfo[idx].first_idx]->op);
4938 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4940 update_stmt (cond_stmt);
4942 if (bb == first_bb)
4943 break;
4946 /* The above changes could result in basic blocks after the first
4947 modified one, up to and including last_bb, to be executed even if
4948 they would not be in the original program. If the value ranges of
4949 assignment lhs' in those bbs were dependent on the conditions
4950 guarding those basic blocks which now can change, the VRs might
4951 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4952 are only used within the same bb, it should not be a big deal if
4953 we just reset all the VRs in those bbs. See PR68671. */
4954 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4955 reset_flow_sensitive_info_in_bb (bb);
4957 return cfg_cleanup_needed;
4960 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4961 of STMT in its operands. This is also known as a "destructive
4962 update" operation. */
4964 static bool
4965 is_phi_for_stmt (gimple *stmt, tree operand)
4967 gimple *def_stmt;
4968 gphi *def_phi;
4969 tree lhs;
4970 use_operand_p arg_p;
4971 ssa_op_iter i;
4973 if (TREE_CODE (operand) != SSA_NAME)
4974 return false;
4976 lhs = gimple_assign_lhs (stmt);
4978 def_stmt = SSA_NAME_DEF_STMT (operand);
4979 def_phi = dyn_cast <gphi *> (def_stmt);
4980 if (!def_phi)
4981 return false;
4983 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4984 if (lhs == USE_FROM_PTR (arg_p))
4985 return true;
4986 return false;
4989 /* Remove def stmt of VAR if VAR has zero uses and recurse
4990 on rhs1 operand if so. */
4992 static void
4993 remove_visited_stmt_chain (tree var)
4995 gimple *stmt;
4996 gimple_stmt_iterator gsi;
4998 while (1)
5000 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5001 return;
5002 stmt = SSA_NAME_DEF_STMT (var);
5003 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5005 var = gimple_assign_rhs1 (stmt);
5006 gsi = gsi_for_stmt (stmt);
5007 reassoc_remove_stmt (&gsi);
5008 release_defs (stmt);
5010 else
5011 return;
5015 /* This function checks three consecutive operands in
5016 the passed operands vector OPS starting from OPINDEX and
5017 swaps two operands if it is profitable for the binary operation
5018 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5020 We pair ops with the same rank if possible.
5022 The alternative we try is to see if STMT is a destructive
5023 update style statement, which is like:
5024 b = phi (a, ...)
5025 a = c + b;
5026 In that case, we want to use the destructive update form to
5027 expose the possible vectorizer sum reduction opportunity.
5028 In that case, the third operand will be the phi node. This
5029 check is not performed if STMT is null.
5031 We could, of course, try to be better as noted above, and do a
5032 lot of work to try to find these opportunities in >3 operand
5033 cases, but it is unlikely to be worth it. */
5035 static void
5036 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5037 unsigned int opindex, gimple *stmt)
5039 operand_entry *oe1, *oe2, *oe3;
5041 oe1 = ops[opindex];
5042 oe2 = ops[opindex + 1];
5043 oe3 = ops[opindex + 2];
5045 if ((oe1->rank == oe2->rank
5046 && oe2->rank != oe3->rank)
5047 || (stmt && is_phi_for_stmt (stmt, oe3->op)
5048 && !is_phi_for_stmt (stmt, oe1->op)
5049 && !is_phi_for_stmt (stmt, oe2->op)))
5050 std::swap (*oe1, *oe3);
5051 else if ((oe1->rank == oe3->rank
5052 && oe2->rank != oe3->rank)
5053 || (stmt && is_phi_for_stmt (stmt, oe2->op)
5054 && !is_phi_for_stmt (stmt, oe1->op)
5055 && !is_phi_for_stmt (stmt, oe3->op)))
5056 std::swap (*oe1, *oe2);
5059 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5060 two definitions, otherwise return STMT. */
5062 static inline gimple *
5063 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
5065 if (TREE_CODE (rhs1) == SSA_NAME
5066 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5067 stmt = SSA_NAME_DEF_STMT (rhs1);
5068 if (TREE_CODE (rhs2) == SSA_NAME
5069 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5070 stmt = SSA_NAME_DEF_STMT (rhs2);
5071 return stmt;
5074 /* If the stmt that defines operand has to be inserted, insert it
5075 before the use. */
5076 static void
5077 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5079 gcc_assert (is_gimple_assign (stmt_to_insert));
5080 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5081 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5082 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
5083 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5084 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5086 /* If the insert point is not stmt, then insert_point would be
5087 the point where operand rhs1 or rhs2 is defined. In this case,
5088 stmt_to_insert has to be inserted afterwards. This would
5089 only happen when the stmt insertion point is flexible. */
5090 if (stmt == insert_point)
5091 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5092 else
5093 insert_stmt_after (stmt_to_insert, insert_point);
5097 /* Recursively rewrite our linearized statements so that the operators
5098 match those in OPS[OPINDEX], putting the computation in rank
5099 order. Return new lhs.
5100 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5101 the current stmt and during recursive invocations.
5102 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5103 recursive invocations. */
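/* Small, hedged illustration (names invented): for the linearized chain

     _1 = a + b;
     _2 = _1 + c;
     s  = _2 + d;

   and OPS = {d, c, b, a}, the recursion bottoms out at _1 and the chain
   conceptually ends up computing ((b + a) + c) + d, i.e. the last two
   OPS entries feed the innermost statement and ops[0] becomes the
   second operand of the final one.  */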
5105 static tree
5106 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5107 const vec<operand_entry *> &ops, bool changed,
5108 bool next_changed)
5110 tree rhs1 = gimple_assign_rhs1 (stmt);
5111 tree rhs2 = gimple_assign_rhs2 (stmt);
5112 tree lhs = gimple_assign_lhs (stmt);
5113 operand_entry *oe;
5115 /* The final recursion case for this function is that you have
5116 exactly two operations left.
5117 If we had exactly one op in the entire list to start with, we
5118 would have never called this function, and the tail recursion
5119 rewrites them one at a time. */
5120 if (opindex + 2 == ops.length ())
5122 operand_entry *oe1, *oe2;
5124 oe1 = ops[opindex];
5125 oe2 = ops[opindex + 1];
5127 if (rhs1 != oe1->op || rhs2 != oe2->op)
5129 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5130 unsigned int uid = gimple_uid (stmt);
5132 if (dump_file && (dump_flags & TDF_DETAILS))
5134 fprintf (dump_file, "Transforming ");
5135 print_gimple_stmt (dump_file, stmt, 0);
5138 /* If the stmt that defines operand has to be inserted, insert it
5139 before the use. */
5140 if (oe1->stmt_to_insert)
5141 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5142 if (oe2->stmt_to_insert)
5143 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5144 /* Even when changed is false, reassociation could have e.g. removed
5145 some redundant operations, so unless we are just swapping the
5146 arguments or unless there is no change at all (then we just
5147 return lhs), force creation of a new SSA_NAME. */
5148 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5150 gimple *insert_point
5151 = find_insert_point (stmt, oe1->op, oe2->op);
5152 lhs = make_ssa_name (TREE_TYPE (lhs));
5153 stmt
5154 = gimple_build_assign (lhs, rhs_code,
5155 oe1->op, oe2->op);
5156 gimple_set_uid (stmt, uid);
5157 gimple_set_visited (stmt, true);
5158 if (insert_point == gsi_stmt (gsi))
5159 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5160 else
5161 insert_stmt_after (stmt, insert_point);
5163 else
5165 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5166 == stmt);
5167 gimple_assign_set_rhs1 (stmt, oe1->op);
5168 gimple_assign_set_rhs2 (stmt, oe2->op);
5169 update_stmt (stmt);
5172 if (rhs1 != oe1->op && rhs1 != oe2->op)
5173 remove_visited_stmt_chain (rhs1);
5175 if (dump_file && (dump_flags & TDF_DETAILS))
5177 fprintf (dump_file, " into ");
5178 print_gimple_stmt (dump_file, stmt, 0);
5181 return lhs;
5184 /* If we hit here, we should have 3 or more ops left. */
5185 gcc_assert (opindex + 2 < ops.length ());
5187 /* Rewrite the next operator. */
5188 oe = ops[opindex];
5190 /* If the stmt that defines operand has to be inserted, insert it
5191 before the use. */
5192 if (oe->stmt_to_insert)
5193 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5195 /* Recurse on the LHS of the binary operator, which is guaranteed to
5196 be the non-leaf side. */
5197 tree new_rhs1
5198 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5199 changed || oe->op != rhs2 || next_changed,
5200 false);
5202 if (oe->op != rhs2 || new_rhs1 != rhs1)
5204 if (dump_file && (dump_flags & TDF_DETAILS))
5206 fprintf (dump_file, "Transforming ");
5207 print_gimple_stmt (dump_file, stmt, 0);
5210 /* If changed is false, this is either opindex == 0
5211 or all outer rhs2's were equal to corresponding oe->op,
5212 and powi_result is NULL.
5213 That means lhs is equivalent before and after reassociation.
5214 Otherwise ensure the old lhs SSA_NAME is not reused and
5215 create a new stmt as well, so that any debug stmts will be
5216 properly adjusted. */
5217 if (changed)
5219 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5220 unsigned int uid = gimple_uid (stmt);
5221 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5223 lhs = make_ssa_name (TREE_TYPE (lhs));
5224 stmt = gimple_build_assign (lhs, rhs_code,
5225 new_rhs1, oe->op);
5226 gimple_set_uid (stmt, uid);
5227 gimple_set_visited (stmt, true);
5228 if (insert_point == gsi_stmt (gsi))
5229 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5230 else
5231 insert_stmt_after (stmt, insert_point);
5233 else
5235 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5236 == stmt);
5237 gimple_assign_set_rhs1 (stmt, new_rhs1);
5238 gimple_assign_set_rhs2 (stmt, oe->op);
5239 update_stmt (stmt);
5242 if (dump_file && (dump_flags & TDF_DETAILS))
5244 fprintf (dump_file, " into ");
5245 print_gimple_stmt (dump_file, stmt, 0);
5248 return lhs;
5251 /* Find out how many cycles we need to compute a chain of statements.
5252 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
5253 maximum number of independent statements we may execute per cycle. */
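/* Worked instance of the formula below, purely for illustration: with
   ops_num = 10 and cpu_width = 2,

     res  = 10 / (2 * 2)       = 2
     rest = 10 - 2 * 2         = 6
     res += floor_log2 (6) + 1 = 2 + 3 = 5

   so five cycles are assumed for the whole chain.  */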
5255 static int
5256 get_required_cycles (int ops_num, int cpu_width)
5258 int res;
5259 int elog;
5260 unsigned int rest;
5262 /* While we have more than 2 * cpu_width operands
5263 we may reduce number of operands by cpu_width
5264 per cycle. */
5265 res = ops_num / (2 * cpu_width);
5267 /* Remained operands count may be reduced twice per cycle
5268 until we have only one operand. */
5269 rest = (unsigned)(ops_num - res * cpu_width);
5270 elog = exact_log2 (rest);
5271 if (elog >= 0)
5272 res += elog;
5273 else
5274 res += floor_log2 (rest) + 1;
5276 return res;
5279 /* Returns an optimal number of registers to use for computation of
5280 given statements. */
5282 static int
5283 get_reassociation_width (int ops_num, enum tree_code opc,
5284 machine_mode mode)
5286 int param_width = param_tree_reassoc_width;
5287 int width;
5288 int width_min;
5289 int cycles_best;
5291 if (param_width > 0)
5292 width = param_width;
5293 else
5294 width = targetm.sched.reassociation_width (opc, mode);
5296 if (width == 1)
5297 return width;
5299 /* Get the minimal time required for sequence computation. */
5300 cycles_best = get_required_cycles (ops_num, width);
5302 /* Check if we may use less width and still compute sequence for
5303 the same time. It will allow us to reduce registers usage.
5304 get_required_cycles is monotonically increasing with lower width
5305 so we can perform a binary search for the minimal width that still
5306 results in the optimal cycle count. */
5307 width_min = 1;
5308 while (width > width_min)
5310 int width_mid = (width + width_min) / 2;
5312 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5313 width = width_mid;
5314 else if (width_min < width_mid)
5315 width_min = width_mid;
5316 else
5317 break;
5320 return width;
5323 /* Recursively rewrite our linearized statements so that the operators
5324 match those in OPS[OPINDEX], putting the computation in rank
5325 order and trying to allow operations to be executed in
5326 parallel. */
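/* Hedged sketch of the intended effect (names invented): for
   a + b + c + d with WIDTH == 2, the serial chain

     _1 = a + b;  _2 = _1 + c;  s = _2 + d;

   is rebuilt roughly as

     _1 = a + b;  _2 = c + d;  s = _1 + _2;

   so the two partial sums are independent and can issue in the same
   cycle.  */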
5328 static void
5329 rewrite_expr_tree_parallel (gassign *stmt, int width,
5330 const vec<operand_entry *> &ops)
5332 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5333 int op_num = ops.length ();
5334 gcc_assert (op_num > 0);
5335 int stmt_num = op_num - 1;
5336 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5337 int op_index = op_num - 1;
5338 int stmt_index = 0;
5339 int ready_stmts_end = 0;
5340 int i = 0;
5341 gimple *stmt1 = NULL, *stmt2 = NULL;
5342 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5344 /* We start expression rewriting from the top statements.
5345 So, in this loop we create a full list of statements
5346 we will work with. */
5347 stmts[stmt_num - 1] = stmt;
5348 for (i = stmt_num - 2; i >= 0; i--)
5349 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5351 for (i = 0; i < stmt_num; i++)
5353 tree op1, op2;
5355 /* Determine whether we should use results of
5356 already handled statements or not. */
5357 if (ready_stmts_end == 0
5358 && (i - stmt_index >= width || op_index < 1))
5359 ready_stmts_end = i;
5361 /* Now we choose operands for the next statement. A nonzero
5362 value in ready_stmts_end here means that we should use
5363 the results of already generated statements as new operands. */
5364 if (ready_stmts_end > 0)
5366 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5367 if (ready_stmts_end > stmt_index)
5368 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5369 else if (op_index >= 0)
5371 operand_entry *oe = ops[op_index--];
5372 stmt2 = oe->stmt_to_insert;
5373 op2 = oe->op;
5375 else
5377 gcc_assert (stmt_index < i);
5378 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5381 if (stmt_index >= ready_stmts_end)
5382 ready_stmts_end = 0;
5384 else
5386 if (op_index > 1)
5387 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5388 operand_entry *oe2 = ops[op_index--];
5389 operand_entry *oe1 = ops[op_index--];
5390 op2 = oe2->op;
5391 stmt2 = oe2->stmt_to_insert;
5392 op1 = oe1->op;
5393 stmt1 = oe1->stmt_to_insert;
5396 /* If we emit the last statement then we should put
5397 operands into the last statement. It will also
5398 break the loop. */
5399 if (op_index < 0 && stmt_index == i)
5400 i = stmt_num - 1;
5402 if (dump_file && (dump_flags & TDF_DETAILS))
5404 fprintf (dump_file, "Transforming ");
5405 print_gimple_stmt (dump_file, stmts[i], 0);
5408 /* If the stmt that defines operand has to be inserted, insert it
5409 before the use. */
5410 if (stmt1)
5411 insert_stmt_before_use (stmts[i], stmt1);
5412 if (stmt2)
5413 insert_stmt_before_use (stmts[i], stmt2);
5414 stmt1 = stmt2 = NULL;
5416 /* We keep the original statement only for the last one. All
5417 others are recreated. */
5418 if (i == stmt_num - 1)
5420 gimple_assign_set_rhs1 (stmts[i], op1);
5421 gimple_assign_set_rhs2 (stmts[i], op2);
5422 update_stmt (stmts[i]);
5424 else
5426 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5427 gimple_set_visited (stmts[i], true);
5429 if (dump_file && (dump_flags & TDF_DETAILS))
5431 fprintf (dump_file, " into ");
5432 print_gimple_stmt (dump_file, stmts[i], 0);
5436 remove_visited_stmt_chain (last_rhs1);
5439 /* Transform STMT, which is really (A + B) + (C + D), into the left
5440 linear form, ((A+B)+C)+D.
5441 Recurse on D if necessary. */
5443 static void
5444 linearize_expr (gimple *stmt)
5446 gimple_stmt_iterator gsi;
5447 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5448 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5449 gimple *oldbinrhs = binrhs;
5450 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5451 gimple *newbinrhs = NULL;
5452 class loop *loop = loop_containing_stmt (stmt);
5453 tree lhs = gimple_assign_lhs (stmt);
5455 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5456 && is_reassociable_op (binrhs, rhscode, loop));
5458 gsi = gsi_for_stmt (stmt);
5460 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5461 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5462 gimple_assign_rhs_code (binrhs),
5463 gimple_assign_lhs (binlhs),
5464 gimple_assign_rhs2 (binrhs));
5465 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5466 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5467 gimple_set_uid (binrhs, gimple_uid (stmt));
5469 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5470 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5472 if (dump_file && (dump_flags & TDF_DETAILS))
5474 fprintf (dump_file, "Linearized: ");
5475 print_gimple_stmt (dump_file, stmt, 0);
5478 reassociate_stats.linearized++;
5479 update_stmt (stmt);
5481 gsi = gsi_for_stmt (oldbinrhs);
5482 reassoc_remove_stmt (&gsi);
5483 release_defs (oldbinrhs);
5485 gimple_set_visited (stmt, true);
5486 gimple_set_visited (binlhs, true);
5487 gimple_set_visited (binrhs, true);
5489 /* Tail recurse on the new rhs if it still needs reassociation. */
5490 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5491 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5492 want to change the algorithm while converting to tuples. */
5493 linearize_expr (stmt);
5496 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5497 it. Otherwise, return NULL. */
5499 static gimple *
5500 get_single_immediate_use (tree lhs)
5502 use_operand_p immuse;
5503 gimple *immusestmt;
5505 if (TREE_CODE (lhs) == SSA_NAME
5506 && single_imm_use (lhs, &immuse, &immusestmt)
5507 && is_gimple_assign (immusestmt))
5508 return immusestmt;
5510 return NULL;
5513 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5514 representing the negated value. Insertions of any necessary
5515 instructions go before GSI.
5516 This function is recursive in that, if you hand it "a_5" as the
5517 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5518 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5520 static tree
5521 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5523 gimple *negatedefstmt = NULL;
5524 tree resultofnegate;
5525 gimple_stmt_iterator gsi;
5526 unsigned int uid;
5528 /* If we are trying to negate a name, defined by an add, negate the
5529 add operands instead. */
5530 if (TREE_CODE (tonegate) == SSA_NAME)
5531 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5532 if (TREE_CODE (tonegate) == SSA_NAME
5533 && is_gimple_assign (negatedefstmt)
5534 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5535 && has_single_use (gimple_assign_lhs (negatedefstmt))
5536 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5538 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5539 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5540 tree lhs = gimple_assign_lhs (negatedefstmt);
5541 gimple *g;
5543 gsi = gsi_for_stmt (negatedefstmt);
5544 rhs1 = negate_value (rhs1, &gsi);
5546 gsi = gsi_for_stmt (negatedefstmt);
5547 rhs2 = negate_value (rhs2, &gsi);
5549 gsi = gsi_for_stmt (negatedefstmt);
5550 lhs = make_ssa_name (TREE_TYPE (lhs));
5551 gimple_set_visited (negatedefstmt, true);
5552 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5553 gimple_set_uid (g, gimple_uid (negatedefstmt));
5554 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5555 return lhs;
5558 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5559 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5560 NULL_TREE, true, GSI_SAME_STMT);
5561 gsi = *gsip;
5562 uid = gimple_uid (gsi_stmt (gsi));
5563 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5565 gimple *stmt = gsi_stmt (gsi);
5566 if (gimple_uid (stmt) != 0)
5567 break;
5568 gimple_set_uid (stmt, uid);
5570 return resultofnegate;
5573 /* Return true if we should break up the subtract in STMT into an add
5574 with negate. This is true when the subtract operands are really
5575 adds, or the subtract itself is used in an add expression. In
5576 either case, breaking up the subtract into an add with negate
5577 exposes the adds to reassociation. */
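/* For instance (illustrative only), in

     t_1 = a_2 + b_3;
     c_4 = t_1 - d_5;
     e_6 = c_4 + f_7;

   the subtract qualifies on both counts: its first operand is an add
   and its result feeds another add, so rewriting it as
   c_4 = t_1 + (-d_5) exposes the whole chain to reassociation.  */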
5579 static bool
5580 should_break_up_subtract (gimple *stmt)
5582 tree lhs = gimple_assign_lhs (stmt);
5583 tree binlhs = gimple_assign_rhs1 (stmt);
5584 tree binrhs = gimple_assign_rhs2 (stmt);
5585 gimple *immusestmt;
5586 class loop *loop = loop_containing_stmt (stmt);
5588 if (TREE_CODE (binlhs) == SSA_NAME
5589 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5590 return true;
5592 if (TREE_CODE (binrhs) == SSA_NAME
5593 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5594 return true;
5596 if (TREE_CODE (lhs) == SSA_NAME
5597 && (immusestmt = get_single_immediate_use (lhs))
5598 && is_gimple_assign (immusestmt)
5599 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5600 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5601 && gimple_assign_rhs1 (immusestmt) == lhs)
5602 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5603 return true;
5604 return false;
5607 /* Transform STMT from A - B into A + -B. */
5609 static void
5610 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5612 tree rhs1 = gimple_assign_rhs1 (stmt);
5613 tree rhs2 = gimple_assign_rhs2 (stmt);
5615 if (dump_file && (dump_flags & TDF_DETAILS))
5617 fprintf (dump_file, "Breaking up subtract ");
5618 print_gimple_stmt (dump_file, stmt, 0);
5621 rhs2 = negate_value (rhs2, gsip);
5622 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5623 update_stmt (stmt);
5626 /* Determine whether STMT is a builtin call that raises an SSA name
5627 to an integer power and has only one use. If so, and this is early
5628 reassociation and unsafe math optimizations are permitted, place
5629 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5630 If any of these conditions does not hold, return FALSE. */
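/* Hedged examples of what the checks below accept and reject: with
   unsafe math optimizations enabled, pow (x_1, 4.0) (assuming
   errno-setting math is disabled) and powi (x_1, 4) both yield
   *BASE = x_1 and *EXPONENT = 4, while pow (x_1, 2.5) is rejected
   because 2.5 is not exactly an integer.  */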
5632 static bool
5633 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5635 tree arg1;
5636 REAL_VALUE_TYPE c, cint;
5638 switch (gimple_call_combined_fn (stmt))
5640 CASE_CFN_POW:
5641 if (flag_errno_math)
5642 return false;
5644 *base = gimple_call_arg (stmt, 0);
5645 arg1 = gimple_call_arg (stmt, 1);
5647 if (TREE_CODE (arg1) != REAL_CST)
5648 return false;
5650 c = TREE_REAL_CST (arg1);
5652 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5653 return false;
5655 *exponent = real_to_integer (&c);
5656 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5657 if (!real_identical (&c, &cint))
5658 return false;
5660 break;
5662 CASE_CFN_POWI:
5663 *base = gimple_call_arg (stmt, 0);
5664 arg1 = gimple_call_arg (stmt, 1);
5666 if (!tree_fits_shwi_p (arg1))
5667 return false;
5669 *exponent = tree_to_shwi (arg1);
5670 break;
5672 default:
5673 return false;
5676 /* Expanding negative exponents is generally unproductive, so we don't
5677 complicate matters with those. Exponents of zero and one should
5678 have been handled by expression folding. */
5679 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5680 return false;
5682 return true;
5685 /* Try to derive and add operand entry for OP to *OPS. Return false if
5686 unsuccessful. */
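/* Sketch (invented names): for CODE == MULT_EXPR and OP defined by

     op_1 = -x_2;

   the entry is split into the two factors x_2 and -1, so a product like
   y * -x is collected as y * x * -1; the later passes over the ops
   vector can then fold the -1 constants together.  */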
5688 static bool
5689 try_special_add_to_ops (vec<operand_entry *> *ops,
5690 enum tree_code code,
5691 tree op, gimple* def_stmt)
5693 tree base = NULL_TREE;
5694 HOST_WIDE_INT exponent = 0;
5696 if (TREE_CODE (op) != SSA_NAME
5697 || ! has_single_use (op))
5698 return false;
5700 if (code == MULT_EXPR
5701 && reassoc_insert_powi_p
5702 && flag_unsafe_math_optimizations
5703 && is_gimple_call (def_stmt)
5704 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5706 add_repeat_to_ops_vec (ops, base, exponent);
5707 gimple_set_visited (def_stmt, true);
5708 return true;
5710 else if (code == MULT_EXPR
5711 && is_gimple_assign (def_stmt)
5712 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5713 && !HONOR_SNANS (TREE_TYPE (op))
5714 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5715 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5717 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5718 tree cst = build_minus_one_cst (TREE_TYPE (op));
5719 add_to_ops_vec (ops, rhs1);
5720 add_to_ops_vec (ops, cst);
5721 gimple_set_visited (def_stmt, true);
5722 return true;
5725 return false;
5728 /* Recursively linearize a binary expression that is the RHS of STMT.
5729 Place the operands of the expression tree in the vector named OPS. */
5731 static void
5732 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5733 bool is_associative, bool set_visited)
5735 tree binlhs = gimple_assign_rhs1 (stmt);
5736 tree binrhs = gimple_assign_rhs2 (stmt);
5737 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5738 bool binlhsisreassoc = false;
5739 bool binrhsisreassoc = false;
5740 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5741 class loop *loop = loop_containing_stmt (stmt);
5743 if (set_visited)
5744 gimple_set_visited (stmt, true);
5746 if (TREE_CODE (binlhs) == SSA_NAME)
5748 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5749 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5750 && !stmt_could_throw_p (cfun, binlhsdef));
5753 if (TREE_CODE (binrhs) == SSA_NAME)
5755 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5756 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5757 && !stmt_could_throw_p (cfun, binrhsdef));
5760 /* If the LHS is not reassociable, but the RHS is, we need to swap
5761 them. If neither is reassociable, there is nothing we can do, so
5762 just put them in the ops vector. If the LHS is reassociable,
5763 linearize it. If both are reassociable, then linearize the RHS
5764 and the LHS. */
5766 if (!binlhsisreassoc)
5768 /* If this is not an associative operation, like division, give up. */
5769 if (!is_associative)
5771 add_to_ops_vec (ops, binrhs);
5772 return;
5775 if (!binrhsisreassoc)
5777 bool swap = false;
5778 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5779 /* If we add ops for the rhs we expect to be able to recurse
5780 to it via the lhs during expression rewrite so swap
5781 operands. */
5782 swap = true;
5783 else
5784 add_to_ops_vec (ops, binrhs);
5786 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5787 add_to_ops_vec (ops, binlhs);
5789 if (!swap)
5790 return;
5793 if (dump_file && (dump_flags & TDF_DETAILS))
5795 fprintf (dump_file, "swapping operands of ");
5796 print_gimple_stmt (dump_file, stmt, 0);
5799 swap_ssa_operands (stmt,
5800 gimple_assign_rhs1_ptr (stmt),
5801 gimple_assign_rhs2_ptr (stmt));
5802 update_stmt (stmt);
5804 if (dump_file && (dump_flags & TDF_DETAILS))
5806 fprintf (dump_file, " is now ");
5807 print_gimple_stmt (dump_file, stmt, 0);
5809 if (!binrhsisreassoc)
5810 return;
5812 /* We want to make it so the lhs is always the reassociative op,
5813 so swap. */
5814 std::swap (binlhs, binrhs);
5816 else if (binrhsisreassoc)
5818 linearize_expr (stmt);
5819 binlhs = gimple_assign_rhs1 (stmt);
5820 binrhs = gimple_assign_rhs2 (stmt);
5823 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5824 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5825 rhscode, loop));
5826 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5827 is_associative, set_visited);
5829 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5830 add_to_ops_vec (ops, binrhs);
5833 /* Repropagate the negates back into subtracts, since no other pass
5834 currently does it. */
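/* As an illustration (invented SSA names):

     x_1 = -a_2;
     y_3 = b_4 + x_1;

   becomes

     y_3 = b_4 - a_2;

   while the MINUS_EXPR cases below either push the negate past the
   subtraction or cancel it entirely.  */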
5836 static void
5837 repropagate_negates (void)
5839 unsigned int i = 0;
5840 tree negate;
5842 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5844 gimple *user = get_single_immediate_use (negate);
5846 if (!user || !is_gimple_assign (user))
5847 continue;
5849 /* The negate operand can be either operand of a PLUS_EXPR
5850 (it can be the LHS if the RHS is a constant for example).
5852 Force the negate operand to the RHS of the PLUS_EXPR, then
5853 transform the PLUS_EXPR into a MINUS_EXPR. */
5854 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5856 /* If the negated operand appears on the LHS of the
5857 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5858 to force the negated operand to the RHS of the PLUS_EXPR. */
5859 if (gimple_assign_rhs1 (user) == negate)
5861 swap_ssa_operands (user,
5862 gimple_assign_rhs1_ptr (user),
5863 gimple_assign_rhs2_ptr (user));
5866 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5867 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5868 if (gimple_assign_rhs2 (user) == negate)
5870 tree rhs1 = gimple_assign_rhs1 (user);
5871 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5872 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5873 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5874 update_stmt (user);
5877 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5879 if (gimple_assign_rhs1 (user) == negate)
5881 /* We have
5882 x = -a
5883 y = x - b
5884 which we transform into
5885 x = a + b
5886 y = -x .
5887 This pushes down the negate which we possibly can merge
5888 into some other operation, hence insert it into the
5889 plus_negates vector. */
5890 gimple *feed = SSA_NAME_DEF_STMT (negate);
5891 tree a = gimple_assign_rhs1 (feed);
5892 tree b = gimple_assign_rhs2 (user);
5893 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5894 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5895 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5896 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5897 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5898 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5899 user = gsi_stmt (gsi2);
5900 update_stmt (user);
5901 reassoc_remove_stmt (&gsi);
5902 release_defs (feed);
5903 plus_negates.safe_push (gimple_assign_lhs (user));
5905 else
5907 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5908 rid of one operation. */
5909 gimple *feed = SSA_NAME_DEF_STMT (negate);
5910 tree a = gimple_assign_rhs1 (feed);
5911 tree rhs1 = gimple_assign_rhs1 (user);
5912 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5913 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5914 update_stmt (gsi_stmt (gsi));
5920 /* Break up subtract operations in block BB.
5922 We do this top down because we don't know whether the subtract is
5923 part of a possible chain of reassociation except at the top.
5925 IE given
5926 d = f + g
5927 c = a + e
5928 b = c - d
5929 q = b - r
5930 k = t - q
5932 we want to break up k = t - q, but we won't until we've transformed q
5933 = b - r, which won't be broken up until we transform b = c - d.
5935 En passant, clear the GIMPLE visited flag on every statement
5936 and set UIDs within each basic block. */
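/* The break-up itself rewrites, e.g. (a sketch with invented names),

     c_1 = a_2 - b_3;

   into

     t_4 = -b_3;
     c_1 = a_2 + t_4;

   when should_break_up_subtract decides this exposes more reassociation,
   for instance because the result is only used in an addition.  */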
5938 static void
5939 break_up_subtract_bb (basic_block bb)
5941 gimple_stmt_iterator gsi;
5942 basic_block son;
5943 unsigned int uid = 1;
5945 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5947 gimple *stmt = gsi_stmt (gsi);
5948 gimple_set_visited (stmt, false);
5949 gimple_set_uid (stmt, uid++);
5951 if (!is_gimple_assign (stmt)
5952 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
5953 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
5954 continue;
5956 /* Look for simple gimple subtract operations. */
5957 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5959 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
5960 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
5961 continue;
5963 /* Check for a subtract used only in an addition. If this
5964 is the case, transform it into add of a negate for better
5965 reassociation. IE transform C = A-B into C = A + -B if C
5966 is only used in an addition. */
5967 if (should_break_up_subtract (stmt))
5968 break_up_subtract (stmt, &gsi);
5970 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5971 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
5972 plus_negates.safe_push (gimple_assign_lhs (stmt));
5974 for (son = first_dom_son (CDI_DOMINATORS, bb);
5975 son;
5976 son = next_dom_son (CDI_DOMINATORS, son))
5977 break_up_subtract_bb (son);
5980 /* Used for repeated factor analysis. */
5981 struct repeat_factor
5983 /* An SSA name that occurs in a multiply chain. */
5984 tree factor;
5986 /* Cached rank of the factor. */
5987 unsigned rank;
5989 /* Number of occurrences of the factor in the chain. */
5990 HOST_WIDE_INT count;
5992 /* An SSA name representing the product of this factor and
5993 all factors appearing later in the repeated factor vector. */
5994 tree repr;
5998 static vec<repeat_factor> repeat_factor_vec;
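/* As an example of the data this vector holds (values invented), for a
   chain computing x * x * y * y * y it would contain, once sorted,

     { factor = x, count = 2, repr = NULL }
     { factor = y, count = 3, repr = NULL }

   with REPR filled in lazily as partial products are formed in
   attempt_builtin_powi.  */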
6000 /* Used for sorting the repeat factor vector. Sort primarily by
6001 ascending occurrence count, secondarily by descending rank. */
6003 static int
6004 compare_repeat_factors (const void *x1, const void *x2)
6006 const repeat_factor *rf1 = (const repeat_factor *) x1;
6007 const repeat_factor *rf2 = (const repeat_factor *) x2;
6009 if (rf1->count != rf2->count)
6010 return rf1->count < rf2->count ? -1 : 1;
6012 /* Descending rank, without narrowing or unsigned wraparound. */
     return (rf1->rank < rf2->rank) - (rf1->rank > rf2->rank);
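/* For instance (made-up values), compare_repeat_factors above orders
   factors with (count, rank) pairs (3, 7), (1, 9), (3, 2), (1, 4) as

     (1, 9), (1, 4), (3, 7), (3, 2)

   i.e. the rarest factors come first, and among equal counts the
   higher-ranked factor comes first, which is the order the caching
   scheme in attempt_builtin_powi relies on.  */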
6015 /* Look for repeated operands in OPS in the multiply tree rooted at
6016 STMT. Replace them with an optimal sequence of multiplies and powi
6017 builtin calls, and remove the used operands from OPS. Return an
6018 SSA name representing the value of the replacement sequence. */
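/* A small end-to-end sketch (names invented): for a chain computing
   r_1 = x_2 * x_2 * y_3 * y_3 with -funsafe-math-optimizations, OPS
   holds two copies each of x_2 and y_3; the pass emits

     t_4 = x_2 * y_3;
     reassocpow_5 = __builtin_powi (t_4, 2);

   removes the four consumed operands from OPS, and returns
   reassocpow_5 (for integral types the powi call is expanded into
   multiplies instead).  */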
6020 static tree
6021 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6023 unsigned i, j, vec_len;
6024 int ii;
6025 operand_entry *oe;
6026 repeat_factor *rf1, *rf2;
6027 repeat_factor rfnew;
6028 tree result = NULL_TREE;
6029 tree target_ssa, iter_result;
6030 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6031 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6032 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6033 gimple *mul_stmt, *pow_stmt;
6035 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6036 target, unless type is integral. */
6037 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6038 return NULL_TREE;
6040 /* Allocate the repeated factor vector. */
6041 repeat_factor_vec.create (10);
6043 /* Scan the OPS vector for all SSA names in the product and build
6044 up a vector of occurrence counts for each factor. */
6045 FOR_EACH_VEC_ELT (*ops, i, oe)
6047 if (TREE_CODE (oe->op) == SSA_NAME)
6049 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6051 if (rf1->factor == oe->op)
6053 rf1->count += oe->count;
6054 break;
6058 if (j >= repeat_factor_vec.length ())
6060 rfnew.factor = oe->op;
6061 rfnew.rank = oe->rank;
6062 rfnew.count = oe->count;
6063 rfnew.repr = NULL_TREE;
6064 repeat_factor_vec.safe_push (rfnew);
6069 /* Sort the repeated factor vector by (a) increasing occurrence count,
6070 and (b) decreasing rank. */
6071 repeat_factor_vec.qsort (compare_repeat_factors);
6073 /* It is generally best to combine as many base factors as possible
6074 into a product before applying __builtin_powi to the result.
6075 However, the sort order chosen for the repeated factor vector
6076 allows us to cache partial results for the product of the base
6077 factors for subsequent use. When we already have a cached partial
6078 result from a previous iteration, it is best to make use of it
6079 before looking for another __builtin_powi opportunity.
6081 As an example, consider x * x * y * y * y * z * z * z * z.
6082 We want to first compose the product x * y * z, raise it to the
6083 second power, then multiply this by y * z, and finally multiply
6084 by z. This can be done in 5 multiplies provided we cache y * z
6085 for use in both expressions:
6087 t1 = y * z
6088 t2 = t1 * x
6089 t3 = t2 * t2
6090 t4 = t1 * t3
6091 result = t4 * z
6093 If we instead ignored the cached y * z and first multiplied by
6094 the __builtin_powi opportunity z * z, we would get the inferior:
6096 t1 = y * z
6097 t2 = t1 * x
6098 t3 = t2 * t2
6099 t4 = z * z
6100 t5 = t3 * t4
6101 result = t5 * y */
6103 vec_len = repeat_factor_vec.length ();
6105 /* Repeatedly look for opportunities to create a builtin_powi call. */
6106 while (true)
6108 HOST_WIDE_INT power;
6110 /* First look for the largest cached product of factors from
6111 preceding iterations. If found, create a builtin_powi for
6112 it if the minimum occurrence count for its factors is at
6113 least 2, or just use this cached product as our next
6114 multiplicand if the minimum occurrence count is 1. */
6115 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6117 if (rf1->repr && rf1->count > 0)
6118 break;
6121 if (j < vec_len)
6123 power = rf1->count;
6125 if (power == 1)
6127 iter_result = rf1->repr;
6129 if (dump_file && (dump_flags & TDF_DETAILS))
6131 unsigned elt;
6132 repeat_factor *rf;
6133 fputs ("Multiplying by cached product ", dump_file);
6134 for (elt = j; elt < vec_len; elt++)
6136 rf = &repeat_factor_vec[elt];
6137 print_generic_expr (dump_file, rf->factor);
6138 if (elt < vec_len - 1)
6139 fputs (" * ", dump_file);
6141 fputs ("\n", dump_file);
6144 else
6146 if (INTEGRAL_TYPE_P (type))
6148 gcc_assert (power > 1);
6149 gimple_stmt_iterator gsip = gsi;
6150 gsi_prev (&gsip);
6151 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6152 rf1->repr, power);
6153 gimple_stmt_iterator gsic = gsi;
6154 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6156 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6157 gimple_set_visited (gsi_stmt (gsic), true);
6158 gsi_prev (&gsic);
6161 else
6163 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6164 pow_stmt
6165 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6166 build_int_cst (integer_type_node,
6167 power));
6168 gimple_call_set_lhs (pow_stmt, iter_result);
6169 gimple_set_location (pow_stmt, gimple_location (stmt));
6170 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6171 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6174 if (dump_file && (dump_flags & TDF_DETAILS))
6176 unsigned elt;
6177 repeat_factor *rf;
6178 fputs ("Building __builtin_pow call for cached product (",
6179 dump_file);
6180 for (elt = j; elt < vec_len; elt++)
6182 rf = &repeat_factor_vec[elt];
6183 print_generic_expr (dump_file, rf->factor);
6184 if (elt < vec_len - 1)
6185 fputs (" * ", dump_file);
6187 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6188 power);
6192 else
6194 /* Otherwise, find the first factor in the repeated factor
6195 vector whose occurrence count is at least 2. If no such
6196 factor exists, there are no builtin_powi opportunities
6197 remaining. */
6198 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6200 if (rf1->count >= 2)
6201 break;
6204 if (j >= vec_len)
6205 break;
6207 power = rf1->count;
6209 if (dump_file && (dump_flags & TDF_DETAILS))
6211 unsigned elt;
6212 repeat_factor *rf;
6213 fputs ("Building __builtin_pow call for (", dump_file);
6214 for (elt = j; elt < vec_len; elt++)
6216 rf = &repeat_factor_vec[elt];
6217 print_generic_expr (dump_file, rf->factor);
6218 if (elt < vec_len - 1)
6219 fputs (" * ", dump_file);
6221 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6224 reassociate_stats.pows_created++;
6226 /* Visit each element of the vector in reverse order (so that
6227 high-occurrence elements are visited first, and within the
6228 same occurrence count, lower-ranked elements are visited
6229 first). Form a linear product of all elements in this order
6230 whose occurrence count is at least that of element J.
6231 Record the SSA name representing the product of each element
6232 with all subsequent elements in the vector. */
6233 if (j == vec_len - 1)
6234 rf1->repr = rf1->factor;
6235 else
6237 for (ii = vec_len - 2; ii >= (int)j; ii--)
6239 tree op1, op2;
6241 rf1 = &repeat_factor_vec[ii];
6242 rf2 = &repeat_factor_vec[ii + 1];
6244 /* Init the last factor's representative to be itself. */
6245 if (!rf2->repr)
6246 rf2->repr = rf2->factor;
6248 op1 = rf1->factor;
6249 op2 = rf2->repr;
6251 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6252 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6253 op1, op2);
6254 gimple_set_location (mul_stmt, gimple_location (stmt));
6255 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6256 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6257 rf1->repr = target_ssa;
6259 /* Don't reprocess the multiply we just introduced. */
6260 gimple_set_visited (mul_stmt, true);
6264 /* Form a call to __builtin_powi for the maximum product
6265 just formed, raised to the power obtained earlier. */
6266 rf1 = &repeat_factor_vec[j];
6267 if (INTEGRAL_TYPE_P (type))
6269 gcc_assert (power > 1);
6270 gimple_stmt_iterator gsip = gsi;
6271 gsi_prev (&gsip);
6272 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6273 rf1->repr, power);
6274 gimple_stmt_iterator gsic = gsi;
6275 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6277 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6278 gimple_set_visited (gsi_stmt (gsic), true);
6279 gsi_prev (&gsic);
6282 else
6284 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6285 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6286 build_int_cst (integer_type_node,
6287 power));
6288 gimple_call_set_lhs (pow_stmt, iter_result);
6289 gimple_set_location (pow_stmt, gimple_location (stmt));
6290 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6291 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6295 /* If we previously formed at least one other builtin_powi call,
6296 form the product of this one and those others. */
6297 if (result)
6299 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6300 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6301 result, iter_result);
6302 gimple_set_location (mul_stmt, gimple_location (stmt));
6303 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6304 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6305 gimple_set_visited (mul_stmt, true);
6306 result = new_result;
6308 else
6309 result = iter_result;
6311 /* Decrement the occurrence count of each element in the product
6312 by the count found above, and remove this many copies of each
6313 factor from OPS. */
6314 for (i = j; i < vec_len; i++)
6316 unsigned k = power;
6317 unsigned n;
6319 rf1 = &repeat_factor_vec[i];
6320 rf1->count -= power;
6322 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6324 if (oe->op == rf1->factor)
6326 if (oe->count <= k)
6328 ops->ordered_remove (n);
6329 k -= oe->count;
6331 if (k == 0)
6332 break;
6334 else
6336 oe->count -= k;
6337 break;
6344 /* At this point all elements in the repeated factor vector have a
6345 remaining occurrence count of 0 or 1, and those with a count of 1
6346 don't have cached representatives. Re-sort the ops vector and
6347 clean up. */
6348 ops->qsort (sort_by_operand_rank);
6349 repeat_factor_vec.release ();
6351 /* Return the final product computed herein. Note that there may
6352 still be some elements with single occurrence count left in OPS;
6353 those will be handled by the normal reassociation logic. */
6354 return result;
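/* For integral types the code above relies on powi_as_mults, which
   expands x**n with a square-and-multiply chain.  A standalone,
   simplified illustration of that idea (plain C, not GCC internals;
   the function name and types are invented for the example):

     // Compute x**n for non-negative n using O(log n) multiplies.
     static double powi_sketch (double x, unsigned long long n)
     {
       double result = 1.0;
       while (n)
         {
           if (n & 1)
             result *= x;   // fold in the current bit of the exponent
           x *= x;          // square for the next bit
           n >>= 1;
         }
       return result;
     }

   powi_as_mults additionally caches intermediate powers so the emitted
   GIMPLE reuses subexpressions where possible.  */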
6357 /* Attempt to optimize
6358 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6359 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
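/* For example (a sketch with invented names), if the multiplication
   chain contains the constant 2.0 and t_1 = copysign (3.0, y_2), the
   pass emits

     t_3 = copysign (6.0, y_2);

   drops the 2.0 from OPS and uses t_3 in place of t_1; a negative
   constant additionally negates the new result.  */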
6361 static void
6362 attempt_builtin_copysign (vec<operand_entry *> *ops)
6364 operand_entry *oe;
6365 unsigned int i;
6366 unsigned int length = ops->length ();
6367 tree cst = ops->last ()->op;
6369 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6370 return;
6372 FOR_EACH_VEC_ELT (*ops, i, oe)
6374 if (TREE_CODE (oe->op) == SSA_NAME
6375 && has_single_use (oe->op))
6377 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6378 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6380 tree arg0, arg1;
6381 switch (gimple_call_combined_fn (old_call))
6383 CASE_CFN_COPYSIGN:
6384 CASE_CFN_COPYSIGN_FN:
6385 arg0 = gimple_call_arg (old_call, 0);
6386 arg1 = gimple_call_arg (old_call, 1);
6387 /* The first argument of copysign must be a constant,
6388 otherwise there's nothing to do. */
6389 if (TREE_CODE (arg0) == REAL_CST)
6391 tree type = TREE_TYPE (arg0);
6392 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6393 /* If we couldn't fold to a single constant, skip it.
6394 That happens e.g. for inexact multiplication when
6395 -frounding-math. */
6396 if (mul == NULL_TREE)
6397 break;
6398 /* Instead of adjusting OLD_CALL, let's build a new
6399 call to not leak the LHS and prevent keeping bogus
6400 debug statements. DCE will clean up the old call. */
6401 gcall *new_call;
6402 if (gimple_call_internal_p (old_call))
6403 new_call = gimple_build_call_internal
6404 (IFN_COPYSIGN, 2, mul, arg1);
6405 else
6406 new_call = gimple_build_call
6407 (gimple_call_fndecl (old_call), 2, mul, arg1);
6408 tree lhs = make_ssa_name (type);
6409 gimple_call_set_lhs (new_call, lhs);
6410 gimple_set_location (new_call,
6411 gimple_location (old_call));
6412 insert_stmt_after (new_call, old_call);
6413 /* We've used the constant, get rid of it. */
6414 ops->pop ();
6415 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6416 /* Handle the CST1 < 0 case by negating the result. */
6417 if (cst1_neg)
6419 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6420 gimple *negate_stmt
6421 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6422 insert_stmt_after (negate_stmt, new_call);
6423 oe->op = negrhs;
6425 else
6426 oe->op = lhs;
6427 if (dump_file && (dump_flags & TDF_DETAILS))
6429 fprintf (dump_file, "Optimizing copysign: ");
6430 print_generic_expr (dump_file, cst);
6431 fprintf (dump_file, " * COPYSIGN (");
6432 print_generic_expr (dump_file, arg0);
6433 fprintf (dump_file, ", ");
6434 print_generic_expr (dump_file, arg1);
6435 fprintf (dump_file, ") into %sCOPYSIGN (",
6436 cst1_neg ? "-" : "");
6437 print_generic_expr (dump_file, mul);
6438 fprintf (dump_file, ", ");
6439 print_generic_expr (dump_file, arg1);
6440 fprintf (dump_file, ")\n");
6442 return;
6444 break;
6445 default:
6446 break;
6453 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6455 static void
6456 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6458 tree rhs1;
6460 if (dump_file && (dump_flags & TDF_DETAILS))
6462 fprintf (dump_file, "Transforming ");
6463 print_gimple_stmt (dump_file, stmt, 0);
6466 rhs1 = gimple_assign_rhs1 (stmt);
6467 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6468 update_stmt (stmt);
6469 remove_visited_stmt_chain (rhs1);
6471 if (dump_file && (dump_flags & TDF_DETAILS))
6473 fprintf (dump_file, " into ");
6474 print_gimple_stmt (dump_file, stmt, 0);
6478 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6480 static void
6481 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6482 tree rhs1, tree rhs2)
6484 if (dump_file && (dump_flags & TDF_DETAILS))
6486 fprintf (dump_file, "Transforming ");
6487 print_gimple_stmt (dump_file, stmt, 0);
6490 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6491 update_stmt (gsi_stmt (*gsi));
6492 remove_visited_stmt_chain (rhs1);
6494 if (dump_file && (dump_flags & TDF_DETAILS))
6496 fprintf (dump_file, " into ");
6497 print_gimple_stmt (dump_file, stmt, 0);
6501 /* Reassociate expressions in basic block BB and its children in the
6502 post-dominator tree.
6504 Bubble up return status from maybe_optimize_range_tests. */
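/* As a small end-to-end illustration (invented names and constants), a
   statement chain computing a_1 + 3 + b_2 + 5 is linearized into
   { a_1, b_2, 3, 5 }, optimize_ops_list folds the constants to 8, and
   the tree is rewritten in rank order, roughly

     t_3 = a_1 + b_2;
     r_4 = t_3 + 8;  */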
6506 static bool
6507 reassociate_bb (basic_block bb)
6509 gimple_stmt_iterator gsi;
6510 basic_block son;
6511 gimple *stmt = last_stmt (bb);
6512 bool cfg_cleanup_needed = false;
6514 if (stmt && !gimple_visited_p (stmt))
6515 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6517 bool do_prev = false;
6518 for (gsi = gsi_last_bb (bb);
6519 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6521 do_prev = true;
6522 stmt = gsi_stmt (gsi);
6524 if (is_gimple_assign (stmt)
6525 && !stmt_could_throw_p (cfun, stmt))
6527 tree lhs, rhs1, rhs2;
6528 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6530 /* If this was part of an already processed statement,
6531 we don't need to touch it again. */
6532 if (gimple_visited_p (stmt))
6534 /* This statement might have become dead because of previous
6535 reassociations. */
6536 if (has_zero_uses (gimple_get_lhs (stmt)))
6538 reassoc_remove_stmt (&gsi);
6539 release_defs (stmt);
6540 /* We might end up removing the last stmt above which
6541 places the iterator to the end of the sequence.
6542 Reset it to the last stmt in this case and make sure
6543 we don't do gsi_prev in that case. */
6544 if (gsi_end_p (gsi))
6546 gsi = gsi_last_bb (bb);
6547 do_prev = false;
6550 continue;
6553 /* If this is not a gimple binary expression, there is
6554 nothing for us to do with it. */
6555 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6556 continue;
6558 lhs = gimple_assign_lhs (stmt);
6559 rhs1 = gimple_assign_rhs1 (stmt);
6560 rhs2 = gimple_assign_rhs2 (stmt);
6562 /* For operations other than bit and min/max operations we can't
6563 associate all types. Verify that here. */
6564 if ((rhs_code != BIT_IOR_EXPR
6565 && rhs_code != BIT_AND_EXPR
6566 && rhs_code != BIT_XOR_EXPR
6567 && rhs_code != MIN_EXPR
6568 && rhs_code != MAX_EXPR
6569 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6570 || !can_reassociate_op_p (rhs1)
6571 || !can_reassociate_op_p (rhs2))
6572 continue;
6574 if (associative_tree_code (rhs_code))
6576 auto_vec<operand_entry *> ops;
6577 tree powi_result = NULL_TREE;
6578 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6580 /* There may be no immediate uses left by the time we
6581 get here because we may have eliminated them all. */
6582 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6583 continue;
6585 gimple_set_visited (stmt, true);
6586 linearize_expr_tree (&ops, stmt, true, true);
6587 ops.qsort (sort_by_operand_rank);
6588 int orig_len = ops.length ();
6589 optimize_ops_list (rhs_code, &ops);
6590 if (undistribute_ops_list (rhs_code, &ops,
6591 loop_containing_stmt (stmt)))
6593 ops.qsort (sort_by_operand_rank);
6594 optimize_ops_list (rhs_code, &ops);
6596 if (undistribute_bitref_for_vector (rhs_code, &ops,
6597 loop_containing_stmt (stmt)))
6599 ops.qsort (sort_by_operand_rank);
6600 optimize_ops_list (rhs_code, &ops);
6602 if (rhs_code == PLUS_EXPR
6603 && transform_add_to_multiply (&ops))
6604 ops.qsort (sort_by_operand_rank);
6606 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6608 if (is_vector)
6609 optimize_vec_cond_expr (rhs_code, &ops);
6610 else
6611 optimize_range_tests (rhs_code, &ops, NULL);
6614 if (rhs_code == MULT_EXPR && !is_vector)
6616 attempt_builtin_copysign (&ops);
6618 if (reassoc_insert_powi_p
6619 && (flag_unsafe_math_optimizations
6620 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6621 powi_result = attempt_builtin_powi (stmt, &ops);
6624 operand_entry *last;
6625 bool negate_result = false;
6626 if (ops.length () > 1
6627 && rhs_code == MULT_EXPR)
6629 last = ops.last ();
6630 if ((integer_minus_onep (last->op)
6631 || real_minus_onep (last->op))
6632 && !HONOR_SNANS (TREE_TYPE (lhs))
6633 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6634 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6636 ops.pop ();
6637 negate_result = true;
6641 tree new_lhs = lhs;
6642 /* If the operand vector is now empty, all operands were
6643 consumed by the __builtin_powi optimization. */
6644 if (ops.length () == 0)
6645 transform_stmt_to_copy (&gsi, stmt, powi_result);
6646 else if (ops.length () == 1)
6648 tree last_op = ops.last ()->op;
6650 /* If the stmt that defines operand has to be inserted, insert it
6651 before the use. */
6652 if (ops.last ()->stmt_to_insert)
6653 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6654 if (powi_result)
6655 transform_stmt_to_multiply (&gsi, stmt, last_op,
6656 powi_result);
6657 else
6658 transform_stmt_to_copy (&gsi, stmt, last_op);
6660 else
6662 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6663 int ops_num = ops.length ();
6664 int width;
6666 /* For binary bit operations, if there are at least 3
6667 operands and the last operand in OPS is a constant,
6668 move it to the front. This helps ensure that we generate
6669 (X & Y) & C rather than (X & C) & Y. The former will
6670 often match a canonical bit test when we get to RTL. */
6671 if (ops.length () > 2
6672 && (rhs_code == BIT_AND_EXPR
6673 || rhs_code == BIT_IOR_EXPR
6674 || rhs_code == BIT_XOR_EXPR)
6675 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6676 std::swap (*ops[0], *ops[ops_num - 1]);
6678 /* Only rewrite the expression tree into parallel form in the
6679 last reassoc pass, to avoid useless back-and-forth work
6680 with the initial linearization. */
6681 if (!reassoc_insert_powi_p
6682 && ops.length () > 3
6683 && (width = get_reassociation_width (ops_num, rhs_code,
6684 mode)) > 1)
6686 if (dump_file && (dump_flags & TDF_DETAILS))
6687 fprintf (dump_file,
6688 "Width = %d was chosen for reassociation\n",
6689 width);
6690 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6691 width, ops);
6693 else
6695 /* When there are three operands left, we want
6696 to make sure the ones that get the double
6697 binary op are chosen wisely. */
6698 int len = ops.length ();
6699 if (len >= 3)
6700 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6702 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6703 powi_result != NULL
6704 || negate_result,
6705 len != orig_len);
6708 /* If we combined some repeated factors into a
6709 __builtin_powi call, multiply that result by the
6710 reassociated operands. */
6711 if (powi_result)
6713 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6714 tree type = TREE_TYPE (lhs);
6715 tree target_ssa = make_temp_ssa_name (type, NULL,
6716 "reassocpow");
6717 gimple_set_lhs (lhs_stmt, target_ssa);
6718 update_stmt (lhs_stmt);
6719 if (lhs != new_lhs)
6721 target_ssa = new_lhs;
6722 new_lhs = lhs;
6724 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6725 powi_result, target_ssa);
6726 gimple_set_location (mul_stmt, gimple_location (stmt));
6727 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6728 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6732 if (negate_result)
6734 stmt = SSA_NAME_DEF_STMT (lhs);
6735 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6736 gimple_set_lhs (stmt, tmp);
6737 if (lhs != new_lhs)
6738 tmp = new_lhs;
6739 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6740 tmp);
6741 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6742 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6743 update_stmt (stmt);
6748 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6749 son;
6750 son = next_dom_son (CDI_POST_DOMINATORS, son))
6751 cfg_cleanup_needed |= reassociate_bb (son);
6753 return cfg_cleanup_needed;
6756 /* Add jumps around shifts for range tests turned into bit tests.
6757 For each SSA_NAME VAR we have code like:
6758 VAR = ...; // final stmt of range comparison
6759 // bit test here...;
6760 OTHERVAR = ...; // final stmt of the bit test sequence
6761 RES = VAR | OTHERVAR;
6762 Turn the above into:
6763 VAR = ...;
6764 if (VAR != 0)
6765 goto <l3>;
6766 else
6767 goto <l2>;
6768 <l2>:
6769 // bit test here...;
6770 OTHERVAR = ...;
6771 <l3>:
6772 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6774 static void
6775 branch_fixup (void)
6777 tree var;
6778 unsigned int i;
6780 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6782 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6783 gimple *use_stmt;
6784 use_operand_p use;
6785 bool ok = single_imm_use (var, &use, &use_stmt);
6786 gcc_assert (ok
6787 && is_gimple_assign (use_stmt)
6788 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6789 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6791 basic_block cond_bb = gimple_bb (def_stmt);
6792 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6793 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6795 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6796 gimple *g = gimple_build_cond (NE_EXPR, var,
6797 build_zero_cst (TREE_TYPE (var)),
6798 NULL_TREE, NULL_TREE);
6799 location_t loc = gimple_location (use_stmt);
6800 gimple_set_location (g, loc);
6801 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6803 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6804 etrue->probability = profile_probability::even ();
6805 edge efalse = find_edge (cond_bb, then_bb);
6806 efalse->flags = EDGE_FALSE_VALUE;
6807 efalse->probability -= etrue->probability;
6808 then_bb->count -= etrue->count ();
6810 tree othervar = NULL_TREE;
6811 if (gimple_assign_rhs1 (use_stmt) == var)
6812 othervar = gimple_assign_rhs2 (use_stmt);
6813 else if (gimple_assign_rhs2 (use_stmt) == var)
6814 othervar = gimple_assign_rhs1 (use_stmt);
6815 else
6816 gcc_unreachable ();
6817 tree lhs = gimple_assign_lhs (use_stmt);
6818 gphi *phi = create_phi_node (lhs, merge_bb);
6819 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6820 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6821 gsi = gsi_for_stmt (use_stmt);
6822 gsi_remove (&gsi, true);
6824 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6825 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6827 reassoc_branch_fixups.release ();
6830 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6831 void debug_ops_vector (vec<operand_entry *> ops);
6833 /* Dump the operand entry vector OPS to FILE. */
6835 void
6836 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6838 operand_entry *oe;
6839 unsigned int i;
6841 FOR_EACH_VEC_ELT (ops, i, oe)
6843 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6844 print_generic_expr (file, oe->op);
6845 fprintf (file, "\n");
6849 /* Dump the operand entry vector OPS to STDERR. */
6851 DEBUG_FUNCTION void
6852 debug_ops_vector (vec<operand_entry *> ops)
6854 dump_ops_vector (stderr, ops);
6857 /* Bubble up return status from reassociate_bb. */
6859 static bool
6860 do_reassoc (void)
6862 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6863 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6866 /* Initialize the reassociation pass. */
6868 static void
6869 init_reassoc (void)
6871 int i;
6872 int64_t rank = 2;
6873 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6875 /* Find the loops, so that we can prevent moving calculations in
6876 them. */
6877 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6879 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6881 next_operand_entry_id = 0;
6883 /* RPO (reverse post order) will give us something where
6884 deeper loops come later. */
6885 pre_and_rev_post_order_compute (NULL, bbs, false);
6886 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6887 operand_rank = new hash_map<tree, int64_t>;
6889 /* Give each default definition a distinct rank. This includes
6890 parameters and the static chain. Walk backwards over all
6891 SSA names so that we get proper rank ordering according
6892 to tree_swap_operands_p. */
6893 for (i = num_ssa_names - 1; i > 0; --i)
6895 tree name = ssa_name (i);
6896 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6897 insert_operand_rank (name, ++rank);
6900 /* Set up rank for each BB */
6901 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6902 bb_rank[bbs[i]] = ++rank << 16;
6904 free (bbs);
6905 calculate_dominance_info (CDI_POST_DOMINATORS);
6906 plus_negates = vNULL;
6909 /* Cleanup after the reassociation pass, and print stats if
6910 requested. */
6912 static void
6913 fini_reassoc (void)
6915 statistics_counter_event (cfun, "Linearized",
6916 reassociate_stats.linearized);
6917 statistics_counter_event (cfun, "Constants eliminated",
6918 reassociate_stats.constants_eliminated);
6919 statistics_counter_event (cfun, "Ops eliminated",
6920 reassociate_stats.ops_eliminated);
6921 statistics_counter_event (cfun, "Statements rewritten",
6922 reassociate_stats.rewritten);
6923 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6924 reassociate_stats.pows_encountered);
6925 statistics_counter_event (cfun, "Built-in powi calls created",
6926 reassociate_stats.pows_created);
6928 delete operand_rank;
6929 operand_entry_pool.release ();
6930 free (bb_rank);
6931 plus_negates.release ();
6932 free_dominance_info (CDI_POST_DOMINATORS);
6933 loop_optimizer_finalize ();
6936 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6937 insertion of __builtin_powi calls.
6939 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6940 optimization of a gimple conditional. Otherwise returns zero. */
6942 static unsigned int
6943 execute_reassoc (bool insert_powi_p)
6945 reassoc_insert_powi_p = insert_powi_p;
6947 init_reassoc ();
6949 bool cfg_cleanup_needed;
6950 cfg_cleanup_needed = do_reassoc ();
6951 repropagate_negates ();
6952 branch_fixup ();
6954 fini_reassoc ();
6955 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6958 namespace {
6960 const pass_data pass_data_reassoc =
6962 GIMPLE_PASS, /* type */
6963 "reassoc", /* name */
6964 OPTGROUP_NONE, /* optinfo_flags */
6965 TV_TREE_REASSOC, /* tv_id */
6966 ( PROP_cfg | PROP_ssa ), /* properties_required */
6967 0, /* properties_provided */
6968 0, /* properties_destroyed */
6969 0, /* todo_flags_start */
6970 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6973 class pass_reassoc : public gimple_opt_pass
6975 public:
6976 pass_reassoc (gcc::context *ctxt)
6977 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6980 /* opt_pass methods: */
6981 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6982 void set_pass_param (unsigned int n, bool param)
6984 gcc_assert (n == 0);
6985 insert_powi_p = param;
6987 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6988 virtual unsigned int execute (function *)
6989 { return execute_reassoc (insert_powi_p); }
6991 private:
6992 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6993 point 3a in the pass header comment. */
6994 bool insert_powi_p;
6995 }; // class pass_reassoc
6997 } // anon namespace
6999 gimple_opt_pass *
7000 make_pass_reassoc (gcc::context *ctxt)
7002 return new pass_reassoc (ctxt);