gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2019 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive and will rarely help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
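/* A short illustration of steps 1 and 5 above, on hypothetical GIMPLE
   (not taken from any particular testcase): step 1 rewrites

       t = a - b;

   into

       n = -b;
       t = a + n;

   so that n can take part in an addition chain such as a + n + c.  After
   the chain has been rewritten, step 5 turns a leftover x = y + (-z)
   back into x = y - z.  */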
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted - basic blocks
219 split right after that SSA_NAME's definition statement and before
220 its only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 class loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
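/* For illustration (hypothetical operand list): sorting the rank-0
   constants of a PLUS_EXPR chain such as { a, 3, b, 5 } to the end puts
   3 and 5 next to each other, so the later constant merging over the
   operand list can combine them into a single 8, leaving a + b + 8.  */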
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
480 static inline int
481 constant_type (tree t)
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484 return INTEGER_CONST_TYPE;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
487 /* Sort -1.0 and 1.0 constants last: while in some cases
488 const_binop can't optimize some inexact operations, multiplication
489 by -1.0 or 1.0 can always be merged with others. */
490 if (real_onep (t) || real_minus_onep (t))
491 return FLOAT_ONE_CONST_TYPE;
492 return FLOAT_CONST_TYPE;
494 else
495 return OTHER_CONST_TYPE;
498 /* qsort comparison function to sort operand entries PA and PB by rank
499 so that the sorted array is ordered by rank in decreasing order. */
500 static int
501 sort_by_operand_rank (const void *pa, const void *pb)
503 const operand_entry *oea = *(const operand_entry *const *)pa;
504 const operand_entry *oeb = *(const operand_entry *const *)pb;
506 if (oeb->rank != oea->rank)
507 return oeb->rank > oea->rank ? 1 : -1;
509 /* It's nicer for optimize_expression if constants that are likely
510 to fold when added/multiplied/whatever are put next to each
511 other. Since all constants have rank 0, order them by type. */
512 if (oea->rank == 0)
514 if (constant_type (oeb->op) != constant_type (oea->op))
515 return constant_type (oea->op) - constant_type (oeb->op);
516 else
517 /* To make sorting result stable, we use unique IDs to determine
518 order. */
519 return oeb->id > oea->id ? 1 : -1;
522 if (TREE_CODE (oea->op) != SSA_NAME)
524 if (TREE_CODE (oeb->op) != SSA_NAME)
525 return oeb->id > oea->id ? 1 : -1;
526 else
527 return 1;
529 else if (TREE_CODE (oeb->op) != SSA_NAME)
530 return -1;
532 /* Lastly, make sure the versions that are the same go next to each
533 other. */
534 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
536 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
537 versions of removed SSA_NAMEs, so if possible, prefer to sort
538 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539 See PR60418. */
540 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542 basic_block bba = gimple_bb (stmta);
543 basic_block bbb = gimple_bb (stmtb);
544 if (bbb != bba)
546 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 but the other might not be. */
548 if (!bba)
549 return 1;
550 if (!bbb)
551 return -1;
552 /* If neither is, compare bb_rank. */
553 if (bb_rank[bbb->index] != bb_rank[bba->index])
554 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
557 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559 if (da != db)
560 return da ? 1 : -1;
562 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
565 return oeb->id > oea->id ? 1 : -1;
568 /* Add an operand entry to *OPS for the tree operand OP. */
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
573 operand_entry *oe = operand_entry_pool.allocate ();
575 oe->op = op;
576 oe->rank = get_rank (op);
577 oe->id = next_operand_entry_id++;
578 oe->count = 1;
579 oe->stmt_to_insert = stmt_to_insert;
580 ops->safe_push (oe);
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584 count REPEAT. */
586 static void
587 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
588 HOST_WIDE_INT repeat)
590 operand_entry *oe = operand_entry_pool.allocate ();
592 oe->op = op;
593 oe->rank = get_rank (op);
594 oe->id = next_operand_entry_id++;
595 oe->count = repeat;
596 oe->stmt_to_insert = NULL;
597 ops->safe_push (oe);
599 reassociate_stats.pows_encountered++;
602 /* Return true if STMT is a reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */
605 static bool
606 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
608 basic_block bb = gimple_bb (stmt);
610 if (gimple_bb (stmt) == NULL)
611 return false;
613 if (!flow_bb_inside_loop_p (loop, bb))
614 return false;
616 if (is_gimple_assign (stmt)
617 && gimple_assign_rhs_code (stmt) == code
618 && has_single_use (gimple_assign_lhs (stmt)))
620 tree rhs1 = gimple_assign_rhs1 (stmt);
621 tree rhs2 = gimple_assign_rhs2 (stmt);
622 if (TREE_CODE (rhs1) == SSA_NAME
623 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 return false;
625 if (rhs2
626 && TREE_CODE (rhs2) == SSA_NAME
627 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 return false;
629 return true;
632 return false;
636 /* Return true if STMT is a nop-conversion. */
638 static bool
639 gimple_nop_conversion_p (gimple *stmt)
641 if (gassign *ass = dyn_cast <gassign *> (stmt))
643 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 TREE_TYPE (gimple_assign_rhs1 (ass))))
646 return true;
648 return false;
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652 operand of the negate operation. Otherwise, return NULL. */
654 static tree
655 get_unary_op (tree name, enum tree_code opcode)
657 gimple *stmt = SSA_NAME_DEF_STMT (name);
659 /* Look through nop conversions (sign changes). */
660 if (gimple_nop_conversion_p (stmt)
661 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
664 if (!is_gimple_assign (stmt))
665 return NULL_TREE;
667 if (gimple_assign_rhs_code (stmt) == opcode)
668 return gimple_assign_rhs1 (stmt);
669 return NULL_TREE;
672 /* Return true if OP1 and OP2 have the same value if casted to either type. */
674 static bool
675 ops_equal_values_p (tree op1, tree op2)
677 if (op1 == op2)
678 return true;
680 tree orig_op1 = op1;
681 if (TREE_CODE (op1) == SSA_NAME)
683 gimple *stmt = SSA_NAME_DEF_STMT (op1);
684 if (gimple_nop_conversion_p (stmt))
686 op1 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2)
688 return true;
692 if (TREE_CODE (op2) == SSA_NAME)
694 gimple *stmt = SSA_NAME_DEF_STMT (op2);
695 if (gimple_nop_conversion_p (stmt))
697 op2 = gimple_assign_rhs1 (stmt);
698 if (op1 == op2
699 || orig_op1 == op2)
700 return true;
704 return false;
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709 eliminate through equivalences, do so, remove them from OPS, and
710 return true. Otherwise, return false. */
712 static bool
713 eliminate_duplicate_pair (enum tree_code opcode,
714 vec<operand_entry *> *ops,
715 bool *all_done,
716 unsigned int i,
717 operand_entry *curr,
718 operand_entry *last)
721 /* If we have two of the same op, and the opcode is &, |, min, or max,
722 we can eliminate one of them.
723 If we have two of the same op, and the opcode is ^, we can
724 eliminate both of them. */
726 if (last && last->op == curr->op)
728 switch (opcode)
730 case MAX_EXPR:
731 case MIN_EXPR:
732 case BIT_IOR_EXPR:
733 case BIT_AND_EXPR:
734 if (dump_file && (dump_flags & TDF_DETAILS))
736 fprintf (dump_file, "Equivalence: ");
737 print_generic_expr (dump_file, curr->op);
738 fprintf (dump_file, " [&|minmax] ");
739 print_generic_expr (dump_file, last->op);
740 fprintf (dump_file, " -> ");
741 print_generic_stmt (dump_file, last->op);
744 ops->ordered_remove (i);
745 reassociate_stats.ops_eliminated ++;
747 return true;
749 case BIT_XOR_EXPR:
750 if (dump_file && (dump_flags & TDF_DETAILS))
752 fprintf (dump_file, "Equivalence: ");
753 print_generic_expr (dump_file, curr->op);
754 fprintf (dump_file, " ^ ");
755 print_generic_expr (dump_file, last->op);
756 fprintf (dump_file, " -> nothing\n");
759 reassociate_stats.ops_eliminated += 2;
761 if (ops->length () == 2)
763 ops->truncate (0);
764 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765 *all_done = true;
767 else
769 ops->ordered_remove (i-1);
770 ops->ordered_remove (i-1);
773 return true;
775 default:
776 break;
779 return false;
782 static vec<tree> plus_negates;
784 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
785 expression, look in OPS for a corresponding positive operation to cancel
786 it out. If we find one, remove the other from OPS, replace
787 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
788 return false. */
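/* For illustration (hypothetical operands): with OPCODE PLUS_EXPR and an
   operand list { a, t, b } where t = -b, the pair (t, b) cancels and the
   list becomes { a, 0 }; if instead t = ~b, the pair folds to all ones
   and the list becomes { a, -1 }.  */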
790 static bool
791 eliminate_plus_minus_pair (enum tree_code opcode,
792 vec<operand_entry *> *ops,
793 unsigned int currindex,
794 operand_entry *curr)
796 tree negateop;
797 tree notop;
798 unsigned int i;
799 operand_entry *oe;
801 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802 return false;
804 negateop = get_unary_op (curr->op, NEGATE_EXPR);
805 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806 if (negateop == NULL_TREE && notop == NULL_TREE)
807 return false;
809 /* Any non-negated version will have a rank that is one less than
810 the current rank. So once we hit those ranks, if we don't find
811 one, we can stop. */
813 for (i = currindex + 1;
814 ops->iterate (i, &oe)
815 && oe->rank >= curr->rank - 1 ;
816 i++)
818 if (negateop
819 && ops_equal_values_p (oe->op, negateop))
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, negateop);
825 fprintf (dump_file, " + -");
826 print_generic_expr (dump_file, oe->op);
827 fprintf (dump_file, " -> 0\n");
830 ops->ordered_remove (i);
831 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832 ops->ordered_remove (currindex);
833 reassociate_stats.ops_eliminated ++;
835 return true;
837 else if (notop
838 && ops_equal_values_p (oe->op, notop))
840 tree op_type = TREE_TYPE (oe->op);
842 if (dump_file && (dump_flags & TDF_DETAILS))
844 fprintf (dump_file, "Equivalence: ");
845 print_generic_expr (dump_file, notop);
846 fprintf (dump_file, " + ~");
847 print_generic_expr (dump_file, oe->op);
848 fprintf (dump_file, " -> -1\n");
851 ops->ordered_remove (i);
852 add_to_ops_vec (ops, build_all_ones_cst (op_type));
853 ops->ordered_remove (currindex);
854 reassociate_stats.ops_eliminated ++;
856 return true;
860 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861 save it for later inspection in repropagate_negates(). */
862 if (negateop != NULL_TREE
863 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864 plus_negates.safe_push (curr->op);
866 return false;
869 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
870 bitwise not expression, look in OPS for a corresponding operand to
871 cancel it out. If we find one, remove the other from OPS, replace
872 OPS[CURRINDEX] with 0, and return true. Otherwise, return
873 false. */
875 static bool
876 eliminate_not_pairs (enum tree_code opcode,
877 vec<operand_entry *> *ops,
878 unsigned int currindex,
879 operand_entry *curr)
881 tree notop;
882 unsigned int i;
883 operand_entry *oe;
885 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886 || TREE_CODE (curr->op) != SSA_NAME)
887 return false;
889 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890 if (notop == NULL_TREE)
891 return false;
893 /* Any non-not version will have a rank that is one less than
894 the current rank. So once we hit those ranks, if we don't find
895 one, we can stop. */
897 for (i = currindex + 1;
898 ops->iterate (i, &oe)
899 && oe->rank >= curr->rank - 1;
900 i++)
902 if (oe->op == notop)
904 if (dump_file && (dump_flags & TDF_DETAILS))
906 fprintf (dump_file, "Equivalence: ");
907 print_generic_expr (dump_file, notop);
908 if (opcode == BIT_AND_EXPR)
909 fprintf (dump_file, " & ~");
910 else if (opcode == BIT_IOR_EXPR)
911 fprintf (dump_file, " | ~");
912 print_generic_expr (dump_file, oe->op);
913 if (opcode == BIT_AND_EXPR)
914 fprintf (dump_file, " -> 0\n");
915 else if (opcode == BIT_IOR_EXPR)
916 fprintf (dump_file, " -> -1\n");
919 if (opcode == BIT_AND_EXPR)
920 oe->op = build_zero_cst (TREE_TYPE (oe->op));
921 else if (opcode == BIT_IOR_EXPR)
922 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
924 reassociate_stats.ops_eliminated += ops->length () - 1;
925 ops->truncate (0);
926 ops->quick_push (oe);
927 return true;
931 return false;
934 /* Use constant value that may be present in OPS to try to eliminate
935 operands. Note that this function is only really used when we've
936 eliminated ops for other reasons, or merged constants. Across
937 single statements, fold already does all of this, plus more. There
938 is little point in duplicating logic, so I've only included the
939 identities that I could ever construct testcases to trigger. */
941 static void
942 eliminate_using_constants (enum tree_code opcode,
943 vec<operand_entry *> *ops)
945 operand_entry *oelast = ops->last ();
946 tree type = TREE_TYPE (oelast->op);
948 if (oelast->rank == 0
949 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
951 switch (opcode)
953 case BIT_AND_EXPR:
954 if (integer_zerop (oelast->op))
956 if (ops->length () != 1)
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found & 0, removing all other ops\n");
961 reassociate_stats.ops_eliminated += ops->length () - 1;
963 ops->truncate (0);
964 ops->quick_push (oelast);
965 return;
968 else if (integer_all_onesp (oelast->op))
970 if (ops->length () != 1)
972 if (dump_file && (dump_flags & TDF_DETAILS))
973 fprintf (dump_file, "Found & -1, removing\n");
974 ops->pop ();
975 reassociate_stats.ops_eliminated++;
978 break;
979 case BIT_IOR_EXPR:
980 if (integer_all_onesp (oelast->op))
982 if (ops->length () != 1)
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 fprintf (dump_file, "Found | -1, removing all other ops\n");
987 reassociate_stats.ops_eliminated += ops->length () - 1;
989 ops->truncate (0);
990 ops->quick_push (oelast);
991 return;
994 else if (integer_zerop (oelast->op))
996 if (ops->length () != 1)
998 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "Found | 0, removing\n");
1000 ops->pop ();
1001 reassociate_stats.ops_eliminated++;
1004 break;
1005 case MULT_EXPR:
1006 if (integer_zerop (oelast->op)
1007 || (FLOAT_TYPE_P (type)
1008 && !HONOR_NANS (type)
1009 && !HONOR_SIGNED_ZEROS (type)
1010 && real_zerop (oelast->op)))
1012 if (ops->length () != 1)
1014 if (dump_file && (dump_flags & TDF_DETAILS))
1015 fprintf (dump_file, "Found * 0, removing all other ops\n");
1017 reassociate_stats.ops_eliminated += ops->length () - 1;
1018 ops->truncate (0);
1019 ops->quick_push (oelast);
1020 return;
1023 else if (integer_onep (oelast->op)
1024 || (FLOAT_TYPE_P (type)
1025 && !HONOR_SNANS (type)
1026 && real_onep (oelast->op)))
1028 if (ops->length () != 1)
1030 if (dump_file && (dump_flags & TDF_DETAILS))
1031 fprintf (dump_file, "Found * 1, removing\n");
1032 ops->pop ();
1033 reassociate_stats.ops_eliminated++;
1034 return;
1037 break;
1038 case BIT_XOR_EXPR:
1039 case PLUS_EXPR:
1040 case MINUS_EXPR:
1041 if (integer_zerop (oelast->op)
1042 || (FLOAT_TYPE_P (type)
1043 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044 && fold_real_zero_addition_p (type, oelast->op,
1045 opcode == MINUS_EXPR)))
1047 if (ops->length () != 1)
1049 if (dump_file && (dump_flags & TDF_DETAILS))
1050 fprintf (dump_file, "Found [|^+] 0, removing\n");
1051 ops->pop ();
1052 reassociate_stats.ops_eliminated++;
1053 return;
1056 break;
1057 default:
1058 break;
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065 bool, bool);
1067 /* Structure for tracking and counting operands. */
1068 struct oecount {
1069 unsigned int cnt;
1070 unsigned int id;
1071 enum tree_code oecode;
1072 tree op;
1076 /* The heap for the oecount hashtable and the sorted list of operands. */
1077 static vec<oecount> cvec;
1080 /* Oecount hashtable helpers. */
1082 struct oecount_hasher : int_hash <int, 0, 1>
1084 static inline hashval_t hash (int);
1085 static inline bool equal (int, int);
1088 /* Hash function for oecount. */
1090 inline hashval_t
1091 oecount_hasher::hash (int p)
1093 const oecount *c = &cvec[p - 42];
1094 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1097 /* Comparison function for oecount. */
1099 inline bool
1100 oecount_hasher::equal (int p1, int p2)
1102 const oecount *c1 = &cvec[p1 - 42];
1103 const oecount *c2 = &cvec[p2 - 42];
1104 return c1->oecode == c2->oecode && c1->op == c2->op;
1107 /* Comparison function for qsort sorting oecount elements by count. */
1109 static int
1110 oecount_cmp (const void *p1, const void *p2)
1112 const oecount *c1 = (const oecount *)p1;
1113 const oecount *c2 = (const oecount *)p2;
1114 if (c1->cnt != c2->cnt)
1115 return c1->cnt > c2->cnt ? 1 : -1;
1116 else
1117 /* If counts are identical, use unique IDs to stabilize qsort. */
1118 return c1->id > c2->id ? 1 : -1;
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122 to some exponent. */
1124 static bool
1125 stmt_is_power_of_op (gimple *stmt, tree op)
1127 if (!is_gimple_call (stmt))
1128 return false;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 CASE_CFN_POWI:
1134 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1136 default:
1137 return false;
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142 in place and return the result. Assumes that stmt_is_power_of_op
1143 was previously called for STMT and returned TRUE. */
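/* For illustration: given __builtin_pow (x, 5.0), the exponent is
   rewritten to 4.0 and 4 is returned; given __builtin_powi (x, 3), the
   exponent becomes 2 and 2 is returned.  */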
1145 static HOST_WIDE_INT
1146 decrement_power (gimple *stmt)
1148 REAL_VALUE_TYPE c, cint;
1149 HOST_WIDE_INT power;
1150 tree arg1;
1152 switch (gimple_call_combined_fn (stmt))
1154 CASE_CFN_POW:
1155 arg1 = gimple_call_arg (stmt, 1);
1156 c = TREE_REAL_CST (arg1);
1157 power = real_to_integer (&c) - 1;
1158 real_from_integer (&cint, VOIDmode, power, SIGNED);
1159 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160 return power;
1162 CASE_CFN_POWI:
1163 arg1 = gimple_call_arg (stmt, 1);
1164 power = TREE_INT_CST_LOW (arg1) - 1;
1165 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166 return power;
1168 default:
1169 gcc_unreachable ();
1173 /* Replace the SSA name defined by STMT with a new SSA name and replace
1174 all uses of the old one with it. Also return the new SSA name. */
1176 static tree
1177 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1179 gimple *use_stmt;
1180 use_operand_p use;
1181 imm_use_iterator iter;
1182 tree new_lhs, new_debug_lhs = NULL_TREE;
1183 tree lhs = gimple_get_lhs (stmt);
1185 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186 gimple_set_lhs (stmt, new_lhs);
1188 /* Also need to update GIMPLE_DEBUGs. */
1189 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1191 tree repl = new_lhs;
1192 if (is_gimple_debug (use_stmt))
1194 if (new_debug_lhs == NULL_TREE)
1196 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197 gdebug *def_temp
1198 = gimple_build_debug_bind (new_debug_lhs,
1199 build2 (opcode, TREE_TYPE (lhs),
1200 new_lhs, op),
1201 stmt);
1202 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205 gimple_set_uid (def_temp, gimple_uid (stmt));
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1209 repl = new_debug_lhs;
1211 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212 SET_USE (use, repl);
1213 update_stmt (use_stmt);
1215 return new_lhs;
1218 /* Replace all SSA names defined in STMTS_TO_FIX with new SSA names and
1219 replace their uses. Also do this for the stmt that defines DEF
1220 if *DEF is not OP. */
1222 static void
1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224 vec<gimple *> &stmts_to_fix)
1226 unsigned i;
1227 gimple *stmt;
1229 if (*def != op
1230 && TREE_CODE (*def) == SSA_NAME
1231 && (stmt = SSA_NAME_DEF_STMT (*def))
1232 && gimple_code (stmt) != GIMPLE_NOP)
1233 *def = make_new_ssa_for_def (stmt, opcode, op);
1235 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236 make_new_ssa_for_def (stmt, opcode, op);
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1241 replace *DEF with OP as well. */
1243 static void
1244 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1246 tree lhs;
1247 gimple *use_stmt;
1248 use_operand_p use;
1249 gimple_stmt_iterator gsi;
1251 if (is_gimple_call (stmt))
1252 lhs = gimple_call_lhs (stmt);
1253 else
1254 lhs = gimple_assign_lhs (stmt);
1256 gcc_assert (has_single_use (lhs));
1257 single_imm_use (lhs, &use, &use_stmt);
1258 if (lhs == *def)
1259 *def = op;
1260 SET_USE (use, op);
1261 if (TREE_CODE (op) != SSA_NAME)
1262 update_stmt (use_stmt);
1263 gsi = gsi_for_stmt (stmt);
1264 unlink_stmt_vdef (stmt);
1265 reassoc_remove_stmt (&gsi);
1266 release_defs (stmt);
1269 /* Walks the linear chain with result *DEF searching for an operation
1270 with operand OP and code OPCODE, removing that from the chain. *DEF
1271 is updated if there is only one operand but no operation left. */
1273 static void
1274 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1276 tree orig_def = *def;
1277 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1278 /* PR72835 - Record the stmt chain that has to be updated such that
1279 we don't use the same LHS when the values computed are different. */
1280 auto_vec<gimple *, 64> stmts_to_fix;
1284 tree name;
1286 if (opcode == MULT_EXPR)
1288 if (stmt_is_power_of_op (stmt, op))
1290 if (decrement_power (stmt) == 1)
1292 if (stmts_to_fix.length () > 0)
1293 stmts_to_fix.pop ();
1294 propagate_op_to_single_use (op, stmt, def);
1296 break;
1298 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1300 if (gimple_assign_rhs1 (stmt) == op)
1302 tree cst = build_minus_one_cst (TREE_TYPE (op));
1303 if (stmts_to_fix.length () > 0)
1304 stmts_to_fix.pop ();
1305 propagate_op_to_single_use (cst, stmt, def);
1306 break;
1308 else if (integer_minus_onep (op)
1309 || real_minus_onep (op))
1311 gimple_assign_set_rhs_code
1312 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313 break;
1318 name = gimple_assign_rhs1 (stmt);
1320 /* If this is the operation we look for and one of the operands
1321 is ours simply propagate the other operand into the stmts
1322 single use. */
1323 if (gimple_assign_rhs_code (stmt) == opcode
1324 && (name == op
1325 || gimple_assign_rhs2 (stmt) == op))
1327 if (name == op)
1328 name = gimple_assign_rhs2 (stmt);
1329 if (stmts_to_fix.length () > 0)
1330 stmts_to_fix.pop ();
1331 propagate_op_to_single_use (name, stmt, def);
1332 break;
1335 /* We might have a multiply of two __builtin_pow* calls, and
1336 the operand might be hiding in the rightmost one. Likewise
1337 this can happen for a negate. */
1338 if (opcode == MULT_EXPR
1339 && gimple_assign_rhs_code (stmt) == opcode
1340 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341 && has_single_use (gimple_assign_rhs2 (stmt)))
1343 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344 if (stmt_is_power_of_op (stmt2, op))
1346 if (decrement_power (stmt2) == 1)
1347 propagate_op_to_single_use (op, stmt2, def);
1348 else
1349 stmts_to_fix.safe_push (stmt2);
1350 break;
1352 else if (is_gimple_assign (stmt2)
1353 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1355 if (gimple_assign_rhs1 (stmt2) == op)
1357 tree cst = build_minus_one_cst (TREE_TYPE (op));
1358 propagate_op_to_single_use (cst, stmt2, def);
1359 break;
1361 else if (integer_minus_onep (op)
1362 || real_minus_onep (op))
1364 stmts_to_fix.safe_push (stmt2);
1365 gimple_assign_set_rhs_code
1366 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367 break;
1372 /* Continue walking the chain. */
1373 gcc_assert (name != op
1374 && TREE_CODE (name) == SSA_NAME);
1375 stmt = SSA_NAME_DEF_STMT (name);
1376 stmts_to_fix.safe_push (stmt);
1378 while (1);
1380 if (stmts_to_fix.length () > 0 || *def == orig_def)
1381 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1384 /* Returns true if statement S1 dominates statement S2. Like
1385 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1387 static bool
1388 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1390 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1392 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1393 SSA_NAME. Assume it lives at the beginning of function and
1394 thus dominates everything. */
1395 if (!bb1 || s1 == s2)
1396 return true;
1398 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1399 if (!bb2)
1400 return false;
1402 if (bb1 == bb2)
1404 /* PHIs in the same basic block are assumed to be
1405 executed all in parallel, if only one stmt is a PHI,
1406 it dominates the other stmt in the same basic block. */
1407 if (gimple_code (s1) == GIMPLE_PHI)
1408 return true;
1410 if (gimple_code (s2) == GIMPLE_PHI)
1411 return false;
1413 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1415 if (gimple_uid (s1) < gimple_uid (s2))
1416 return true;
1418 if (gimple_uid (s1) > gimple_uid (s2))
1419 return false;
1421 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422 unsigned int uid = gimple_uid (s1);
1423 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1425 gimple *s = gsi_stmt (gsi);
1426 if (gimple_uid (s) != uid)
1427 break;
1428 if (s == s2)
1429 return true;
1432 return false;
1435 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1438 /* Insert STMT after INSERT_POINT. */
1440 static void
1441 insert_stmt_after (gimple *stmt, gimple *insert_point)
1443 gimple_stmt_iterator gsi;
1444 basic_block bb;
1446 if (gimple_code (insert_point) == GIMPLE_PHI)
1447 bb = gimple_bb (insert_point);
1448 else if (!stmt_ends_bb_p (insert_point))
1450 gsi = gsi_for_stmt (insert_point);
1451 gimple_set_uid (stmt, gimple_uid (insert_point));
1452 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453 return;
1455 else
1456 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457 thus if it must end a basic block, it should be a call that can
1458 throw, or some assignment that can throw. If it throws, the LHS
1459 of it will not be initialized though, so only valid places using
1460 the SSA_NAME should be dominated by the fallthru edge. */
1461 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462 gsi = gsi_after_labels (bb);
1463 if (gsi_end_p (gsi))
1465 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466 gimple_set_uid (stmt,
1467 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1469 else
1470 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1474 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1475 name of type TYPE for the result. Places the statement after the
1476 definition of either OP1 or OP2. Returns the new statement. */
1478 static gimple *
1479 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1481 gimple *op1def = NULL, *op2def = NULL;
1482 gimple_stmt_iterator gsi;
1483 tree op;
1484 gassign *sum;
1486 /* Create the addition statement. */
1487 op = make_ssa_name (type);
1488 sum = gimple_build_assign (op, opcode, op1, op2);
1490 /* Find an insertion place and insert. */
1491 if (TREE_CODE (op1) == SSA_NAME)
1492 op1def = SSA_NAME_DEF_STMT (op1);
1493 if (TREE_CODE (op2) == SSA_NAME)
1494 op2def = SSA_NAME_DEF_STMT (op2);
1495 if ((!op1def || gimple_nop_p (op1def))
1496 && (!op2def || gimple_nop_p (op2def)))
1498 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499 if (gsi_end_p (gsi))
1501 gimple_stmt_iterator gsi2
1502 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503 gimple_set_uid (sum,
1504 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1506 else
1507 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1510 else
1512 gimple *insert_point;
1513 if ((!op1def || gimple_nop_p (op1def))
1514 || (op2def && !gimple_nop_p (op2def)
1515 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 insert_point = op2def;
1517 else
1518 insert_point = op1def;
1519 insert_stmt_after (sum, insert_point);
1521 update_stmt (sum);
1523 return sum;
1526 /* Perform un-distribution of divisions and multiplications.
1527 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528 to (A + B) / X for real X.
1530 The algorithm is organized as follows.
1532 - First we walk the addition chain *OPS looking for summands that
1533 are defined by a multiplication or a real division. This results
1534 in the candidates bitmap with relevant indices into *OPS.
1536 - Second we build the chains of multiplications or divisions for
1537 these candidates, counting the number of occurrences of (operand, code)
1538 pairs in all of the candidates chains.
1540 - Third we sort the (operand, code) pairs by number of occurrence and
1541 process them starting with the pair with the most uses.
1543 * For each such pair we walk the candidates again to build a
1544 second candidate bitmap noting all multiplication/division chains
1545 that have at least one occurrence of (operand, code).
1547 * We build an alternate addition chain only covering these
1548 candidates with one (operand, code) operation removed from their
1549 multiplication/division chain.
1551 * The first candidate gets replaced by the alternate addition chain
1552 multiplied/divided by the operand.
1554 * All candidate chains get disabled for further processing and
1555 processing of (operand, code) pairs continues.
1557 The alternate addition chains built are re-processed by the main
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x
1559 to (a + b ) * x * y in one invocation of the reassociation pass. */
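/* A small illustration (hypothetical chain): for a*x + b*x + c the pair
   (x, MULT_EXPR) occurs in two candidate chains, so we build tmp = a + b,
   replace the first candidate by tmp * x and the second by 0, giving the
   chain tmp*x + 0 + c, which the main reassociation pass then processes
   further.  */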
1561 static bool
1562 undistribute_ops_list (enum tree_code opcode,
1563 vec<operand_entry *> *ops, class loop *loop)
1565 unsigned int length = ops->length ();
1566 operand_entry *oe1;
1567 unsigned i, j;
1568 unsigned nr_candidates, nr_candidates2;
1569 sbitmap_iterator sbi0;
1570 vec<operand_entry *> *subops;
1571 bool changed = false;
1572 unsigned int next_oecount_id = 0;
1574 if (length <= 1
1575 || opcode != PLUS_EXPR)
1576 return false;
1578 /* Build a list of candidates to process. */
1579 auto_sbitmap candidates (length);
1580 bitmap_clear (candidates);
1581 nr_candidates = 0;
1582 FOR_EACH_VEC_ELT (*ops, i, oe1)
1584 enum tree_code dcode;
1585 gimple *oe1def;
1587 if (TREE_CODE (oe1->op) != SSA_NAME)
1588 continue;
1589 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590 if (!is_gimple_assign (oe1def))
1591 continue;
1592 dcode = gimple_assign_rhs_code (oe1def);
1593 if ((dcode != MULT_EXPR
1594 && dcode != RDIV_EXPR)
1595 || !is_reassociable_op (oe1def, dcode, loop))
1596 continue;
1598 bitmap_set_bit (candidates, i);
1599 nr_candidates++;
1602 if (nr_candidates < 2)
1603 return false;
1605 if (dump_file && (dump_flags & TDF_DETAILS))
1607 fprintf (dump_file, "searching for un-distribute opportunities ");
1608 print_generic_expr (dump_file,
1609 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1610 fprintf (dump_file, " %d\n", nr_candidates);
1613 /* Build linearized sub-operand lists and the counting table. */
1614 cvec.create (0);
1616 hash_table<oecount_hasher> ctable (15);
1618 /* ??? Macro arguments cannot have multi-argument template types in
1619 them. This typedef is needed to work around that limitation. */
1620 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1624 gimple *oedef;
1625 enum tree_code oecode;
1626 unsigned j;
1628 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629 oecode = gimple_assign_rhs_code (oedef);
1630 linearize_expr_tree (&subops[i], oedef,
1631 associative_tree_code (oecode), false);
1633 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1635 oecount c;
1636 int *slot;
1637 int idx;
1638 c.oecode = oecode;
1639 c.cnt = 1;
1640 c.id = next_oecount_id++;
1641 c.op = oe1->op;
1642 cvec.safe_push (c);
1643 idx = cvec.length () + 41;
1644 slot = ctable.find_slot (idx, INSERT);
1645 if (!*slot)
1647 *slot = idx;
1649 else
1651 cvec.pop ();
1652 cvec[*slot - 42].cnt++;
1657 /* Sort the counting table. */
1658 cvec.qsort (oecount_cmp);
1660 if (dump_file && (dump_flags & TDF_DETAILS))
1662 oecount *c;
1663 fprintf (dump_file, "Candidates:\n");
1664 FOR_EACH_VEC_ELT (cvec, j, c)
1666 fprintf (dump_file, " %u %s: ", c->cnt,
1667 c->oecode == MULT_EXPR
1668 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669 print_generic_expr (dump_file, c->op);
1670 fprintf (dump_file, "\n");
1674 /* Process the (operand, code) pairs in order of most occurrence. */
1675 auto_sbitmap candidates2 (length);
1676 while (!cvec.is_empty ())
1678 oecount *c = &cvec.last ();
1679 if (c->cnt < 2)
1680 break;
1682 /* Now collect the operands in the outer chain that contain
1683 the common operand in their inner chain. */
1684 bitmap_clear (candidates2);
1685 nr_candidates2 = 0;
1686 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1688 gimple *oedef;
1689 enum tree_code oecode;
1690 unsigned j;
1691 tree op = (*ops)[i]->op;
1693 /* If we undistributed in this chain already, this may be
1694 a constant. */
1695 if (TREE_CODE (op) != SSA_NAME)
1696 continue;
1698 oedef = SSA_NAME_DEF_STMT (op);
1699 oecode = gimple_assign_rhs_code (oedef);
1700 if (oecode != c->oecode)
1701 continue;
1703 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1705 if (oe1->op == c->op)
1707 bitmap_set_bit (candidates2, i);
1708 ++nr_candidates2;
1709 break;
1714 if (nr_candidates2 >= 2)
1716 operand_entry *oe1, *oe2;
1717 gimple *prod;
1718 int first = bitmap_first_set_bit (candidates2);
1720 /* Build the new addition chain. */
1721 oe1 = (*ops)[first];
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1724 fprintf (dump_file, "Building (");
1725 print_generic_expr (dump_file, oe1->op);
1727 zero_one_operation (&oe1->op, c->oecode, c->op);
1728 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1730 gimple *sum;
1731 oe2 = (*ops)[i];
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1734 fprintf (dump_file, " + ");
1735 print_generic_expr (dump_file, oe2->op);
1737 zero_one_operation (&oe2->op, c->oecode, c->op);
1738 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739 oe1->op, oe2->op, opcode);
1740 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741 oe2->rank = 0;
1742 oe1->op = gimple_get_lhs (sum);
1745 /* Apply the multiplication/division. */
1746 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747 oe1->op, c->op, c->oecode);
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1750 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751 print_generic_expr (dump_file, c->op);
1752 fprintf (dump_file, "\n");
1755 /* Record it in the addition chain and disable further
1756 undistribution with this op. */
1757 oe1->op = gimple_assign_lhs (prod);
1758 oe1->rank = get_rank (oe1->op);
1759 subops[first].release ();
1761 changed = true;
1764 cvec.pop ();
1767 for (i = 0; i < ops->length (); ++i)
1768 subops[i].release ();
1769 free (subops);
1770 cvec.release ();
1772 return changed;
1775 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1776 first: element index for each relevant BIT_FIELD_REF.
1777 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1778 typedef std::pair<unsigned, unsigned> v_info_elem;
1779 typedef auto_vec<v_info_elem, 32> v_info;
1780 typedef v_info *v_info_ptr;
1782 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1783 static int
1784 sort_by_mach_mode (const void *p_i, const void *p_j)
1786 const tree tr1 = *((const tree *) p_i);
1787 const tree tr2 = *((const tree *) p_j);
1788 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1789 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1790 if (mode1 > mode2)
1791 return 1;
1792 else if (mode1 < mode2)
1793 return -1;
1794 else
1795 return 0;
1798 /* Cleanup hash map for VECTOR information. */
1799 static void
1800 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1802 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1803 it != info_map.end (); ++it)
1805 v_info_ptr info = (*it).second;
1806 delete info;
1807 (*it).second = NULL;
1811 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1812 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1813 is transformed to
1814 Vs = (V1 + V2 + ... + Vn)
1815 Vs[0] + Vs[1] + ... + Vs[k]
1817 The basic steps are listed below:
1819 1) Check the addition chain *OPS, looking for summands that come from
1820 a VECTOR BIT_FIELD_REF on a VECTOR type. Put the information into
1821 v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1823 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs
1824 are contiguous and cover the whole VECTOR perfectly without any holes.
1825 Obtain one VECTOR list which contains the candidates to be transformed.
1827 3) Sort the VECTOR list by machine mode of the VECTOR type; for each group
1828 of candidates with the same mode, build the addition statements for them
1829 and generate BIT_FIELD_REFs accordingly.
1831 TODO:
1832 The current implementation requires that each VECTOR be fully covered.
1833 It could be extended to support partial coverage (adjacent elements that
1834 do not fill the whole VECTOR), but that would likely need some cost model
1835 to decide where to draw the boundary. */
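/* A concrete illustration (hypothetical V4SF vectors v1 and v2):

       v1[0] + v1[1] + v1[2] + v1[3] + v2[0] + v2[1] + v2[2] + v2[3]

   is rewritten as

       vs = v1 + v2;
       vs[0] + vs[1] + vs[2] + vs[3]

   trading four of the scalar element additions for one vector addition.  */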
1837 static bool
1838 undistribute_bitref_for_vector (enum tree_code opcode,
1839 vec<operand_entry *> *ops, struct loop *loop)
1841 if (ops->length () <= 1)
1842 return false;
1844 if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
1845 && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1846 return false;
1848 hash_map<tree, v_info_ptr> v_info_map;
1849 operand_entry *oe1;
1850 unsigned i;
1852 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1853 information into map. */
1854 FOR_EACH_VEC_ELT (*ops, i, oe1)
1856 enum tree_code dcode;
1857 gimple *oe1def;
1859 if (TREE_CODE (oe1->op) != SSA_NAME)
1860 continue;
1861 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1862 if (!is_gimple_assign (oe1def))
1863 continue;
1864 dcode = gimple_assign_rhs_code (oe1def);
1865 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1866 continue;
1868 tree rhs = gimple_assign_rhs1 (oe1def);
1869 tree vec = TREE_OPERAND (rhs, 0);
1870 tree vec_type = TREE_TYPE (vec);
1872 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1873 continue;
1875 /* Ignore it if target machine can't support this VECTOR type. */
1876 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1877 continue;
1879 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1880 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1881 continue;
1883 tree elem_type = TREE_TYPE (vec_type);
1884 unsigned HOST_WIDE_INT elem_size
1885 = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
1886 if (maybe_ne (bit_field_size (rhs), elem_size))
1887 continue;
1889 unsigned idx;
1890 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1891 continue;
1893 /* Ignore it if target machine can't support this type of VECTOR
1894 operation. */
1895 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1896 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1897 continue;
1899 bool existed;
1900 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1901 if (!existed)
1902 info = new v_info;
1903 info->safe_push (std::make_pair (idx, i));
1906 /* We need at least two VECTORs to combine. */
1907 if (v_info_map.elements () <= 1)
1909 cleanup_vinfo_map (v_info_map);
1910 return false;
1913 /* Verify all VECTOR candidates by checking two conditions:
1914 1) sorted offsets are adjacent, no holes.
1915 2) can fill the whole VECTOR perfectly.
1916 And add the valid candidates to a vector for further handling. */
1917 auto_vec<tree> valid_vecs (v_info_map.elements ());
1918 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1919 it != v_info_map.end (); ++it)
1921 tree cand_vec = (*it).first;
1922 v_info_ptr cand_info = (*it).second;
1923 unsigned int num_elems = VECTOR_CST_NELTS (cand_vec).to_constant ();
1924 if (cand_info->length () != num_elems)
1925 continue;
1926 sbitmap holes = sbitmap_alloc (num_elems);
1927 bitmap_ones (holes);
1928 bool valid = true;
1929 v_info_elem *curr;
1930 FOR_EACH_VEC_ELT (*cand_info, i, curr)
1932 if (!bitmap_bit_p (holes, curr->first))
1934 valid = false;
1935 break;
1937 else
1938 bitmap_clear_bit (holes, curr->first);
1940 if (valid && bitmap_empty_p (holes))
1941 valid_vecs.quick_push (cand_vec);
1942 sbitmap_free (holes);
1945 /* We need at least two VECTORs to combine. */
1946 if (valid_vecs.length () <= 1)
1948 cleanup_vinfo_map (v_info_map);
1949 return false;
1952 valid_vecs.qsort (sort_by_mach_mode);
1953 /* Go through all candidates in machine mode order; for each group of
1954 candidates sharing a mode, build their sum. Single-candidate modes are skipped. */
1955 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
1957 tree tvec = valid_vecs[i];
1958 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
1960 /* Skip modes with only a single candidate. */
1961 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
1962 continue;
1964 unsigned int idx, j;
1965 gimple *sum = NULL;
1966 v_info_ptr info_ptr;
1967 tree sum_vec = tvec;
1968 v_info_elem *elem;
1970 /* Build the sum for all candidates with same mode. */
1973 sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
1974 valid_vecs[i + 1], opcode);
1975 sum_vec = gimple_get_lhs (sum);
1976 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
1977 /* Update those related ops of current candidate VECTOR. */
1978 FOR_EACH_VEC_ELT (*info_ptr, j, elem)
1980 idx = elem->second;
1981 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
1982 /* Set this so that the op definition will get DCEd later. */
1983 gimple_set_visited (def, true);
1984 if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
1985 || opcode == BIT_IOR_EXPR)
1986 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
1987 else if (opcode == MULT_EXPR)
1988 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
1989 else
1991 gcc_assert (opcode == BIT_AND_EXPR);
1992 (*ops)[idx]->op
1993 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
1995 (*ops)[idx]->rank = 0;
1997 if (dump_file && (dump_flags & TDF_DETAILS))
1999 fprintf (dump_file, "Generating addition -> ");
2000 print_gimple_stmt (dump_file, sum, 0);
2002 i++;
2004 while ((i < valid_vecs.length () - 1)
2005 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2007 /* Referring to first valid VECTOR with this mode, generate the
2008 BIT_FIELD_REF statements accordingly. */
2009 info_ptr = *(v_info_map.get (tvec));
2010 gcc_assert (sum);
2011 tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
2012 FOR_EACH_VEC_ELT (*info_ptr, j, elem)
2014 idx = elem->second;
2015 tree dst = make_ssa_name (elem_type);
2016 gimple *gs = gimple_build_assign (
2017 dst, BIT_FIELD_REF,
2018 build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
2019 bitsize_int (elem->first
2020 * tree_to_uhwi (TYPE_SIZE (elem_type)))));
2021 insert_stmt_after (gs, sum);
2022 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2024 /* Set this so that the op definition will get DCEd later. */
2024 gimple_set_visited (def, true);
2025 (*ops)[idx]->op = gimple_assign_lhs (gs);
2026 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2027 if (dump_file && (dump_flags & TDF_DETAILS))
2029 fprintf (dump_file, "Generating bit_field_ref -> ");
2030 print_gimple_stmt (dump_file, gs, 0);
2035 if (dump_file && (dump_flags & TDF_DETAILS))
2036 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2038 cleanup_vinfo_map (v_info_map);
2040 return true;
2043 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2044 expression, examine the other OPS to see if any of them are comparisons
2045 of the same values, which we may be able to combine or eliminate.
2046 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
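/* A sketch of the BIT_AND_EXPR direction: (a <= b) & (a >= b) can fold to
   (a == b), while a contradictory pair such as (a < b) & (a > b) folds to a
   constant, which is then handled by the INTEGER_CST path below.  */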
2048 static bool
2049 eliminate_redundant_comparison (enum tree_code opcode,
2050 vec<operand_entry *> *ops,
2051 unsigned int currindex,
2052 operand_entry *curr)
2054 tree op1, op2;
2055 enum tree_code lcode, rcode;
2056 gimple *def1, *def2;
2057 int i;
2058 operand_entry *oe;
2060 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2061 return false;
2063 /* Check that CURR is a comparison. */
2064 if (TREE_CODE (curr->op) != SSA_NAME)
2065 return false;
2066 def1 = SSA_NAME_DEF_STMT (curr->op);
2067 if (!is_gimple_assign (def1))
2068 return false;
2069 lcode = gimple_assign_rhs_code (def1);
2070 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2071 return false;
2072 op1 = gimple_assign_rhs1 (def1);
2073 op2 = gimple_assign_rhs2 (def1);
2075 /* Now look for a similar comparison in the remaining OPS. */
2076 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2078 tree t;
2080 if (TREE_CODE (oe->op) != SSA_NAME)
2081 continue;
2082 def2 = SSA_NAME_DEF_STMT (oe->op);
2083 if (!is_gimple_assign (def2))
2084 continue;
2085 rcode = gimple_assign_rhs_code (def2);
2086 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2087 continue;
2089 /* If we got here, we have a match. See if we can combine the
2090 two comparisons. */
2091 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2092 if (opcode == BIT_IOR_EXPR)
2093 t = maybe_fold_or_comparisons (type,
2094 lcode, op1, op2,
2095 rcode, gimple_assign_rhs1 (def2),
2096 gimple_assign_rhs2 (def2));
2097 else
2098 t = maybe_fold_and_comparisons (type,
2099 lcode, op1, op2,
2100 rcode, gimple_assign_rhs1 (def2),
2101 gimple_assign_rhs2 (def2));
2102 if (!t)
2103 continue;
2105 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2106 always give us a boolean_type_node value back. If the original
2107 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2108 we need to convert. */
2109 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2110 t = fold_convert (TREE_TYPE (curr->op), t);
2112 if (TREE_CODE (t) != INTEGER_CST
2113 && !operand_equal_p (t, curr->op, 0))
2115 enum tree_code subcode;
2116 tree newop1, newop2;
2117 if (!COMPARISON_CLASS_P (t))
2118 continue;
2119 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2120 STRIP_USELESS_TYPE_CONVERSION (newop1);
2121 STRIP_USELESS_TYPE_CONVERSION (newop2);
2122 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2123 continue;
2126 if (dump_file && (dump_flags & TDF_DETAILS))
2128 fprintf (dump_file, "Equivalence: ");
2129 print_generic_expr (dump_file, curr->op);
2130 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2131 print_generic_expr (dump_file, oe->op);
2132 fprintf (dump_file, " -> ");
2133 print_generic_expr (dump_file, t);
2134 fprintf (dump_file, "\n");
2137 /* Now we can delete oe, as it has been subsumed by the new combined
2138 expression t. */
2139 ops->ordered_remove (i);
2140 reassociate_stats.ops_eliminated ++;
2142 /* If t is the same as curr->op, we're done. Otherwise we must
2143 replace curr->op with t. Special case is if we got a constant
2144 back, in which case we add it to the end instead of in place of
2145 the current entry. */
2146 if (TREE_CODE (t) == INTEGER_CST)
2148 ops->ordered_remove (currindex);
2149 add_to_ops_vec (ops, t);
2151 else if (!operand_equal_p (t, curr->op, 0))
2153 gimple *sum;
2154 enum tree_code subcode;
2155 tree newop1;
2156 tree newop2;
2157 gcc_assert (COMPARISON_CLASS_P (t));
2158 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2159 STRIP_USELESS_TYPE_CONVERSION (newop1);
2160 STRIP_USELESS_TYPE_CONVERSION (newop2);
2161 gcc_checking_assert (is_gimple_val (newop1)
2162 && is_gimple_val (newop2));
2163 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2164 curr->op = gimple_get_lhs (sum);
2166 return true;
2169 return false;
2173 /* Transform repeated addition of the same value into a multiplication
2174 by a constant. */
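/* For example, x + x + x + y becomes x * 3 + y: the run of equal operands
   is removed from OPS and replaced by a single MULT_EXPR entry.  Only
   integral types, or floats under -funsafe-math-optimizations, are
   handled.  */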
2175 static bool
2176 transform_add_to_multiply (vec<operand_entry *> *ops)
2178 operand_entry *oe;
2179 tree op = NULL_TREE;
2180 int j;
2181 int i, start = -1, end = 0, count = 0;
2182 auto_vec<std::pair <int, int> > indxs;
2183 bool changed = false;
2185 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2186 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2187 || !flag_unsafe_math_optimizations))
2188 return false;
2190 /* Look for repeated operands. */
2191 FOR_EACH_VEC_ELT (*ops, i, oe)
2193 if (start == -1)
2195 count = 1;
2196 op = oe->op;
2197 start = i;
2199 else if (operand_equal_p (oe->op, op, 0))
2201 count++;
2202 end = i;
2204 else
2206 if (count > 1)
2207 indxs.safe_push (std::make_pair (start, end));
2208 count = 1;
2209 op = oe->op;
2210 start = i;
2214 if (count > 1)
2215 indxs.safe_push (std::make_pair (start, end));
2217 for (j = indxs.length () - 1; j >= 0; --j)
2219 /* Convert repeated operand addition to multiplication. */
2220 start = indxs[j].first;
2221 end = indxs[j].second;
2222 op = (*ops)[start]->op;
2223 count = end - start + 1;
2224 for (i = end; i >= start; --i)
2225 ops->unordered_remove (i);
2226 tree tmp = make_ssa_name (TREE_TYPE (op));
2227 tree cst = build_int_cst (integer_type_node, count);
2228 gassign *mul_stmt
2229 = gimple_build_assign (tmp, MULT_EXPR,
2230 op, fold_convert (TREE_TYPE (op), cst));
2231 gimple_set_visited (mul_stmt, true);
2232 add_to_ops_vec (ops, tmp, mul_stmt);
2233 changed = true;
2236 return changed;
2240 /* Perform various identities and other optimizations on the list of
2241 operand entries, stored in OPS. The tree code for the binary
2242 operation between all the operands is OPCODE. */
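/* For example, with OPS built from a + 5 + b + 7, the two constants sort
   to the end (they have rank 0) and are folded into a single 12 before the
   pairwise eliminations below are tried.  */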
2244 static void
2245 optimize_ops_list (enum tree_code opcode,
2246 vec<operand_entry *> *ops)
2248 unsigned int length = ops->length ();
2249 unsigned int i;
2250 operand_entry *oe;
2251 operand_entry *oelast = NULL;
2252 bool iterate = false;
2254 if (length == 1)
2255 return;
2257 oelast = ops->last ();
2259 /* If the last two are constants, pop the constants off, merge them
2260 and try the next two. */
2261 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2263 operand_entry *oelm1 = (*ops)[length - 2];
2265 if (oelm1->rank == 0
2266 && is_gimple_min_invariant (oelm1->op)
2267 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2268 TREE_TYPE (oelast->op)))
2270 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2271 oelm1->op, oelast->op);
2273 if (folded && is_gimple_min_invariant (folded))
2275 if (dump_file && (dump_flags & TDF_DETAILS))
2276 fprintf (dump_file, "Merging constants\n");
2278 ops->pop ();
2279 ops->pop ();
2281 add_to_ops_vec (ops, folded);
2282 reassociate_stats.constants_eliminated++;
2284 optimize_ops_list (opcode, ops);
2285 return;
2290 eliminate_using_constants (opcode, ops);
2291 oelast = NULL;
2293 for (i = 0; ops->iterate (i, &oe);)
2295 bool done = false;
2297 if (eliminate_not_pairs (opcode, ops, i, oe))
2298 return;
2299 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2300 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2301 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2303 if (done)
2304 return;
2305 iterate = true;
2306 oelast = NULL;
2307 continue;
2309 oelast = oe;
2310 i++;
2313 if (iterate)
2314 optimize_ops_list (opcode, ops);
2317 /* The following functions are subroutines to optimize_range_tests and allow
2318 it to try to change a logical combination of comparisons into a range
2319 test.
2321 For example, both
2322 X == 2 || X == 5 || X == 3 || X == 4
2324 X >= 2 && X <= 5
2325 are converted to
2326 (unsigned) (X - 2) <= 3
2328 For more information see comments above fold_range_test in fold-const.c,
2329 this implementation is for GIMPLE. */
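/* A worked check of the example above: for X in {2, 3, 4, 5} the value
   X - 2 is 0 through 3, while for any other X the unsigned subtraction
   wraps to something greater than 3, so the single unsigned comparison is
   equivalent to the whole chain.  */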
2331 struct range_entry
2333 tree exp;
2334 tree low;
2335 tree high;
2336 bool in_p;
2337 bool strict_overflow_p;
2338 unsigned int idx, next;
2341 /* This is similar to make_range in fold-const.c, but on top of
2342 GIMPLE instead of trees. If EXP is non-NULL, it should be
2343 an SSA_NAME and STMT argument is ignored, otherwise STMT
2344 argument should be a GIMPLE_COND. */
2346 static void
2347 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2349 int in_p;
2350 tree low, high;
2351 bool is_bool, strict_overflow_p;
2353 r->exp = NULL_TREE;
2354 r->in_p = false;
2355 r->strict_overflow_p = false;
2356 r->low = NULL_TREE;
2357 r->high = NULL_TREE;
2358 if (exp != NULL_TREE
2359 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2360 return;
2362 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2363 and see if we can refine the range. Some of the cases below may not
2364 happen, but it doesn't seem worth worrying about this. We "continue"
2365 the outer loop when we've changed something; otherwise we "break"
2366 the switch, which will "break" the while. */
2367 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2368 high = low;
2369 in_p = 0;
2370 strict_overflow_p = false;
2371 is_bool = false;
2372 if (exp == NULL_TREE)
2373 is_bool = true;
2374 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2376 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2377 is_bool = true;
2378 else
2379 return;
2381 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2382 is_bool = true;
2384 while (1)
2386 enum tree_code code;
2387 tree arg0, arg1, exp_type;
2388 tree nexp;
2389 location_t loc;
2391 if (exp != NULL_TREE)
2393 if (TREE_CODE (exp) != SSA_NAME
2394 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2395 break;
2397 stmt = SSA_NAME_DEF_STMT (exp);
2398 if (!is_gimple_assign (stmt))
2399 break;
2401 code = gimple_assign_rhs_code (stmt);
2402 arg0 = gimple_assign_rhs1 (stmt);
2403 arg1 = gimple_assign_rhs2 (stmt);
2404 exp_type = TREE_TYPE (exp);
2406 else
2408 code = gimple_cond_code (stmt);
2409 arg0 = gimple_cond_lhs (stmt);
2410 arg1 = gimple_cond_rhs (stmt);
2411 exp_type = boolean_type_node;
2414 if (TREE_CODE (arg0) != SSA_NAME
2415 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2416 break;
2417 loc = gimple_location (stmt);
2418 switch (code)
2420 case BIT_NOT_EXPR:
2421 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2422 /* Ensure the range is either +[-,0], +[0,0],
2423 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2424 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2425 or similar expression of unconditional true or
2426 false, it should not be negated. */
2427 && ((high && integer_zerop (high))
2428 || (low && integer_onep (low))))
2430 in_p = !in_p;
2431 exp = arg0;
2432 continue;
2434 break;
2435 case SSA_NAME:
2436 exp = arg0;
2437 continue;
2438 CASE_CONVERT:
2439 if (is_bool)
2441 if ((TYPE_PRECISION (exp_type) == 1
2442 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2443 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2444 return;
2446 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2448 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2449 is_bool = true;
2450 else
2451 return;
2453 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2454 is_bool = true;
2455 goto do_default;
2456 case EQ_EXPR:
2457 case NE_EXPR:
2458 case LT_EXPR:
2459 case LE_EXPR:
2460 case GE_EXPR:
2461 case GT_EXPR:
2462 is_bool = true;
2463 /* FALLTHRU */
2464 default:
2465 if (!is_bool)
2466 return;
2467 do_default:
2468 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2469 &low, &high, &in_p,
2470 &strict_overflow_p);
2471 if (nexp != NULL_TREE)
2473 exp = nexp;
2474 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2475 continue;
2477 break;
2479 break;
2481 if (is_bool)
2483 r->exp = exp;
2484 r->in_p = in_p;
2485 r->low = low;
2486 r->high = high;
2487 r->strict_overflow_p = strict_overflow_p;
2491 /* Comparison function for qsort. Sort entries
2492 without SSA_NAME exp first, then with SSA_NAMEs sorted
2493 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2494 by increasing ->low and if ->low is the same, by increasing
2495 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2496 maximum. */
2498 static int
2499 range_entry_cmp (const void *a, const void *b)
2501 const struct range_entry *p = (const struct range_entry *) a;
2502 const struct range_entry *q = (const struct range_entry *) b;
2504 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2506 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2508 /* Group range_entries for the same SSA_NAME together. */
2509 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2510 return -1;
2511 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2512 return 1;
2513 /* If ->low is different, NULL low goes first, then by
2514 ascending low. */
2515 if (p->low != NULL_TREE)
2517 if (q->low != NULL_TREE)
2519 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2520 p->low, q->low);
2521 if (tem && integer_onep (tem))
2522 return -1;
2523 tem = fold_binary (GT_EXPR, boolean_type_node,
2524 p->low, q->low);
2525 if (tem && integer_onep (tem))
2526 return 1;
2528 else
2529 return 1;
2531 else if (q->low != NULL_TREE)
2532 return -1;
2533 /* If ->high is different, NULL high goes last, before that by
2534 ascending high. */
2535 if (p->high != NULL_TREE)
2537 if (q->high != NULL_TREE)
2539 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2540 p->high, q->high);
2541 if (tem && integer_onep (tem))
2542 return -1;
2543 tem = fold_binary (GT_EXPR, boolean_type_node,
2544 p->high, q->high);
2545 if (tem && integer_onep (tem))
2546 return 1;
2548 else
2549 return -1;
2551 else if (q->high != NULL_TREE)
2552 return 1;
2553 /* If both ranges are the same, sort below by ascending idx. */
2555 else
2556 return 1;
2558 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2559 return -1;
2561 if (p->idx < q->idx)
2562 return -1;
2563 else
2565 gcc_checking_assert (p->idx > q->idx);
2566 return 1;
2570 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2571 insert any needed statements before or after GSI, as selected by BEFORE. */
2573 static tree
2574 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2576 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2577 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2578 if (TREE_CODE (ret) != SSA_NAME)
2580 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2581 if (before)
2582 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2583 else
2584 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2585 ret = gimple_assign_lhs (g);
2587 return ret;
2590 /* Helper routine of optimize_range_tests.
2591 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2592 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2593 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2594 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2595 an array of COUNT pointers to other ranges. Return
2596 true if the range merge has been successful.
2597 If OPCODE is ERROR_MARK, this is called from within
2598 maybe_optimize_range_tests and is performing inter-bb range optimization.
2599 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2600 oe->rank. */
2602 static bool
2603 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2604 struct range_entry **otherrangep,
2605 unsigned int count, enum tree_code opcode,
2606 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2607 bool in_p, tree low, tree high, bool strict_overflow_p)
2609 operand_entry *oe = (*ops)[range->idx];
2610 tree op = oe->op;
2611 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2612 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2613 location_t loc = gimple_location (stmt);
2614 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2615 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2616 in_p, low, high);
2617 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2618 gimple_stmt_iterator gsi;
2619 unsigned int i, uid;
2621 if (tem == NULL_TREE)
2622 return false;
2624 /* If op is default def SSA_NAME, there is no place to insert the
2625 new comparison. Give up, unless we can use OP itself as the
2626 range test. */
2627 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2629 if (op == range->exp
2630 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2631 || TREE_CODE (optype) == BOOLEAN_TYPE)
2632 && (op == tem
2633 || (TREE_CODE (tem) == EQ_EXPR
2634 && TREE_OPERAND (tem, 0) == op
2635 && integer_onep (TREE_OPERAND (tem, 1))))
2636 && opcode != BIT_IOR_EXPR
2637 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2639 stmt = NULL;
2640 tem = op;
2642 else
2643 return false;
2646 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2647 warning_at (loc, OPT_Wstrict_overflow,
2648 "assuming signed overflow does not occur "
2649 "when simplifying range test");
2651 if (dump_file && (dump_flags & TDF_DETAILS))
2653 struct range_entry *r;
2654 fprintf (dump_file, "Optimizing range tests ");
2655 print_generic_expr (dump_file, range->exp);
2656 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2657 print_generic_expr (dump_file, range->low);
2658 fprintf (dump_file, ", ");
2659 print_generic_expr (dump_file, range->high);
2660 fprintf (dump_file, "]");
2661 for (i = 0; i < count; i++)
2663 if (otherrange)
2664 r = otherrange + i;
2665 else
2666 r = otherrangep[i];
2667 if (r->exp
2668 && r->exp != range->exp
2669 && TREE_CODE (r->exp) == SSA_NAME)
2671 fprintf (dump_file, " and ");
2672 print_generic_expr (dump_file, r->exp);
2674 else
2675 fprintf (dump_file, " and");
2676 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2677 print_generic_expr (dump_file, r->low);
2678 fprintf (dump_file, ", ");
2679 print_generic_expr (dump_file, r->high);
2680 fprintf (dump_file, "]");
2682 fprintf (dump_file, "\n into ");
2683 print_generic_expr (dump_file, tem);
2684 fprintf (dump_file, "\n");
2687 if (opcode == BIT_IOR_EXPR
2688 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2689 tem = invert_truthvalue_loc (loc, tem);
2691 tem = fold_convert_loc (loc, optype, tem);
2692 if (stmt)
2694 gsi = gsi_for_stmt (stmt);
2695 uid = gimple_uid (stmt);
2697 else
2699 gsi = gsi_none ();
2700 uid = 0;
2702 if (stmt == NULL)
2703 gcc_checking_assert (tem == op);
2704 /* In rare cases range->exp can be equal to lhs of stmt.
2705 In that case we have to insert after the stmt rather than before
2706 it. If stmt is a PHI, insert it at the start of the basic block. */
2707 else if (op != range->exp)
2709 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2710 tem = force_into_ssa_name (&gsi, tem, true);
2711 gsi_prev (&gsi);
2713 else if (gimple_code (stmt) != GIMPLE_PHI)
2715 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2716 tem = force_into_ssa_name (&gsi, tem, false);
2718 else
2720 gsi = gsi_after_labels (gimple_bb (stmt));
2721 if (!gsi_end_p (gsi))
2722 uid = gimple_uid (gsi_stmt (gsi));
2723 else
2725 gsi = gsi_start_bb (gimple_bb (stmt));
2726 uid = 1;
2727 while (!gsi_end_p (gsi))
2729 uid = gimple_uid (gsi_stmt (gsi));
2730 gsi_next (&gsi);
2733 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2734 tem = force_into_ssa_name (&gsi, tem, true);
2735 if (gsi_end_p (gsi))
2736 gsi = gsi_last_bb (gimple_bb (stmt));
2737 else
2738 gsi_prev (&gsi);
2740 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2741 if (gimple_uid (gsi_stmt (gsi)))
2742 break;
2743 else
2744 gimple_set_uid (gsi_stmt (gsi), uid);
2746 oe->op = tem;
2747 range->exp = exp;
2748 range->low = low;
2749 range->high = high;
2750 range->in_p = in_p;
2751 range->strict_overflow_p = false;
2753 for (i = 0; i < count; i++)
2755 if (otherrange)
2756 range = otherrange + i;
2757 else
2758 range = otherrangep[i];
2759 oe = (*ops)[range->idx];
2760 /* Now change all the other range test immediate uses, so that
2761 those tests will be optimized away. */
2762 if (opcode == ERROR_MARK)
2764 if (oe->op)
2765 oe->op = build_int_cst (TREE_TYPE (oe->op),
2766 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2767 else
2768 oe->op = (oe->rank == BIT_IOR_EXPR
2769 ? boolean_false_node : boolean_true_node);
2771 else
2772 oe->op = error_mark_node;
2773 range->exp = NULL_TREE;
2774 range->low = NULL_TREE;
2775 range->high = NULL_TREE;
2777 return true;
2780 /* Optimize X == CST1 || X == CST2
2781 if popcount (CST1 ^ CST2) == 1 into
2782 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2783 Similarly for ranges. E.g.
2784 X != 2 && X != 3 && X != 10 && X != 11
2785 will be transformed by the previous optimization into
2786 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2787 and this loop can transform that into
2788 !(((X & ~8) - 2U) <= 1U). */
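/* A worked instance: for X == 2 || X == 10, 2 ^ 10 == 8 has exactly one
   bit set, so the pair becomes (X & ~8) == 2, i.e. ignore bit 3 and
   compare the remaining bits against the common value.  */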
2790 static bool
2791 optimize_range_tests_xor (enum tree_code opcode, tree type,
2792 tree lowi, tree lowj, tree highi, tree highj,
2793 vec<operand_entry *> *ops,
2794 struct range_entry *rangei,
2795 struct range_entry *rangej)
2797 tree lowxor, highxor, tem, exp;
2798 /* Check lowi ^ lowj == highi ^ highj and
2799 popcount (lowi ^ lowj) == 1. */
2800 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2801 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2802 return false;
2803 if (!integer_pow2p (lowxor))
2804 return false;
2805 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2806 if (!tree_int_cst_equal (lowxor, highxor))
2807 return false;
2809 exp = rangei->exp;
2810 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2811 int prec = GET_MODE_PRECISION (mode);
2812 if (TYPE_PRECISION (type) < prec
2813 || (wi::to_wide (TYPE_MIN_VALUE (type))
2814 != wi::min_value (prec, TYPE_SIGN (type)))
2815 || (wi::to_wide (TYPE_MAX_VALUE (type))
2816 != wi::max_value (prec, TYPE_SIGN (type))))
2818 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2819 exp = fold_convert (type, exp);
2820 lowxor = fold_convert (type, lowxor);
2821 lowi = fold_convert (type, lowi);
2822 highi = fold_convert (type, highi);
2824 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2825 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2826 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2827 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2828 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2829 NULL, rangei->in_p, lowj, highj,
2830 rangei->strict_overflow_p
2831 || rangej->strict_overflow_p))
2832 return true;
2833 return false;
2836 /* Optimize X == CST1 || X == CST2
2837 if popcount (CST2 - CST1) == 1 into
2838 ((X - CST1) & ~(CST2 - CST1)) == 0.
2839 Similarly for ranges. E.g.
2840 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2841 || X == 75 || X == 45
2842 will be transformed by the previous optimization into
2843 (X - 43U) <= 3U || (X - 75U) <= 3U
2844 and this loop can transform that into
2845 ((X - 43U) & ~(75U - 43U)) <= 3U. */
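/* A worked instance: for X == 43 || X == 75, 75 - 43 == 32 is a power of
   two, so the pair becomes ((X - 43U) & ~32U) == 0, which accepts exactly
   X - 43U in {0, 32}.  */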
2846 static bool
2847 optimize_range_tests_diff (enum tree_code opcode, tree type,
2848 tree lowi, tree lowj, tree highi, tree highj,
2849 vec<operand_entry *> *ops,
2850 struct range_entry *rangei,
2851 struct range_entry *rangej)
2853 tree tem1, tem2, mask;
2854 /* Check highi - lowi == highj - lowj. */
2855 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2856 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2857 return false;
2858 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2859 if (!tree_int_cst_equal (tem1, tem2))
2860 return false;
2861 /* Check popcount (lowj - lowi) == 1. */
2862 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2863 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2864 return false;
2865 if (!integer_pow2p (tem1))
2866 return false;
2868 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2869 int prec = GET_MODE_PRECISION (mode);
2870 if (TYPE_PRECISION (type) < prec
2871 || (wi::to_wide (TYPE_MIN_VALUE (type))
2872 != wi::min_value (prec, TYPE_SIGN (type)))
2873 || (wi::to_wide (TYPE_MAX_VALUE (type))
2874 != wi::max_value (prec, TYPE_SIGN (type))))
2875 type = build_nonstandard_integer_type (prec, 1);
2876 else
2877 type = unsigned_type_for (type);
2878 tem1 = fold_convert (type, tem1);
2879 tem2 = fold_convert (type, tem2);
2880 lowi = fold_convert (type, lowi);
2881 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2882 tem1 = fold_build2 (MINUS_EXPR, type,
2883 fold_convert (type, rangei->exp), lowi);
2884 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2885 lowj = build_int_cst (type, 0);
2886 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2887 NULL, rangei->in_p, lowj, tem2,
2888 rangei->strict_overflow_p
2889 || rangej->strict_overflow_p))
2890 return true;
2891 return false;
2894 /* Perform the checks common to optimize_range_tests_xor and
2895 optimize_range_tests_diff.
2896 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2897 otherwise call optimize_range_tests_diff. */
2899 static bool
2900 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2901 bool optimize_xor, vec<operand_entry *> *ops,
2902 struct range_entry *ranges)
2904 int i, j;
2905 bool any_changes = false;
2906 for (i = first; i < length; i++)
2908 tree lowi, highi, lowj, highj, type, tem;
2910 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2911 continue;
2912 type = TREE_TYPE (ranges[i].exp);
2913 if (!INTEGRAL_TYPE_P (type))
2914 continue;
2915 lowi = ranges[i].low;
2916 if (lowi == NULL_TREE)
2917 lowi = TYPE_MIN_VALUE (type);
2918 highi = ranges[i].high;
2919 if (highi == NULL_TREE)
2920 continue;
2921 for (j = i + 1; j < length && j < i + 64; j++)
2923 bool changes;
2924 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2925 continue;
2926 lowj = ranges[j].low;
2927 if (lowj == NULL_TREE)
2928 continue;
2929 highj = ranges[j].high;
2930 if (highj == NULL_TREE)
2931 highj = TYPE_MAX_VALUE (type);
2932 /* Check lowj > highi. */
2933 tem = fold_binary (GT_EXPR, boolean_type_node,
2934 lowj, highi);
2935 if (tem == NULL_TREE || !integer_onep (tem))
2936 continue;
2937 if (optimize_xor)
2938 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2939 highi, highj, ops,
2940 ranges + i, ranges + j);
2941 else
2942 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2943 highi, highj, ops,
2944 ranges + i, ranges + j);
2945 if (changes)
2947 any_changes = true;
2948 break;
2952 return any_changes;
2955 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2956 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2957 EXP on success, NULL otherwise. */
2959 static tree
2960 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2961 wide_int *mask, tree *totallowp)
2963 tree tem = int_const_binop (MINUS_EXPR, high, low);
2964 if (tem == NULL_TREE
2965 || TREE_CODE (tem) != INTEGER_CST
2966 || TREE_OVERFLOW (tem)
2967 || tree_int_cst_sgn (tem) == -1
2968 || compare_tree_int (tem, prec) != -1)
2969 return NULL_TREE;
2971 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2972 *mask = wi::shifted_mask (0, max, false, prec);
2973 if (TREE_CODE (exp) == BIT_AND_EXPR
2974 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2976 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2977 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2978 if (wi::popcount (msk) == 1
2979 && wi::ltu_p (msk, prec - max))
2981 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2982 max += msk.to_uhwi ();
2983 exp = TREE_OPERAND (exp, 0);
2984 if (integer_zerop (low)
2985 && TREE_CODE (exp) == PLUS_EXPR
2986 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2988 tree ret = TREE_OPERAND (exp, 0);
2989 STRIP_NOPS (ret);
2990 widest_int bias
2991 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2992 TYPE_PRECISION (TREE_TYPE (low))));
2993 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2994 if (totallowp)
2996 *totallowp = tbias;
2997 return ret;
2999 else if (!tree_int_cst_lt (totallow, tbias))
3000 return NULL_TREE;
3001 bias = wi::to_widest (tbias);
3002 bias -= wi::to_widest (totallow);
3003 if (bias >= 0 && bias < prec - max)
3005 *mask = wi::lshift (*mask, bias);
3006 return ret;
3011 if (totallowp)
3012 return exp;
3013 if (!tree_int_cst_lt (totallow, low))
3014 return exp;
3015 tem = int_const_binop (MINUS_EXPR, low, totallow);
3016 if (tem == NULL_TREE
3017 || TREE_CODE (tem) != INTEGER_CST
3018 || TREE_OVERFLOW (tem)
3019 || compare_tree_int (tem, prec - max) == 1)
3020 return NULL_TREE;
3022 *mask = wi::lshift (*mask, wi::to_widest (tem));
3023 return exp;
3026 /* Attempt to optimize small range tests using bit test.
3027 E.g.
3028 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3029 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3030 has been optimized by earlier optimizations into:
3031 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3032 As the whole 43 through 82 range spans fewer than 64 values,
3033 for 64-bit word targets optimize that into:
3034 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
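/* In the example above the mask 0x8F0000004FULL has bits 0-3, 6, 32-35
   and 39 set, one for each tested value 43-46, 49, 75-78 and 82 minus the
   base 43; the (X - 43U) > 40U part covers values outside that span, where
   the shift result is irrelevant.  */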
3036 static bool
3037 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3038 vec<operand_entry *> *ops,
3039 struct range_entry *ranges)
3041 int i, j;
3042 bool any_changes = false;
3043 int prec = GET_MODE_BITSIZE (word_mode);
3044 auto_vec<struct range_entry *, 64> candidates;
3046 for (i = first; i < length - 2; i++)
3048 tree lowi, highi, lowj, highj, type;
3050 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3051 continue;
3052 type = TREE_TYPE (ranges[i].exp);
3053 if (!INTEGRAL_TYPE_P (type))
3054 continue;
3055 lowi = ranges[i].low;
3056 if (lowi == NULL_TREE)
3057 lowi = TYPE_MIN_VALUE (type);
3058 highi = ranges[i].high;
3059 if (highi == NULL_TREE)
3060 continue;
3061 wide_int mask;
3062 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3063 highi, &mask, &lowi);
3064 if (exp == NULL_TREE)
3065 continue;
3066 bool strict_overflow_p = ranges[i].strict_overflow_p;
3067 candidates.truncate (0);
3068 int end = MIN (i + 64, length);
3069 for (j = i + 1; j < end; j++)
3071 tree exp2;
3072 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3073 continue;
3074 if (ranges[j].exp == exp)
3076 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3078 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3079 if (exp2 == exp)
3081 else if (TREE_CODE (exp2) == PLUS_EXPR)
3083 exp2 = TREE_OPERAND (exp2, 0);
3084 STRIP_NOPS (exp2);
3085 if (exp2 != exp)
3086 continue;
3088 else
3089 continue;
3091 else
3092 continue;
3093 lowj = ranges[j].low;
3094 if (lowj == NULL_TREE)
3095 continue;
3096 highj = ranges[j].high;
3097 if (highj == NULL_TREE)
3098 highj = TYPE_MAX_VALUE (type);
3099 wide_int mask2;
3100 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3101 highj, &mask2, NULL);
3102 if (exp2 != exp)
3103 continue;
3104 mask |= mask2;
3105 strict_overflow_p |= ranges[j].strict_overflow_p;
3106 candidates.safe_push (&ranges[j]);
3109 /* If we would otherwise need 3 or more comparisons, use a bit test instead. */
3110 if (candidates.length () >= 2)
3112 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3113 wi::to_widest (lowi)
3114 + prec - 1 - wi::clz (mask));
3115 operand_entry *oe = (*ops)[ranges[i].idx];
3116 tree op = oe->op;
3117 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3118 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3119 location_t loc = gimple_location (stmt);
3120 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3122 /* See if it is cheaper to pretend the minimum value of the
3123 range is 0, if the maximum value is small enough.
3124 We can then avoid subtracting the minimum value, but the
3125 mask constant could perhaps be more expensive. */
3126 if (compare_tree_int (lowi, 0) > 0
3127 && compare_tree_int (high, prec) < 0)
3129 int cost_diff;
3130 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3131 rtx reg = gen_raw_REG (word_mode, 10000);
3132 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3133 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
3134 GEN_INT (-m)), speed_p);
3135 rtx r = immed_wide_int_const (mask, word_mode);
3136 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3137 word_mode, speed_p);
3138 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3139 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3140 word_mode, speed_p);
3141 if (cost_diff > 0)
3143 mask = wi::lshift (mask, m);
3144 lowi = build_zero_cst (TREE_TYPE (lowi));
3148 tree tem = build_range_check (loc, optype, unshare_expr (exp),
3149 false, lowi, high);
3150 if (tem == NULL_TREE || is_gimple_val (tem))
3151 continue;
3152 tree etype = unsigned_type_for (TREE_TYPE (exp));
3153 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3154 fold_convert_loc (loc, etype, exp),
3155 fold_convert_loc (loc, etype, lowi));
3156 exp = fold_convert_loc (loc, integer_type_node, exp);
3157 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3158 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3159 build_int_cst (word_type, 1), exp);
3160 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3161 wide_int_to_tree (word_type, mask));
3162 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3163 build_zero_cst (word_type));
3164 if (is_gimple_val (exp))
3165 continue;
3167 /* The shift might have undefined behavior if TEM is true,
3168 but reassociate_bb isn't prepared to have basic blocks
3169 split when it is running. So, temporarily emit code
3170 with BIT_IOR_EXPR instead of &&, and fix it up in
3171 branch_fixup. */
3172 gimple_seq seq;
3173 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3174 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3175 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3176 gimple_seq seq2;
3177 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3178 gimple_seq_add_seq_without_update (&seq, seq2);
3179 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3180 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3181 gimple *g = gimple_build_assign (make_ssa_name (optype),
3182 BIT_IOR_EXPR, tem, exp);
3183 gimple_set_location (g, loc);
3184 gimple_seq_add_stmt_without_update (&seq, g);
3185 exp = gimple_assign_lhs (g);
3186 tree val = build_zero_cst (optype);
3187 if (update_range_test (&ranges[i], NULL, candidates.address (),
3188 candidates.length (), opcode, ops, exp,
3189 seq, false, val, val, strict_overflow_p))
3191 any_changes = true;
3192 reassoc_branch_fixups.safe_push (tem);
3194 else
3195 gimple_seq_discard (seq);
3198 return any_changes;
3201 /* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0
3202 and similarly x != -1 || y != -1 || z != -1 into (x & y & z) != -1. */
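/* The same range representation also covers the complementary forms, so
   x == 0 && y == 0 && z == 0 becomes (x | y | z) == 0 and
   x == -1 && y == -1 && z == -1 becomes (x & y & z) == -1.  */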
3204 static bool
3205 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3206 vec<operand_entry *> *ops,
3207 struct range_entry *ranges)
3209 int i;
3210 unsigned int b;
3211 bool any_changes = false;
3212 auto_vec<int, 128> buckets;
3213 auto_vec<int, 32> chains;
3214 auto_vec<struct range_entry *, 32> candidates;
3216 for (i = first; i < length; i++)
3218 if (ranges[i].exp == NULL_TREE
3219 || TREE_CODE (ranges[i].exp) != SSA_NAME
3220 || !ranges[i].in_p
3221 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3222 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3223 || ranges[i].low == NULL_TREE
3224 || ranges[i].low != ranges[i].high)
3225 continue;
3227 bool zero_p = integer_zerop (ranges[i].low);
3228 if (!zero_p && !integer_all_onesp (ranges[i].low))
3229 continue;
3231 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3232 if (buckets.length () <= b)
3233 buckets.safe_grow_cleared (b + 1);
3234 if (chains.length () <= (unsigned) i)
3235 chains.safe_grow (i + 1);
3236 chains[i] = buckets[b];
3237 buckets[b] = i + 1;
3240 FOR_EACH_VEC_ELT (buckets, b, i)
3241 if (i && chains[i - 1])
3243 int j, k = i;
3244 for (j = chains[i - 1]; j; j = chains[j - 1])
3246 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3247 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3248 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3249 k = j;
3251 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3252 tree type2 = NULL_TREE;
3253 bool strict_overflow_p = false;
3254 candidates.truncate (0);
3255 for (j = i; j; j = chains[j - 1])
3257 tree type = TREE_TYPE (ranges[j - 1].exp);
3258 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3259 if (j == k
3260 || useless_type_conversion_p (type1, type))
3262 else if (type2 == NULL_TREE
3263 || useless_type_conversion_p (type2, type))
3265 if (type2 == NULL_TREE)
3266 type2 = type;
3267 candidates.safe_push (&ranges[j - 1]);
3270 unsigned l = candidates.length ();
3271 for (j = i; j; j = chains[j - 1])
3273 tree type = TREE_TYPE (ranges[j - 1].exp);
3274 if (j == k)
3275 continue;
3276 if (useless_type_conversion_p (type1, type))
3278 else if (type2 == NULL_TREE
3279 || useless_type_conversion_p (type2, type))
3280 continue;
3281 candidates.safe_push (&ranges[j - 1]);
3283 gimple_seq seq = NULL;
3284 tree op = NULL_TREE;
3285 unsigned int id;
3286 struct range_entry *r;
3287 candidates.safe_push (&ranges[k - 1]);
3288 FOR_EACH_VEC_ELT (candidates, id, r)
3290 gimple *g;
3291 if (id == 0)
3293 op = r->exp;
3294 continue;
3296 if (id == l)
3298 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3299 gimple_seq_add_stmt_without_update (&seq, g);
3300 op = gimple_assign_lhs (g);
3302 tree type = TREE_TYPE (r->exp);
3303 tree exp = r->exp;
3304 if (id >= l && !useless_type_conversion_p (type1, type))
3306 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3307 gimple_seq_add_stmt_without_update (&seq, g);
3308 exp = gimple_assign_lhs (g);
3310 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3311 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3312 op, exp);
3313 gimple_seq_add_stmt_without_update (&seq, g);
3314 op = gimple_assign_lhs (g);
3316 candidates.pop ();
3317 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3318 candidates.length (), opcode, ops, op,
3319 seq, true, ranges[k - 1].low,
3320 ranges[k - 1].low, strict_overflow_p))
3321 any_changes = true;
3322 else
3323 gimple_seq_discard (seq);
3326 return any_changes;
3329 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3330 a >= 0 && a < b into (unsigned) a < (unsigned) b
3331 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
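/* This is valid because a negative signed A reinterprets as an unsigned
   value of at least 2^(precision-1), whereas B >= 0 stays below that bound,
   so the unsigned comparison fails exactly when the A >= 0 test would
   have.  */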
3333 static bool
3334 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3335 vec<operand_entry *> *ops,
3336 struct range_entry *ranges,
3337 basic_block first_bb)
3339 int i;
3340 bool any_changes = false;
3341 hash_map<tree, int> *map = NULL;
3343 for (i = first; i < length; i++)
3345 if (ranges[i].exp == NULL_TREE
3346 || TREE_CODE (ranges[i].exp) != SSA_NAME
3347 || !ranges[i].in_p)
3348 continue;
3350 tree type = TREE_TYPE (ranges[i].exp);
3351 if (!INTEGRAL_TYPE_P (type)
3352 || TYPE_UNSIGNED (type)
3353 || ranges[i].low == NULL_TREE
3354 || !integer_zerop (ranges[i].low)
3355 || ranges[i].high != NULL_TREE)
3356 continue;
3357 /* EXP >= 0 here. */
3358 if (map == NULL)
3359 map = new hash_map <tree, int>;
3360 map->put (ranges[i].exp, i);
3363 if (map == NULL)
3364 return false;
3366 for (i = 0; i < length; i++)
3368 bool in_p = ranges[i].in_p;
3369 if (ranges[i].low == NULL_TREE
3370 || ranges[i].high == NULL_TREE)
3371 continue;
3372 if (!integer_zerop (ranges[i].low)
3373 || !integer_zerop (ranges[i].high))
3375 if (ranges[i].exp
3376 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3377 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3378 && integer_onep (ranges[i].low)
3379 && integer_onep (ranges[i].high))
3380 in_p = !in_p;
3381 else
3382 continue;
3385 gimple *stmt;
3386 tree_code ccode;
3387 tree rhs1, rhs2;
3388 if (ranges[i].exp)
3390 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3391 continue;
3392 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3393 if (!is_gimple_assign (stmt))
3394 continue;
3395 ccode = gimple_assign_rhs_code (stmt);
3396 rhs1 = gimple_assign_rhs1 (stmt);
3397 rhs2 = gimple_assign_rhs2 (stmt);
3399 else
3401 operand_entry *oe = (*ops)[ranges[i].idx];
3402 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3403 if (gimple_code (stmt) != GIMPLE_COND)
3404 continue;
3405 ccode = gimple_cond_code (stmt);
3406 rhs1 = gimple_cond_lhs (stmt);
3407 rhs2 = gimple_cond_rhs (stmt);
3410 if (TREE_CODE (rhs1) != SSA_NAME
3411 || rhs2 == NULL_TREE
3412 || TREE_CODE (rhs2) != SSA_NAME)
3413 continue;
3415 switch (ccode)
3417 case GT_EXPR:
3418 case GE_EXPR:
3419 case LT_EXPR:
3420 case LE_EXPR:
3421 break;
3422 default:
3423 continue;
3425 if (in_p)
3426 ccode = invert_tree_comparison (ccode, false);
3427 switch (ccode)
3429 case GT_EXPR:
3430 case GE_EXPR:
3431 std::swap (rhs1, rhs2);
3432 ccode = swap_tree_comparison (ccode);
3433 break;
3434 case LT_EXPR:
3435 case LE_EXPR:
3436 break;
3437 default:
3438 gcc_unreachable ();
3441 int *idx = map->get (rhs1);
3442 if (idx == NULL)
3443 continue;
3445 /* maybe_optimize_range_tests allows statements without side-effects
3446 in the basic blocks as long as they are consumed in the same bb.
3447 Make sure rhs2's def stmt is not among them, otherwise we can't
3448 safely use get_nonzero_bits on it. E.g. in:
3449 # RANGE [-83, 1] NONZERO 173
3450 # k_32 = PHI <k_47(13), k_12(9)>
3452 if (k_32 >= 0)
3453 goto <bb 5>; [26.46%]
3454 else
3455 goto <bb 9>; [73.54%]
3457 <bb 5> [local count: 140323371]:
3458 # RANGE [0, 1] NONZERO 1
3459 _5 = (int) k_32;
3460 # RANGE [0, 4] NONZERO 4
3461 _21 = _5 << 2;
3462 # RANGE [0, 4] NONZERO 4
3463 iftmp.0_44 = (char) _21;
3464 if (k_32 < iftmp.0_44)
3465 goto <bb 6>; [84.48%]
3466 else
3467 goto <bb 9>; [15.52%]
3468 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3469 k_32 >= 0. If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3470 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3471 those stmts even for negative k_32, the value ranges would no longer
3472 be guaranteed, and so the optimization would be invalid. */
3473 while (opcode == ERROR_MARK)
3475 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3476 basic_block bb2 = gimple_bb (g);
3477 if (bb2
3478 && bb2 != first_bb
3479 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3481 /* As an exception, handle a few common cases. */
3482 if (gimple_assign_cast_p (g)
3483 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3485 tree op0 = gimple_assign_rhs1 (g);
3486 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3487 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3488 > TYPE_PRECISION (TREE_TYPE (op0))))
3489 /* Zero-extension is always ok. */
3490 break;
3491 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3492 == TYPE_PRECISION (TREE_TYPE (op0))
3493 && TREE_CODE (op0) == SSA_NAME)
3495 /* Cast from signed to unsigned or vice versa. Retry
3496 with the op0 as new rhs2. */
3497 rhs2 = op0;
3498 continue;
3501 else if (is_gimple_assign (g)
3502 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3503 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3504 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3505 /* Masking with INTEGER_CST with MSB clear is always ok
3506 too. */
3507 break;
3508 rhs2 = NULL_TREE;
3510 break;
3512 if (rhs2 == NULL_TREE)
3513 continue;
3515 wide_int nz = get_nonzero_bits (rhs2);
3516 if (wi::neg_p (nz))
3517 continue;
3519 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3520 and RHS2 is known to be RHS2 >= 0. */
3521 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3523 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3524 if ((ranges[*idx].strict_overflow_p
3525 || ranges[i].strict_overflow_p)
3526 && issue_strict_overflow_warning (wc))
3527 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3528 "assuming signed overflow does not occur "
3529 "when simplifying range test");
3531 if (dump_file && (dump_flags & TDF_DETAILS))
3533 struct range_entry *r = &ranges[*idx];
3534 fprintf (dump_file, "Optimizing range test ");
3535 print_generic_expr (dump_file, r->exp);
3536 fprintf (dump_file, " +[");
3537 print_generic_expr (dump_file, r->low);
3538 fprintf (dump_file, ", ");
3539 print_generic_expr (dump_file, r->high);
3540 fprintf (dump_file, "] and comparison ");
3541 print_generic_expr (dump_file, rhs1);
3542 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3543 print_generic_expr (dump_file, rhs2);
3544 fprintf (dump_file, "\n into (");
3545 print_generic_expr (dump_file, utype);
3546 fprintf (dump_file, ") ");
3547 print_generic_expr (dump_file, rhs1);
3548 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3549 print_generic_expr (dump_file, utype);
3550 fprintf (dump_file, ") ");
3551 print_generic_expr (dump_file, rhs2);
3552 fprintf (dump_file, "\n");
3555 operand_entry *oe = (*ops)[ranges[i].idx];
3556 ranges[i].in_p = 0;
3557 if (opcode == BIT_IOR_EXPR
3558 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3560 ranges[i].in_p = 1;
3561 ccode = invert_tree_comparison (ccode, false);
3564 unsigned int uid = gimple_uid (stmt);
3565 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3566 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3567 gimple_set_uid (g, uid);
3568 rhs1 = gimple_assign_lhs (g);
3569 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3570 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3572 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3573 gimple_set_uid (g, uid);
3574 rhs2 = gimple_assign_lhs (g);
3575 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3577 if (tree_swap_operands_p (rhs1, rhs2))
3579 std::swap (rhs1, rhs2);
3580 ccode = swap_tree_comparison (ccode);
3582 if (gimple_code (stmt) == GIMPLE_COND)
3584 gcond *c = as_a <gcond *> (stmt);
3585 gimple_cond_set_code (c, ccode);
3586 gimple_cond_set_lhs (c, rhs1);
3587 gimple_cond_set_rhs (c, rhs2);
3588 update_stmt (stmt);
3590 else
3592 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3593 if (!INTEGRAL_TYPE_P (ctype)
3594 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3595 && TYPE_PRECISION (ctype) != 1))
3596 ctype = boolean_type_node;
3597 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3598 gimple_set_uid (g, uid);
3599 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3600 if (oe->op && ctype != TREE_TYPE (oe->op))
3602 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3603 NOP_EXPR, gimple_assign_lhs (g));
3604 gimple_set_uid (g, uid);
3605 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3607 ranges[i].exp = gimple_assign_lhs (g);
3608 oe->op = ranges[i].exp;
3609 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3610 ranges[i].high = ranges[i].low;
3612 ranges[i].strict_overflow_p = false;
3613 oe = (*ops)[ranges[*idx].idx];
3614 /* Now change all the other range test immediate uses, so that
3615 those tests will be optimized away. */
3616 if (opcode == ERROR_MARK)
3618 if (oe->op)
3619 oe->op = build_int_cst (TREE_TYPE (oe->op),
3620 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3621 else
3622 oe->op = (oe->rank == BIT_IOR_EXPR
3623 ? boolean_false_node : boolean_true_node);
3625 else
3626 oe->op = error_mark_node;
3627 ranges[*idx].exp = NULL_TREE;
3628 ranges[*idx].low = NULL_TREE;
3629 ranges[*idx].high = NULL_TREE;
3630 any_changes = true;
3633 delete map;
3634 return any_changes;
3637 /* Optimize range tests, similarly how fold_range_test optimizes
3638 it on trees. The tree code for the binary
3639 operation between all the operands is OPCODE.
3640 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3641 maybe_optimize_range_tests for inter-bb range optimization.
3642 In that case, if oe->op is NULL, oe->id is the index of a bb whose
3643 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3644 the actual opcode.
3645 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3647 static bool
3648 optimize_range_tests (enum tree_code opcode,
3649 vec<operand_entry *> *ops, basic_block first_bb)
3651 unsigned int length = ops->length (), i, j, first;
3652 operand_entry *oe;
3653 struct range_entry *ranges;
3654 bool any_changes = false;
3656 if (length == 1)
3657 return false;
3659 ranges = XNEWVEC (struct range_entry, length);
3660 for (i = 0; i < length; i++)
3662 oe = (*ops)[i];
3663 ranges[i].idx = i;
3664 init_range_entry (ranges + i, oe->op,
3665 oe->op
3666 ? NULL
3667 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3668 /* For | invert it now, we will invert it again before emitting
3669 the optimized expression. */
3670 if (opcode == BIT_IOR_EXPR
3671 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3672 ranges[i].in_p = !ranges[i].in_p;
3675 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3676 for (i = 0; i < length; i++)
3677 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3678 break;
3680 /* Try to merge ranges. */
3681 for (first = i; i < length; i++)
3683 tree low = ranges[i].low;
3684 tree high = ranges[i].high;
3685 int in_p = ranges[i].in_p;
3686 bool strict_overflow_p = ranges[i].strict_overflow_p;
3687 int update_fail_count = 0;
3689 for (j = i + 1; j < length; j++)
3691 if (ranges[i].exp != ranges[j].exp)
3692 break;
3693 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3694 ranges[j].in_p, ranges[j].low, ranges[j].high))
3695 break;
3696 strict_overflow_p |= ranges[j].strict_overflow_p;
3699 if (j == i + 1)
3700 continue;
3702 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3703 opcode, ops, ranges[i].exp, NULL, in_p,
3704 low, high, strict_overflow_p))
3706 i = j - 1;
3707 any_changes = true;
3709 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3710 while update_range_test would fail. */
3711 else if (update_fail_count == 64)
3712 i = j - 1;
3713 else
3714 ++update_fail_count;
3717 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3718 ops, ranges);
3720 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3721 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3722 ops, ranges);
3723 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3724 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3725 ops, ranges);
3726 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3727 ops, ranges);
3728 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3729 ranges, first_bb);
3731 if (any_changes && opcode != ERROR_MARK)
3733 j = 0;
3734 FOR_EACH_VEC_ELT (*ops, i, oe)
3736 if (oe->op == error_mark_node)
3737 continue;
3738 else if (i != j)
3739 (*ops)[j] = oe;
3740 j++;
3742 ops->truncate (j);
3745 XDELETEVEC (ranges);
3746 return any_changes;
3749 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3750 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3751 otherwise the comparison code. TYPE is a return value that is set
3752 to the type of the comparison. */
3754 static tree_code
3755 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
3757 if (TREE_CODE (var) != SSA_NAME)
3758 return ERROR_MARK;
3760 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3761 if (stmt == NULL)
3762 return ERROR_MARK;
3764 /* ??? If we start creating more COND_EXPR, we could perform
3765 this same optimization with them. For now, simplify. */
3766 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3767 return ERROR_MARK;
3769 tree cond = gimple_assign_rhs1 (stmt);
3770 tree_code cmp = TREE_CODE (cond);
3771 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3772 return ERROR_MARK;
3774 /* ??? For now, allow only canonical true and false result vectors.
3775 We could expand this to other constants should the need arise,
3776 but at the moment we don't create them. */
3777 tree t = gimple_assign_rhs2 (stmt);
3778 tree f = gimple_assign_rhs3 (stmt);
3779 bool inv;
3780 if (integer_all_onesp (t))
3781 inv = false;
3782 else if (integer_all_onesp (f))
3784 cmp = invert_tree_comparison (cmp, false);
3785 inv = true;
3787 else
3788 return ERROR_MARK;
3789 if (!integer_zerop (f))
3790 return ERROR_MARK;
3792 /* Success! */
3793 if (rets)
3794 *rets = stmt;
3795 if (reti)
3796 *reti = inv;
3797 if (type)
3798 *type = TREE_TYPE (cond);
3799 return cmp;
3802 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3803 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3805 static bool
3806 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3808 unsigned int length = ops->length (), i, j;
3809 bool any_changes = false;
3811 if (length == 1)
3812 return false;
3814 for (i = 0; i < length; ++i)
3816 tree elt0 = (*ops)[i]->op;
3818 gassign *stmt0;
3819 bool invert;
3820 tree type;
3821 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type);
3822 if (cmp0 == ERROR_MARK)
3823 continue;
3825 for (j = i + 1; j < length; ++j)
3827 tree &elt1 = (*ops)[j]->op;
3829 gassign *stmt1;
3830 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL);
3831 if (cmp1 == ERROR_MARK)
3832 continue;
3834 tree cond0 = gimple_assign_rhs1 (stmt0);
3835 tree x0 = TREE_OPERAND (cond0, 0);
3836 tree y0 = TREE_OPERAND (cond0, 1);
3838 tree cond1 = gimple_assign_rhs1 (stmt1);
3839 tree x1 = TREE_OPERAND (cond1, 0);
3840 tree y1 = TREE_OPERAND (cond1, 1);
3842 tree comb;
3843 if (opcode == BIT_AND_EXPR)
3844 comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1,
3845 y1);
3846 else if (opcode == BIT_IOR_EXPR)
3847 comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1,
3848 y1);
3849 else
3850 gcc_unreachable ();
3851 if (comb == NULL)
3852 continue;
3854 /* Success! */
3855 if (dump_file && (dump_flags & TDF_DETAILS))
3857 fprintf (dump_file, "Transforming ");
3858 print_generic_expr (dump_file, cond0);
3859 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3860 print_generic_expr (dump_file, cond1);
3861 fprintf (dump_file, " into ");
3862 print_generic_expr (dump_file, comb);
3863 fputc ('\n', dump_file);
3866 gimple_assign_set_rhs1 (stmt0, comb);
3867 if (invert)
3868 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3869 *gimple_assign_rhs3_ptr (stmt0));
3870 update_stmt (stmt0);
3872 elt1 = error_mark_node;
3873 any_changes = true;
3877 if (any_changes)
3879 operand_entry *oe;
3880 j = 0;
3881 FOR_EACH_VEC_ELT (*ops, i, oe)
3883 if (oe->op == error_mark_node)
3884 continue;
3885 else if (i != j)
3886 (*ops)[j] = oe;
3887 j++;
3889 ops->truncate (j);
3892 return any_changes;
3895 /* Return true if STMT is a cast like:
3896 <bb N>:
3898 _123 = (int) _234;
3900 <bb M>:
3901 # _345 = PHI <_123(N), 1(...), 1(...)>
3902 where _234 has bool type, _123 has a single use, and
3903 bb N has a single successor M. This is commonly used in
3904 the last block of a range test.
3906 Also return true if STMT is a tcc_comparison like:
3907 <bb N>:
3909 _234 = a_2(D) == 2;
3911 <bb M>:
3912 # _345 = PHI <_234(N), 1(...), 1(...)>
3913 _346 = (int) _345;
3914 where _234 has bool type, a single use, and
3915 bb N has a single successor M. This is commonly used in
3916 the last block of a range test. */
3918 static bool
3919 final_range_test_p (gimple *stmt)
3921 basic_block bb, rhs_bb, lhs_bb;
3922 edge e;
3923 tree lhs, rhs;
3924 use_operand_p use_p;
3925 gimple *use_stmt;
3927 if (!gimple_assign_cast_p (stmt)
3928 && (!is_gimple_assign (stmt)
3929 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3930 != tcc_comparison)))
3931 return false;
3932 bb = gimple_bb (stmt);
3933 if (!single_succ_p (bb))
3934 return false;
3935 e = single_succ_edge (bb);
3936 if (e->flags & EDGE_COMPLEX)
3937 return false;
3939 lhs = gimple_assign_lhs (stmt);
3940 rhs = gimple_assign_rhs1 (stmt);
3941 if (gimple_assign_cast_p (stmt)
3942 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3943 || TREE_CODE (rhs) != SSA_NAME
3944 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3945 return false;
3947 if (!gimple_assign_cast_p (stmt)
3948 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3949 return false;
3951 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3952 if (!single_imm_use (lhs, &use_p, &use_stmt))
3953 return false;
3955 if (gimple_code (use_stmt) != GIMPLE_PHI
3956 || gimple_bb (use_stmt) != e->dest)
3957 return false;
3959 /* And that the rhs is defined in the same loop. */
3960 if (gimple_assign_cast_p (stmt))
3962 if (TREE_CODE (rhs) != SSA_NAME
3963 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3964 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3965 return false;
3967 else
3969 if (TREE_CODE (lhs) != SSA_NAME
3970 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3971 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3972 return false;
3975 return true;
3978 /* Return true if BB is a suitable basic block for inter-bb range test
3979 optimization. If BACKWARD is true, BB should be the only predecessor
3980 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3981 or compared against to find a common basic block to which all conditions
3982 branch if true resp. false. If BACKWARD is false, TEST_BB should
3983 be the only predecessor of BB. */
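/* Sketch of the CFG shape being matched (block numbers hypothetical):
     <bb 3>: if (a_1 < 10) goto <bb 7>; else goto <bb 4>;   <-- BB
     <bb 4>: if (b_2 != 0) goto <bb 7>; else goto <bb 5>;   <-- TEST_BB
   Here <bb 7> would be *OTHER_BB: each block in the chain has one edge
   into it and one edge to the next block of the chain, and the PHI
   arguments in *OTHER_BB coming from BB and TEST_BB must agree as
   checked below.  */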
3985 static bool
3986 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3987 bool backward)
3989 edge_iterator ei, ei2;
3990 edge e, e2;
3991 gimple *stmt;
3992 gphi_iterator gsi;
3993 bool other_edge_seen = false;
3994 bool is_cond;
3996 if (test_bb == bb)
3997 return false;
3998 /* Check last stmt first. */
3999 stmt = last_stmt (bb);
4000 if (stmt == NULL
4001 || (gimple_code (stmt) != GIMPLE_COND
4002 && (backward || !final_range_test_p (stmt)))
4003 || gimple_visited_p (stmt)
4004 || stmt_could_throw_p (cfun, stmt)
4005 || *other_bb == bb)
4006 return false;
4007 is_cond = gimple_code (stmt) == GIMPLE_COND;
4008 if (is_cond)
4010 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4011 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4012 to *OTHER_BB (if not set yet, try to find it out). */
4013 if (EDGE_COUNT (bb->succs) != 2)
4014 return false;
4015 FOR_EACH_EDGE (e, ei, bb->succs)
4017 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4018 return false;
4019 if (e->dest == test_bb)
4021 if (backward)
4022 continue;
4023 else
4024 return false;
4026 if (e->dest == bb)
4027 return false;
4028 if (*other_bb == NULL)
4030 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4031 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4032 return false;
4033 else if (e->dest == e2->dest)
4034 *other_bb = e->dest;
4035 if (*other_bb == NULL)
4036 return false;
4038 if (e->dest == *other_bb)
4039 other_edge_seen = true;
4040 else if (backward)
4041 return false;
4043 if (*other_bb == NULL || !other_edge_seen)
4044 return false;
4046 else if (single_succ (bb) != *other_bb)
4047 return false;
4049 /* Now check all PHIs of *OTHER_BB. */
4050 e = find_edge (bb, *other_bb);
4051 e2 = find_edge (test_bb, *other_bb);
4052 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4054 gphi *phi = gsi.phi ();
4055 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4056 corresponding to BB and TEST_BB predecessor must be the same. */
4057 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4058 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4060 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4061 one of the PHIs should have the lhs of the last stmt in
4062 that block as PHI arg and that PHI should have 0 or 1
4063 corresponding to it in all other range test basic blocks
4064 considered. */
4065 if (!is_cond)
4067 if (gimple_phi_arg_def (phi, e->dest_idx)
4068 == gimple_assign_lhs (stmt)
4069 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4070 || integer_onep (gimple_phi_arg_def (phi,
4071 e2->dest_idx))))
4072 continue;
4074 else
4076 gimple *test_last = last_stmt (test_bb);
4077 if (gimple_code (test_last) != GIMPLE_COND
4078 && gimple_phi_arg_def (phi, e2->dest_idx)
4079 == gimple_assign_lhs (test_last)
4080 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
4081 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
4082 continue;
4085 return false;
4088 return true;
4091 /* Return true if BB doesn't have side-effects that would disallow
4092 range test optimization, all SSA_NAMEs set in the bb are consumed
4093 in the bb and there are no PHIs. */
4095 static bool
4096 no_side_effect_bb (basic_block bb)
4098 gimple_stmt_iterator gsi;
4099 gimple *last;
4101 if (!gimple_seq_empty_p (phi_nodes (bb)))
4102 return false;
4103 last = last_stmt (bb);
4104 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4106 gimple *stmt = gsi_stmt (gsi);
4107 tree lhs;
4108 imm_use_iterator imm_iter;
4109 use_operand_p use_p;
4111 if (is_gimple_debug (stmt))
4112 continue;
4113 if (gimple_has_side_effects (stmt))
4114 return false;
4115 if (stmt == last)
4116 return true;
4117 if (!is_gimple_assign (stmt))
4118 return false;
4119 lhs = gimple_assign_lhs (stmt);
4120 if (TREE_CODE (lhs) != SSA_NAME)
4121 return false;
4122 if (gimple_assign_rhs_could_trap_p (stmt))
4123 return false;
4124 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4126 gimple *use_stmt = USE_STMT (use_p);
4127 if (is_gimple_debug (use_stmt))
4128 continue;
4129 if (gimple_bb (use_stmt) != bb)
4130 return false;
4133 return false;
4136 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4137 return true and fill in *OPS recursively. */
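/* E.g. (hypothetical SSA names) for
     _5 = a_1 | b_2;
     var_6 = _5 | c_3;
   get_ops (var_6, BIT_IOR_EXPR, ...) pushes a_1, b_2 and c_3 as
   separate operand entries, provided each intermediate and leaf name
   has a single use; the defining statements visited this way are
   marked visited so they are not processed again.  */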
4139 static bool
4140 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4141 class loop *loop)
4143 gimple *stmt = SSA_NAME_DEF_STMT (var);
4144 tree rhs[2];
4145 int i;
4147 if (!is_reassociable_op (stmt, code, loop))
4148 return false;
4150 rhs[0] = gimple_assign_rhs1 (stmt);
4151 rhs[1] = gimple_assign_rhs2 (stmt);
4152 gimple_set_visited (stmt, true);
4153 for (i = 0; i < 2; i++)
4154 if (TREE_CODE (rhs[i]) == SSA_NAME
4155 && !get_ops (rhs[i], code, ops, loop)
4156 && has_single_use (rhs[i]))
4158 operand_entry *oe = operand_entry_pool.allocate ();
4160 oe->op = rhs[i];
4161 oe->rank = code;
4162 oe->id = 0;
4163 oe->count = 1;
4164 oe->stmt_to_insert = NULL;
4165 ops->safe_push (oe);
4167 return true;
4170 /* Find the ops that were added by get_ops starting from VAR, see if
4171 they were changed during update_range_test and if yes, create new
4172 stmts. */
4174 static tree
4175 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4176 unsigned int *pidx, class loop *loop)
4178 gimple *stmt = SSA_NAME_DEF_STMT (var);
4179 tree rhs[4];
4180 int i;
4182 if (!is_reassociable_op (stmt, code, loop))
4183 return NULL;
4185 rhs[0] = gimple_assign_rhs1 (stmt);
4186 rhs[1] = gimple_assign_rhs2 (stmt);
4187 rhs[2] = rhs[0];
4188 rhs[3] = rhs[1];
4189 for (i = 0; i < 2; i++)
4190 if (TREE_CODE (rhs[i]) == SSA_NAME)
4192 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4193 if (rhs[2 + i] == NULL_TREE)
4195 if (has_single_use (rhs[i]))
4196 rhs[2 + i] = ops[(*pidx)++]->op;
4197 else
4198 rhs[2 + i] = rhs[i];
4201 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4202 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4204 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4205 var = make_ssa_name (TREE_TYPE (var));
4206 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4207 rhs[2], rhs[3]);
4208 gimple_set_uid (g, gimple_uid (stmt));
4209 gimple_set_visited (g, true);
4210 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4212 return var;
4215 /* Structure to track the initial value passed to get_ops and
4216 the range in the ops vector for each basic block. */
4218 struct inter_bb_range_test_entry
4220 tree op;
4221 unsigned int first_idx, last_idx;
4224 /* Inter-bb range test optimization.
4226 Returns TRUE if a gimple conditional is optimized to a true/false,
4227 otherwise return FALSE.
4229 This indicates to the caller that it should run a CFG cleanup pass
4230 once reassociation is completed. */
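/* For instance, a chain like (hypothetical GIMPLE, simplified)
     <bb 2>: if (c_1 == 'a') goto <other_bb>; else goto <bb 3>;
     <bb 3>: if (c_1 == 'b') goto <other_bb>; else goto <bb 4>;
     <bb 4>: _5 = c_1 == 'c';
   is collected into a single ops vector and handed to
   optimize_range_tests, which may merge the individual equality tests
   into fewer range or bit tests.  If a condition collapses to a
   constant, the corresponding GIMPLE_COND is made always true/false
   and the caller is told to run CFG cleanup.  */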
4232 static bool
4233 maybe_optimize_range_tests (gimple *stmt)
4235 basic_block first_bb = gimple_bb (stmt);
4236 basic_block last_bb = first_bb;
4237 basic_block other_bb = NULL;
4238 basic_block bb;
4239 edge_iterator ei;
4240 edge e;
4241 auto_vec<operand_entry *> ops;
4242 auto_vec<inter_bb_range_test_entry> bbinfo;
4243 bool any_changes = false;
4244 bool cfg_cleanup_needed = false;
4246 /* Consider only basic blocks that end with GIMPLE_COND or
4247 a cast statement satisfying final_range_test_p. All
4248 but the last bb in the first_bb .. last_bb range
4249 should end with GIMPLE_COND. */
4250 if (gimple_code (stmt) == GIMPLE_COND)
4252 if (EDGE_COUNT (first_bb->succs) != 2)
4253 return cfg_cleanup_needed;
4255 else if (final_range_test_p (stmt))
4256 other_bb = single_succ (first_bb);
4257 else
4258 return cfg_cleanup_needed;
4260 if (stmt_could_throw_p (cfun, stmt))
4261 return cfg_cleanup_needed;
4263 /* As relative ordering of post-dominator sons isn't fixed,
4264 maybe_optimize_range_tests can be called first on any
4265 bb in the range we want to optimize. So, start searching
4266 backwards, if first_bb can be set to a predecessor. */
4267 while (single_pred_p (first_bb))
4269 basic_block pred_bb = single_pred (first_bb);
4270 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
4271 break;
4272 if (!no_side_effect_bb (first_bb))
4273 break;
4274 first_bb = pred_bb;
4276 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4277 Before starting forward search in last_bb successors, find
4278 out the other_bb. */
4279 if (first_bb == last_bb)
4281 other_bb = NULL;
4282 /* As non-GIMPLE_COND last stmt always terminates the range,
4283 if forward search didn't discover anything, just give up. */
4284 if (gimple_code (stmt) != GIMPLE_COND)
4285 return cfg_cleanup_needed;
4286 /* Look at both successors. Either the successor ends with a GIMPLE_COND
4287 and satisfies suitable_cond_bb, or it ends with a cast and
4288 other_bb is that cast's successor. */
4289 FOR_EACH_EDGE (e, ei, first_bb->succs)
4290 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4291 || e->dest == first_bb)
4292 return cfg_cleanup_needed;
4293 else if (single_pred_p (e->dest))
4295 stmt = last_stmt (e->dest);
4296 if (stmt
4297 && gimple_code (stmt) == GIMPLE_COND
4298 && EDGE_COUNT (e->dest->succs) == 2)
4300 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4301 break;
4302 else
4303 other_bb = NULL;
4305 else if (stmt
4306 && final_range_test_p (stmt)
4307 && find_edge (first_bb, single_succ (e->dest)))
4309 other_bb = single_succ (e->dest);
4310 if (other_bb == first_bb)
4311 other_bb = NULL;
4314 if (other_bb == NULL)
4315 return cfg_cleanup_needed;
4317 /* Now do the forward search, moving last_bb to successor bbs
4318 that aren't other_bb. */
4319 while (EDGE_COUNT (last_bb->succs) == 2)
4321 FOR_EACH_EDGE (e, ei, last_bb->succs)
4322 if (e->dest != other_bb)
4323 break;
4324 if (e == NULL)
4325 break;
4326 if (!single_pred_p (e->dest))
4327 break;
4328 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4329 break;
4330 if (!no_side_effect_bb (e->dest))
4331 break;
4332 last_bb = e->dest;
4334 if (first_bb == last_bb)
4335 return cfg_cleanup_needed;
4336 /* Here basic blocks first_bb through last_bb's predecessor
4337 end with GIMPLE_COND, all of them have one of the edges to
4338 other_bb and another to another block in the range,
4339 all blocks except first_bb don't have side-effects and
4340 last_bb ends with either GIMPLE_COND, or cast satisfying
4341 final_range_test_p. */
4342 for (bb = last_bb; ; bb = single_pred (bb))
4344 enum tree_code code;
4345 tree lhs, rhs;
4346 inter_bb_range_test_entry bb_ent;
4348 bb_ent.op = NULL_TREE;
4349 bb_ent.first_idx = ops.length ();
4350 bb_ent.last_idx = bb_ent.first_idx;
4351 e = find_edge (bb, other_bb);
4352 stmt = last_stmt (bb);
4353 gimple_set_visited (stmt, true);
4354 if (gimple_code (stmt) != GIMPLE_COND)
4356 use_operand_p use_p;
4357 gimple *phi;
4358 edge e2;
4359 unsigned int d;
4361 lhs = gimple_assign_lhs (stmt);
4362 rhs = gimple_assign_rhs1 (stmt);
4363 gcc_assert (bb == last_bb);
4365 /* stmt is
4366 _123 = (int) _234;
4368 _234 = a_2(D) == 2;
4370 followed by:
4371 <bb M>:
4372 # _345 = PHI <_123(N), 1(...), 1(...)>
4374 or 0 instead of 1. If it is 0, the _234
4375 range test is anded together with all the
4376 other range tests, if it is 1, it is ored with
4377 them. */
4378 single_imm_use (lhs, &use_p, &phi);
4379 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4380 e2 = find_edge (first_bb, other_bb);
4381 d = e2->dest_idx;
4382 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4383 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4384 code = BIT_AND_EXPR;
4385 else
4387 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4388 code = BIT_IOR_EXPR;
4391 /* If _234 SSA_NAME_DEF_STMT is
4392 _234 = _567 | _789;
4393 (or &, corresponding to 1/0 in the phi arguments),
4394 push into ops the individual range test arguments
4395 of the bitwise or resp. and, recursively. */
4396 if (TREE_CODE (rhs) == SSA_NAME
4397 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4398 != tcc_comparison)
4399 && !get_ops (rhs, code, &ops,
4400 loop_containing_stmt (stmt))
4401 && has_single_use (rhs))
4403 /* Otherwise, push the _234 range test itself. */
4404 operand_entry *oe = operand_entry_pool.allocate ();
4406 oe->op = rhs;
4407 oe->rank = code;
4408 oe->id = 0;
4409 oe->count = 1;
4410 oe->stmt_to_insert = NULL;
4411 ops.safe_push (oe);
4412 bb_ent.last_idx++;
4413 bb_ent.op = rhs;
4415 else if (is_gimple_assign (stmt)
4416 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4417 == tcc_comparison)
4418 && !get_ops (lhs, code, &ops,
4419 loop_containing_stmt (stmt))
4420 && has_single_use (lhs))
4422 operand_entry *oe = operand_entry_pool.allocate ();
4423 oe->op = lhs;
4424 oe->rank = code;
4425 oe->id = 0;
4426 oe->count = 1;
4427 ops.safe_push (oe);
4428 bb_ent.last_idx++;
4429 bb_ent.op = lhs;
4431 else
4433 bb_ent.last_idx = ops.length ();
4434 bb_ent.op = rhs;
4436 bbinfo.safe_push (bb_ent);
4437 continue;
4439 /* Otherwise stmt is GIMPLE_COND. */
4440 code = gimple_cond_code (stmt);
4441 lhs = gimple_cond_lhs (stmt);
4442 rhs = gimple_cond_rhs (stmt);
4443 if (TREE_CODE (lhs) == SSA_NAME
4444 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4445 && ((code != EQ_EXPR && code != NE_EXPR)
4446 || rhs != boolean_false_node
4447 /* Either push into ops the individual bitwise
4448 or resp. and operands, depending on which
4449 edge is other_bb. */
4450 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4451 ^ (code == EQ_EXPR))
4452 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4453 loop_containing_stmt (stmt))))
4455 /* Or push the GIMPLE_COND stmt itself. */
4456 operand_entry *oe = operand_entry_pool.allocate ();
4458 oe->op = NULL;
4459 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4460 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4461 /* oe->op = NULL signals that there is no SSA_NAME
4462 for the range test, and oe->id instead is the
4463 basic block number at whose end the GIMPLE_COND
4464 is. */
4465 oe->id = bb->index;
4466 oe->count = 1;
4467 oe->stmt_to_insert = NULL;
4468 ops.safe_push (oe);
4469 bb_ent.op = NULL;
4470 bb_ent.last_idx++;
4472 else if (ops.length () > bb_ent.first_idx)
4474 bb_ent.op = lhs;
4475 bb_ent.last_idx = ops.length ();
4477 bbinfo.safe_push (bb_ent);
4478 if (bb == first_bb)
4479 break;
4481 if (ops.length () > 1)
4482 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4483 if (any_changes)
4485 unsigned int idx, max_idx = 0;
4486 /* update_ops relies on the has_single_use predicates returning the
4487 same values as they did during get_ops earlier. Additionally it
4488 never removes statements, only adds new ones, and it should walk
4489 from the single imm use and check the predicate before
4490 making those changes.
4491 On the other hand, the handling of GIMPLE_COND directly can turn
4492 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4493 it needs to be done in a separate loop afterwards. */
4494 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4496 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4497 && bbinfo[idx].op != NULL_TREE)
4499 tree new_op;
4501 max_idx = idx;
4502 stmt = last_stmt (bb);
4503 new_op = update_ops (bbinfo[idx].op,
4504 (enum tree_code)
4505 ops[bbinfo[idx].first_idx]->rank,
4506 ops, &bbinfo[idx].first_idx,
4507 loop_containing_stmt (stmt));
4508 if (new_op == NULL_TREE)
4510 gcc_assert (bb == last_bb);
4511 new_op = ops[bbinfo[idx].first_idx++]->op;
4513 if (bbinfo[idx].op != new_op)
4515 imm_use_iterator iter;
4516 use_operand_p use_p;
4517 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4519 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4520 if (is_gimple_debug (use_stmt))
4521 continue;
4522 else if (gimple_code (use_stmt) == GIMPLE_COND
4523 || gimple_code (use_stmt) == GIMPLE_PHI)
4524 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4525 SET_USE (use_p, new_op);
4526 else if ((is_gimple_assign (use_stmt)
4527 && (TREE_CODE_CLASS
4528 (gimple_assign_rhs_code (use_stmt))
4529 == tcc_comparison)))
4530 cast_or_tcc_cmp_stmt = use_stmt;
4531 else if (gimple_assign_cast_p (use_stmt))
4532 cast_or_tcc_cmp_stmt = use_stmt;
4533 else
4534 gcc_unreachable ();
4536 if (cast_or_tcc_cmp_stmt)
4538 gcc_assert (bb == last_bb);
4539 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4540 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4541 enum tree_code rhs_code
4542 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4543 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4544 : CONVERT_EXPR;
4545 gassign *g;
4546 if (is_gimple_min_invariant (new_op))
4548 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4549 g = gimple_build_assign (new_lhs, new_op);
4551 else
4552 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4553 gimple_stmt_iterator gsi
4554 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4555 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4556 gimple_set_visited (g, true);
4557 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4558 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4559 if (is_gimple_debug (use_stmt))
4560 continue;
4561 else if (gimple_code (use_stmt) == GIMPLE_COND
4562 || gimple_code (use_stmt) == GIMPLE_PHI)
4563 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4564 SET_USE (use_p, new_lhs);
4565 else
4566 gcc_unreachable ();
4570 if (bb == first_bb)
4571 break;
4573 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4575 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4576 && bbinfo[idx].op == NULL_TREE
4577 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4579 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4581 if (idx > max_idx)
4582 max_idx = idx;
4584 /* If we collapse the conditional to a true/false
4585 condition, then bubble that knowledge up to our caller. */
4586 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4588 gimple_cond_make_false (cond_stmt);
4589 cfg_cleanup_needed = true;
4591 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4593 gimple_cond_make_true (cond_stmt);
4594 cfg_cleanup_needed = true;
4596 else
4598 gimple_cond_set_code (cond_stmt, NE_EXPR);
4599 gimple_cond_set_lhs (cond_stmt,
4600 ops[bbinfo[idx].first_idx]->op);
4601 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4603 update_stmt (cond_stmt);
4605 if (bb == first_bb)
4606 break;
4609 /* The above changes could result in basic blocks after the first
4610 modified one, up to and including last_bb, being executed even if
4611 they would not be in the original program. If the value ranges of
4612 assignment lhs' in those bbs were dependent on the conditions
4613 guarding those basic blocks, which now can change, the VRs might
4614 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4615 are only used within the same bb, it should not be a big deal if
4616 we just reset all the VRs in those bbs. See PR68671. */
4617 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4618 reset_flow_sensitive_info_in_bb (bb);
4620 return cfg_cleanup_needed;
4623 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4624 of STMT in its operands. This is also known as a "destructive
4625 update" operation. */
4627 static bool
4628 is_phi_for_stmt (gimple *stmt, tree operand)
4630 gimple *def_stmt;
4631 gphi *def_phi;
4632 tree lhs;
4633 use_operand_p arg_p;
4634 ssa_op_iter i;
4636 if (TREE_CODE (operand) != SSA_NAME)
4637 return false;
4639 lhs = gimple_assign_lhs (stmt);
4641 def_stmt = SSA_NAME_DEF_STMT (operand);
4642 def_phi = dyn_cast <gphi *> (def_stmt);
4643 if (!def_phi)
4644 return false;
4646 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4647 if (lhs == USE_FROM_PTR (arg_p))
4648 return true;
4649 return false;
4652 /* Remove the def stmt of VAR if VAR has zero uses, and if so recurse
4653 on its rhs1 operand. */
4655 static void
4656 remove_visited_stmt_chain (tree var)
4658 gimple *stmt;
4659 gimple_stmt_iterator gsi;
4661 while (1)
4663 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4664 return;
4665 stmt = SSA_NAME_DEF_STMT (var);
4666 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4668 var = gimple_assign_rhs1 (stmt);
4669 gsi = gsi_for_stmt (stmt);
4670 reassoc_remove_stmt (&gsi);
4671 release_defs (stmt);
4673 else
4674 return;
4678 /* This function checks three consecutive operands in
4679 the passed operands vector OPS starting from OPINDEX and
4680 swaps two operands if it is profitable for the binary operation
4681 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4683 We pair ops with the same rank if possible.
4685 The alternative we try is to see if STMT is a destructive
4686 update style statement, which is like:
4687 b = phi (a, ...)
4688 a = c + b;
4689 In that case, we want to use the destructive update form to
4690 expose the possible vectorizer sum reduction opportunity.
4691 The third operand will then be the phi node. This
4692 check is not performed if STMT is null.
4694 We could, of course, try to be better as noted above, and do a
4695 lot of work to try to find these opportunities in >3 operand
4696 cases, but it is unlikely to be worth it. */
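/* E.g. with operand ranks {3, 3, 1} for oe1, oe2, oe3 the first and
   third entries are swapped, so the two rank-3 operands end up being
   consumed by the same binary statement.  */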
4698 static void
4699 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4700 unsigned int opindex, gimple *stmt)
4702 operand_entry *oe1, *oe2, *oe3;
4704 oe1 = ops[opindex];
4705 oe2 = ops[opindex + 1];
4706 oe3 = ops[opindex + 2];
4708 if ((oe1->rank == oe2->rank
4709 && oe2->rank != oe3->rank)
4710 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4711 && !is_phi_for_stmt (stmt, oe1->op)
4712 && !is_phi_for_stmt (stmt, oe2->op)))
4713 std::swap (*oe1, *oe3);
4714 else if ((oe1->rank == oe3->rank
4715 && oe2->rank != oe3->rank)
4716 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4717 && !is_phi_for_stmt (stmt, oe1->op)
4718 && !is_phi_for_stmt (stmt, oe3->op)))
4719 std::swap (*oe1, *oe2);
4722 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4723 two definitions, otherwise return STMT. */
4725 static inline gimple *
4726 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4728 if (TREE_CODE (rhs1) == SSA_NAME
4729 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4730 stmt = SSA_NAME_DEF_STMT (rhs1);
4731 if (TREE_CODE (rhs2) == SSA_NAME
4732 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4733 stmt = SSA_NAME_DEF_STMT (rhs2);
4734 return stmt;
4737 /* If the stmt that defines an operand has to be inserted, insert it
4738 before the use. */
4739 static void
4740 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4742 gcc_assert (is_gimple_assign (stmt_to_insert));
4743 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4744 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4745 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4746 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4747 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4749 /* If the insert point is not stmt, then insert_point would be
4750 the point where operand rhs1 or rhs2 is defined. In this case,
4751 stmt_to_insert has to be inserted afterwards. This would
4752 only happen when the stmt insertion point is flexible. */
4753 if (stmt == insert_point)
4754 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4755 else
4756 insert_stmt_after (stmt_to_insert, insert_point);
4760 /* Recursively rewrite our linearized statements so that the operators
4761 match those in OPS[OPINDEX], putting the computation in rank
4762 order. Return new lhs.
4763 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4764 the current stmt and during recursive invocations.
4765 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4766 recursive invocations. */
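/* Illustration (hypothetical SSA names): for the linearized chain
     _t1 = a_1 + b_2;  _t2 = _t1 + c_3;  x_4 = _t2 + d_5;
   and OPS = {d_5, c_3, b_2, a_1}, the top statement receives d_5 as
   its second operand, the recursion then rewrites _t2 with c_3, and
   the innermost statement gets the final pair b_2, a_1, so the chain
   is rebuilt in the order given by OPS.  */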
4768 static tree
4769 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4770 vec<operand_entry *> ops, bool changed, bool next_changed)
4772 tree rhs1 = gimple_assign_rhs1 (stmt);
4773 tree rhs2 = gimple_assign_rhs2 (stmt);
4774 tree lhs = gimple_assign_lhs (stmt);
4775 operand_entry *oe;
4777 /* The final recursion case for this function is that you have
4778 exactly two operations left.
4779 If we had exactly one op in the entire list to start with, we
4780 would have never called this function, and the tail recursion
4781 rewrites them one at a time. */
4782 if (opindex + 2 == ops.length ())
4784 operand_entry *oe1, *oe2;
4786 oe1 = ops[opindex];
4787 oe2 = ops[opindex + 1];
4789 if (rhs1 != oe1->op || rhs2 != oe2->op)
4791 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4792 unsigned int uid = gimple_uid (stmt);
4794 if (dump_file && (dump_flags & TDF_DETAILS))
4796 fprintf (dump_file, "Transforming ");
4797 print_gimple_stmt (dump_file, stmt, 0);
4800 /* If the stmt that defines operand has to be inserted, insert it
4801 before the use. */
4802 if (oe1->stmt_to_insert)
4803 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4804 if (oe2->stmt_to_insert)
4805 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4806 /* Even when changed is false, reassociation could have e.g. removed
4807 some redundant operations, so unless we are just swapping the
4808 arguments or unless there is no change at all (then we just
4809 return lhs), force creation of a new SSA_NAME. */
4810 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4812 gimple *insert_point
4813 = find_insert_point (stmt, oe1->op, oe2->op);
4814 lhs = make_ssa_name (TREE_TYPE (lhs));
4815 stmt
4816 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4817 oe1->op, oe2->op);
4818 gimple_set_uid (stmt, uid);
4819 gimple_set_visited (stmt, true);
4820 if (insert_point == gsi_stmt (gsi))
4821 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4822 else
4823 insert_stmt_after (stmt, insert_point);
4825 else
4827 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4828 == stmt);
4829 gimple_assign_set_rhs1 (stmt, oe1->op);
4830 gimple_assign_set_rhs2 (stmt, oe2->op);
4831 update_stmt (stmt);
4834 if (rhs1 != oe1->op && rhs1 != oe2->op)
4835 remove_visited_stmt_chain (rhs1);
4837 if (dump_file && (dump_flags & TDF_DETAILS))
4839 fprintf (dump_file, " into ");
4840 print_gimple_stmt (dump_file, stmt, 0);
4843 return lhs;
4846 /* If we hit here, we should have 3 or more ops left. */
4847 gcc_assert (opindex + 2 < ops.length ());
4849 /* Rewrite the next operator. */
4850 oe = ops[opindex];
4852 /* If the stmt that defines operand has to be inserted, insert it
4853 before the use. */
4854 if (oe->stmt_to_insert)
4855 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4857 /* Recurse on the LHS of the binary operator, which is guaranteed to
4858 be the non-leaf side. */
4859 tree new_rhs1
4860 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4861 changed || oe->op != rhs2 || next_changed,
4862 false);
4864 if (oe->op != rhs2 || new_rhs1 != rhs1)
4866 if (dump_file && (dump_flags & TDF_DETAILS))
4868 fprintf (dump_file, "Transforming ");
4869 print_gimple_stmt (dump_file, stmt, 0);
4872 /* If changed is false, this is either opindex == 0
4873 or all outer rhs2's were equal to corresponding oe->op,
4874 and powi_result is NULL.
4875 That means lhs is equivalent before and after reassociation.
4876 Otherwise ensure the old lhs SSA_NAME is not reused and
4877 create a new stmt as well, so that any debug stmts will be
4878 properly adjusted. */
4879 if (changed)
4881 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4882 unsigned int uid = gimple_uid (stmt);
4883 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4885 lhs = make_ssa_name (TREE_TYPE (lhs));
4886 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4887 new_rhs1, oe->op);
4888 gimple_set_uid (stmt, uid);
4889 gimple_set_visited (stmt, true);
4890 if (insert_point == gsi_stmt (gsi))
4891 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4892 else
4893 insert_stmt_after (stmt, insert_point);
4895 else
4897 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4898 == stmt);
4899 gimple_assign_set_rhs1 (stmt, new_rhs1);
4900 gimple_assign_set_rhs2 (stmt, oe->op);
4901 update_stmt (stmt);
4904 if (dump_file && (dump_flags & TDF_DETAILS))
4906 fprintf (dump_file, " into ");
4907 print_gimple_stmt (dump_file, stmt, 0);
4910 return lhs;
4913 /* Find out how many cycles we need to compute the statement chain.
4914 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
4915 maximum number of independent statements we may execute per cycle. */
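/* Worked example: ops_num = 10, cpu_width = 2.  The first
   10 / (2 * 2) = 2 cycles each retire cpu_width operands, leaving
   rest = 10 - 2 * 2 = 6; reducing 6 operands pairwise then takes
   ceil (log2 (6)) = 3 more cycles, so 5 cycles in total.  */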
4917 static int
4918 get_required_cycles (int ops_num, int cpu_width)
4920 int res;
4921 int elog;
4922 unsigned int rest;
4924 /* While we have more than 2 * cpu_width operands
4925 we may reduce the number of operands by cpu_width
4926 per cycle. */
4927 res = ops_num / (2 * cpu_width);
4929 /* The remaining operand count may be halved each cycle
4930 until we have only one operand. */
4931 rest = (unsigned)(ops_num - res * cpu_width);
4932 elog = exact_log2 (rest);
4933 if (elog >= 0)
4934 res += elog;
4935 else
4936 res += floor_log2 (rest) + 1;
4938 return res;
4941 /* Returns an optimal number of registers to use for computation of
4942 the given statements. */
4944 static int
4945 get_reassociation_width (int ops_num, enum tree_code opc,
4946 machine_mode mode)
4948 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4949 int width;
4950 int width_min;
4951 int cycles_best;
4953 if (param_width > 0)
4954 width = param_width;
4955 else
4956 width = targetm.sched.reassociation_width (opc, mode);
4958 if (width == 1)
4959 return width;
4961 /* Get the minimal time required for sequence computation. */
4962 cycles_best = get_required_cycles (ops_num, width);
4964 /* Check if we may use a smaller width and still compute the sequence
4965 in the same number of cycles. That allows us to reduce register usage.
4966 get_required_cycles is monotonically increasing with lower width,
4967 so we can perform a binary search for the minimal width that still
4968 results in the optimal cycle count. */
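/* E.g. for ops_num = 4 with a target-reported width of 4, a width of
   2 already achieves the same two-cycle latency, so the search below
   settles on 2 and halves the number of live partial results.  */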
4969 width_min = 1;
4970 while (width > width_min)
4972 int width_mid = (width + width_min) / 2;
4974 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4975 width = width_mid;
4976 else if (width_min < width_mid)
4977 width_min = width_mid;
4978 else
4979 break;
4982 return width;
4985 /* Recursively rewrite our linearized statements so that the operators
4986 match those in OPS[OPINDEX], putting the computation in rank
4987 order and trying to allow operations to be executed in
4988 parallel. */
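/* E.g. (hypothetical) rewriting a_1 + b_2 + c_3 + d_4 with width 2
   can produce
     _t1 = c_3 + d_4;  _t2 = a_1 + b_2;  x_5 = _t1 + _t2;
   where the two leading additions are independent and may issue in
   the same cycle.  */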
4990 static void
4991 rewrite_expr_tree_parallel (gassign *stmt, int width,
4992 vec<operand_entry *> ops)
4994 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4995 int op_num = ops.length ();
4996 gcc_assert (op_num > 0);
4997 int stmt_num = op_num - 1;
4998 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4999 int op_index = op_num - 1;
5000 int stmt_index = 0;
5001 int ready_stmts_end = 0;
5002 int i = 0;
5003 gimple *stmt1 = NULL, *stmt2 = NULL;
5004 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5006 /* We start expression rewriting from the top statements.
5007 So, in this loop we create a full list of statements
5008 we will work with. */
5009 stmts[stmt_num - 1] = stmt;
5010 for (i = stmt_num - 2; i >= 0; i--)
5011 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5013 for (i = 0; i < stmt_num; i++)
5015 tree op1, op2;
5017 /* Determine whether we should use results of
5018 already handled statements or not. */
5019 if (ready_stmts_end == 0
5020 && (i - stmt_index >= width || op_index < 1))
5021 ready_stmts_end = i;
5023 /* Now we choose operands for the next statement. A nonzero
5024 value in ready_stmts_end means that we should use
5025 the results of already generated statements as new operands. */
5026 if (ready_stmts_end > 0)
5028 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5029 if (ready_stmts_end > stmt_index)
5030 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5031 else if (op_index >= 0)
5033 operand_entry *oe = ops[op_index--];
5034 stmt2 = oe->stmt_to_insert;
5035 op2 = oe->op;
5037 else
5039 gcc_assert (stmt_index < i);
5040 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5043 if (stmt_index >= ready_stmts_end)
5044 ready_stmts_end = 0;
5046 else
5048 if (op_index > 1)
5049 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5050 operand_entry *oe2 = ops[op_index--];
5051 operand_entry *oe1 = ops[op_index--];
5052 op2 = oe2->op;
5053 stmt2 = oe2->stmt_to_insert;
5054 op1 = oe1->op;
5055 stmt1 = oe1->stmt_to_insert;
5058 /* If we emit the last statement then we should put
5059 operands into the last statement. It will also
5060 break the loop. */
5061 if (op_index < 0 && stmt_index == i)
5062 i = stmt_num - 1;
5064 if (dump_file && (dump_flags & TDF_DETAILS))
5066 fprintf (dump_file, "Transforming ");
5067 print_gimple_stmt (dump_file, stmts[i], 0);
5070 /* If the stmt that defines operand has to be inserted, insert it
5071 before the use. */
5072 if (stmt1)
5073 insert_stmt_before_use (stmts[i], stmt1);
5074 if (stmt2)
5075 insert_stmt_before_use (stmts[i], stmt2);
5076 stmt1 = stmt2 = NULL;
5078 /* We keep the original statement only for the last one. All
5079 others are recreated. */
5080 if (i == stmt_num - 1)
5082 gimple_assign_set_rhs1 (stmts[i], op1);
5083 gimple_assign_set_rhs2 (stmts[i], op2);
5084 update_stmt (stmts[i]);
5086 else
5088 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5089 gimple_set_visited (stmts[i], true);
5091 if (dump_file && (dump_flags & TDF_DETAILS))
5093 fprintf (dump_file, " into ");
5094 print_gimple_stmt (dump_file, stmts[i], 0);
5098 remove_visited_stmt_chain (last_rhs1);
5101 /* Transform STMT, which is really (A+B)+(C+D), into the left
5102 linear form, ((A+B)+C)+D.
5103 Recurse on D if necessary. */
5105 static void
5106 linearize_expr (gimple *stmt)
5108 gimple_stmt_iterator gsi;
5109 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5110 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5111 gimple *oldbinrhs = binrhs;
5112 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5113 gimple *newbinrhs = NULL;
5114 class loop *loop = loop_containing_stmt (stmt);
5115 tree lhs = gimple_assign_lhs (stmt);
5117 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5118 && is_reassociable_op (binrhs, rhscode, loop));
5120 gsi = gsi_for_stmt (stmt);
5122 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5123 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5124 gimple_assign_rhs_code (binrhs),
5125 gimple_assign_lhs (binlhs),
5126 gimple_assign_rhs2 (binrhs));
5127 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5128 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5129 gimple_set_uid (binrhs, gimple_uid (stmt));
5131 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5132 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5134 if (dump_file && (dump_flags & TDF_DETAILS))
5136 fprintf (dump_file, "Linearized: ");
5137 print_gimple_stmt (dump_file, stmt, 0);
5140 reassociate_stats.linearized++;
5141 update_stmt (stmt);
5143 gsi = gsi_for_stmt (oldbinrhs);
5144 reassoc_remove_stmt (&gsi);
5145 release_defs (oldbinrhs);
5147 gimple_set_visited (stmt, true);
5148 gimple_set_visited (binlhs, true);
5149 gimple_set_visited (binrhs, true);
5151 /* Tail recurse on the new rhs if it still needs reassociation. */
5152 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5153 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5154 want to change the algorithm while converting to tuples. */
5155 linearize_expr (stmt);
5158 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5159 it. Otherwise, return NULL. */
5161 static gimple *
5162 get_single_immediate_use (tree lhs)
5164 use_operand_p immuse;
5165 gimple *immusestmt;
5167 if (TREE_CODE (lhs) == SSA_NAME
5168 && single_imm_use (lhs, &immuse, &immusestmt)
5169 && is_gimple_assign (immusestmt))
5170 return immusestmt;
5172 return NULL;
5175 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5176 representing the negated value. Insertions of any necessary
5177 instructions go before GSI.
5178 This function is recursive in that, if you hand it "a_5" as the
5179 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5180 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5182 static tree
5183 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5185 gimple *negatedefstmt = NULL;
5186 tree resultofnegate;
5187 gimple_stmt_iterator gsi;
5188 unsigned int uid;
5190 /* If we are trying to negate a name, defined by an add, negate the
5191 add operands instead. */
5192 if (TREE_CODE (tonegate) == SSA_NAME)
5193 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5194 if (TREE_CODE (tonegate) == SSA_NAME
5195 && is_gimple_assign (negatedefstmt)
5196 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5197 && has_single_use (gimple_assign_lhs (negatedefstmt))
5198 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5200 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5201 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5202 tree lhs = gimple_assign_lhs (negatedefstmt);
5203 gimple *g;
5205 gsi = gsi_for_stmt (negatedefstmt);
5206 rhs1 = negate_value (rhs1, &gsi);
5208 gsi = gsi_for_stmt (negatedefstmt);
5209 rhs2 = negate_value (rhs2, &gsi);
5211 gsi = gsi_for_stmt (negatedefstmt);
5212 lhs = make_ssa_name (TREE_TYPE (lhs));
5213 gimple_set_visited (negatedefstmt, true);
5214 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5215 gimple_set_uid (g, gimple_uid (negatedefstmt));
5216 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5217 return lhs;
5220 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5221 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5222 NULL_TREE, true, GSI_SAME_STMT);
5223 gsi = *gsip;
5224 uid = gimple_uid (gsi_stmt (gsi));
5225 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5227 gimple *stmt = gsi_stmt (gsi);
5228 if (gimple_uid (stmt) != 0)
5229 break;
5230 gimple_set_uid (stmt, uid);
5232 return resultofnegate;
5235 /* Return true if we should break up the subtract in STMT into an add
5236 with negate. This is true when the subtract operands are really
5237 adds, or the subtract itself is used in an add expression. In
5238 either case, breaking up the subtract into an add with negate
5239 exposes the adds to reassociation. */
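/* E.g. (hypothetical) for
     c_3 = a_1 - b_2;
     d_5 = c_3 + e_4;
   the subtract feeds an addition, so we answer true; the subtract is
   later rewritten as c_3 = a_1 + -b_2, exposing the reassociable
   chain a_1 + -b_2 + e_4.  */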
5241 static bool
5242 should_break_up_subtract (gimple *stmt)
5244 tree lhs = gimple_assign_lhs (stmt);
5245 tree binlhs = gimple_assign_rhs1 (stmt);
5246 tree binrhs = gimple_assign_rhs2 (stmt);
5247 gimple *immusestmt;
5248 class loop *loop = loop_containing_stmt (stmt);
5250 if (TREE_CODE (binlhs) == SSA_NAME
5251 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5252 return true;
5254 if (TREE_CODE (binrhs) == SSA_NAME
5255 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5256 return true;
5258 if (TREE_CODE (lhs) == SSA_NAME
5259 && (immusestmt = get_single_immediate_use (lhs))
5260 && is_gimple_assign (immusestmt)
5261 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5262 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5263 && gimple_assign_rhs1 (immusestmt) == lhs)
5264 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5265 return true;
5266 return false;
5269 /* Transform STMT from A - B into A + -B. */
5271 static void
5272 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5274 tree rhs1 = gimple_assign_rhs1 (stmt);
5275 tree rhs2 = gimple_assign_rhs2 (stmt);
5277 if (dump_file && (dump_flags & TDF_DETAILS))
5279 fprintf (dump_file, "Breaking up subtract ");
5280 print_gimple_stmt (dump_file, stmt, 0);
5283 rhs2 = negate_value (rhs2, gsip);
5284 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5285 update_stmt (stmt);
5288 /* Determine whether STMT is a builtin call that raises an SSA name
5289 to an integer power and has only one use. If so, and this is early
5290 reassociation and unsafe math optimizations are permitted, place
5291 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5292 If any of these conditions does not hold, return FALSE. */
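/* E.g. a single-use call __builtin_pow (x_1, 3.0) (names hypothetical)
   yields *BASE = x_1 and *EXPONENT = 3, while pow (x_1, 2.5) is
   rejected because the exponent is not exactly an integer, and pow
   calls are rejected altogether when errno handling (-fmath-errno) is
   in effect.  */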
5294 static bool
5295 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5297 tree arg1;
5298 REAL_VALUE_TYPE c, cint;
5300 switch (gimple_call_combined_fn (stmt))
5302 CASE_CFN_POW:
5303 if (flag_errno_math)
5304 return false;
5306 *base = gimple_call_arg (stmt, 0);
5307 arg1 = gimple_call_arg (stmt, 1);
5309 if (TREE_CODE (arg1) != REAL_CST)
5310 return false;
5312 c = TREE_REAL_CST (arg1);
5314 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5315 return false;
5317 *exponent = real_to_integer (&c);
5318 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5319 if (!real_identical (&c, &cint))
5320 return false;
5322 break;
5324 CASE_CFN_POWI:
5325 *base = gimple_call_arg (stmt, 0);
5326 arg1 = gimple_call_arg (stmt, 1);
5328 if (!tree_fits_shwi_p (arg1))
5329 return false;
5331 *exponent = tree_to_shwi (arg1);
5332 break;
5334 default:
5335 return false;
5338 /* Expanding negative exponents is generally unproductive, so we don't
5339 complicate matters with those. Exponents of zero and one should
5340 have been handled by expression folding. */
5341 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5342 return false;
5344 return true;
5347 /* Try to derive and add operand entry for OP to *OPS. Return false if
5348 unsuccessful. */
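/* E.g. (hypothetical) when linearizing a multiplication and OP is t_3
   defined by
     t_3 = -x_1;
   the negation is folded into the operand list as the pair x_1 and
   the constant -1 of the operand's type (build_minus_one_cst), so the
   sign can be reassociated with the other factors; for a
   repeated-factor pow call the base and exponent are recorded via
   add_repeat_to_ops_vec instead.  */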
5350 static bool
5351 try_special_add_to_ops (vec<operand_entry *> *ops,
5352 enum tree_code code,
5353 tree op, gimple* def_stmt)
5355 tree base = NULL_TREE;
5356 HOST_WIDE_INT exponent = 0;
5358 if (TREE_CODE (op) != SSA_NAME
5359 || ! has_single_use (op))
5360 return false;
5362 if (code == MULT_EXPR
5363 && reassoc_insert_powi_p
5364 && flag_unsafe_math_optimizations
5365 && is_gimple_call (def_stmt)
5366 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5368 add_repeat_to_ops_vec (ops, base, exponent);
5369 gimple_set_visited (def_stmt, true);
5370 return true;
5372 else if (code == MULT_EXPR
5373 && is_gimple_assign (def_stmt)
5374 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5375 && !HONOR_SNANS (TREE_TYPE (op))
5376 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5377 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5379 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5380 tree cst = build_minus_one_cst (TREE_TYPE (op));
5381 add_to_ops_vec (ops, rhs1);
5382 add_to_ops_vec (ops, cst);
5383 gimple_set_visited (def_stmt, true);
5384 return true;
5387 return false;
5390 /* Recursively linearize a binary expression that is the RHS of STMT.
5391 Place the operands of the expression tree in the vector named OPS. */
5393 static void
5394 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5395 bool is_associative, bool set_visited)
5397 tree binlhs = gimple_assign_rhs1 (stmt);
5398 tree binrhs = gimple_assign_rhs2 (stmt);
5399 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5400 bool binlhsisreassoc = false;
5401 bool binrhsisreassoc = false;
5402 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5403 class loop *loop = loop_containing_stmt (stmt);
5405 if (set_visited)
5406 gimple_set_visited (stmt, true);
5408 if (TREE_CODE (binlhs) == SSA_NAME)
5410 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5411 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5412 && !stmt_could_throw_p (cfun, binlhsdef));
5415 if (TREE_CODE (binrhs) == SSA_NAME)
5417 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5418 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5419 && !stmt_could_throw_p (cfun, binrhsdef));
5422 /* If the LHS is not reassociable, but the RHS is, we need to swap
5423 them. If neither is reassociable, there is nothing we can do, so
5424 just put them in the ops vector. If the LHS is reassociable,
5425 linearize it. If both are reassociable, then linearize the RHS
5426 and the LHS. */
5428 if (!binlhsisreassoc)
5431 /* If this is not an associative operation, like division, give up. */
5431 if (!is_associative)
5433 add_to_ops_vec (ops, binrhs);
5434 return;
5437 if (!binrhsisreassoc)
5439 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5440 add_to_ops_vec (ops, binrhs);
5442 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5443 add_to_ops_vec (ops, binlhs);
5445 return;
5448 if (dump_file && (dump_flags & TDF_DETAILS))
5450 fprintf (dump_file, "swapping operands of ");
5451 print_gimple_stmt (dump_file, stmt, 0);
5454 swap_ssa_operands (stmt,
5455 gimple_assign_rhs1_ptr (stmt),
5456 gimple_assign_rhs2_ptr (stmt));
5457 update_stmt (stmt);
5459 if (dump_file && (dump_flags & TDF_DETAILS))
5461 fprintf (dump_file, " is now ");
5462 print_gimple_stmt (dump_file, stmt, 0);
5465 /* We want to make it so the lhs is always the reassociative op,
5466 so swap. */
5467 std::swap (binlhs, binrhs);
5469 else if (binrhsisreassoc)
5471 linearize_expr (stmt);
5472 binlhs = gimple_assign_rhs1 (stmt);
5473 binrhs = gimple_assign_rhs2 (stmt);
5476 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5477 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5478 rhscode, loop));
5479 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5480 is_associative, set_visited);
5482 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5483 add_to_ops_vec (ops, binrhs);
5486 /* Repropagate the negates back into subtracts, since no other pass
5487 currently does it. */
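/* E.g. (hypothetical)
     x_2 = -a_1;  y_4 = b_3 + x_2;   becomes   y_4 = b_3 - a_1;
     x_2 = -a_1;  y_4 = b_3 - x_2;   becomes   y_4 = b_3 + a_1;
   and for y_4 = x_2 - b_3 the negate is pushed down so it may combine
   with a later use.  */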
5489 static void
5490 repropagate_negates (void)
5492 unsigned int i = 0;
5493 tree negate;
5495 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5497 gimple *user = get_single_immediate_use (negate);
5499 if (!user || !is_gimple_assign (user))
5500 continue;
5502 /* The negate operand can be either operand of a PLUS_EXPR
5503 (it can be the LHS if the RHS is a constant for example).
5505 Force the negate operand to the RHS of the PLUS_EXPR, then
5506 transform the PLUS_EXPR into a MINUS_EXPR. */
5507 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5509 /* If the negated operand appears on the LHS of the
5510 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5511 to force the negated operand to the RHS of the PLUS_EXPR. */
5512 if (gimple_assign_rhs1 (user) == negate)
5514 swap_ssa_operands (user,
5515 gimple_assign_rhs1_ptr (user),
5516 gimple_assign_rhs2_ptr (user));
5519 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5520 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5521 if (gimple_assign_rhs2 (user) == negate)
5523 tree rhs1 = gimple_assign_rhs1 (user);
5524 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5525 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5526 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5527 update_stmt (user);
5530 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5532 if (gimple_assign_rhs1 (user) == negate)
5534 /* We have
5535 x = -a
5536 y = x - b
5537 which we transform into
5538 x = a + b
5539 y = -x .
5540 This pushes down the negate which we possibly can merge
5541 into some other operation, hence insert it into the
5542 plus_negates vector. */
5543 gimple *feed = SSA_NAME_DEF_STMT (negate);
5544 tree a = gimple_assign_rhs1 (feed);
5545 tree b = gimple_assign_rhs2 (user);
5546 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5547 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5548 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5549 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5550 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5551 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5552 user = gsi_stmt (gsi2);
5553 update_stmt (user);
5554 reassoc_remove_stmt (&gsi);
5555 release_defs (feed);
5556 plus_negates.safe_push (gimple_assign_lhs (user));
5558 else
5560 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5561 rid of one operation. */
5562 gimple *feed = SSA_NAME_DEF_STMT (negate);
5563 tree a = gimple_assign_rhs1 (feed);
5564 tree rhs1 = gimple_assign_rhs1 (user);
5565 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5566 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5567 update_stmt (gsi_stmt (gsi));
5573 /* Returns true if OP is of a type for which we can do reassociation.
5574 That is for integral or non-saturating fixed-point types, and for
5575 floating point types when associative-math is enabled. */
5577 static bool
5578 can_reassociate_p (tree op)
5580 tree type = TREE_TYPE (op);
5581 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5582 return false;
5583 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5584 || NON_SAT_FIXED_POINT_TYPE_P (type)
5585 || (flag_associative_math && FLOAT_TYPE_P (type)))
5586 return true;
5587 return false;
5590 /* Break up subtract operations in block BB.
5592 We do this top down because we don't know whether the subtract is
5593 part of a possible chain of reassociation except at the top.
5595 IE given
5596 d = f + g
5597 c = a + e
5598 b = c - d
5599 q = b - r
5600 k = t - q
5602 we want to break up k = t - q, but we won't until we've transformed q
5603 = b - r, which won't be broken up until we transform b = c - d.
5605 En passant, clear the GIMPLE visited flag on every statement
5606 and set UIDs within each basic block. */
5608 static void
5609 break_up_subtract_bb (basic_block bb)
5611 gimple_stmt_iterator gsi;
5612 basic_block son;
5613 unsigned int uid = 1;
5615 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5617 gimple *stmt = gsi_stmt (gsi);
5618 gimple_set_visited (stmt, false);
5619 gimple_set_uid (stmt, uid++);
5621 if (!is_gimple_assign (stmt)
5622 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5623 continue;
5625 /* Look for simple gimple subtract operations. */
5626 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5628 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5629 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5630 continue;
5632 /* Check for a subtract used only in an addition. If this
5633 is the case, transform it into add of a negate for better
5634 reassociation. IE transform C = A-B into C = A + -B if C
5635 is only used in an addition. */
5636 if (should_break_up_subtract (stmt))
5637 break_up_subtract (stmt, &gsi);
5639 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5640 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5641 plus_negates.safe_push (gimple_assign_lhs (stmt));
5643 for (son = first_dom_son (CDI_DOMINATORS, bb);
5644 son;
5645 son = next_dom_son (CDI_DOMINATORS, son))
5646 break_up_subtract_bb (son);
5649 /* Used for repeated factor analysis. */
5650 struct repeat_factor
5652 /* An SSA name that occurs in a multiply chain. */
5653 tree factor;
5655 /* Cached rank of the factor. */
5656 unsigned rank;
5658 /* Number of occurrences of the factor in the chain. */
5659 HOST_WIDE_INT count;
5661 /* An SSA name representing the product of this factor and
5662 all factors appearing later in the repeated factor vector. */
5663 tree repr;
5667 static vec<repeat_factor> repeat_factor_vec;
5669 /* Used for sorting the repeat factor vector. Sort primarily by
5670 ascending occurrence count, secondarily by descending rank. */
5672 static int
5673 compare_repeat_factors (const void *x1, const void *x2)
5675 const repeat_factor *rf1 = (const repeat_factor *) x1;
5676 const repeat_factor *rf2 = (const repeat_factor *) x2;
5678 if (rf1->count != rf2->count)
5679 return rf1->count - rf2->count;
5681 return rf2->rank - rf1->rank;
5684 /* Look for repeated operands in OPS in the multiply tree rooted at
5685 STMT. Replace them with an optimal sequence of multiplies and powi
5686 builtin calls, and remove the used operands from OPS. Return an
5687 SSA name representing the value of the replacement sequence. */
5689 static tree
5690 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5692 unsigned i, j, vec_len;
5693 int ii;
5694 operand_entry *oe;
5695 repeat_factor *rf1, *rf2;
5696 repeat_factor rfnew;
5697 tree result = NULL_TREE;
5698 tree target_ssa, iter_result;
5699 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5700 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5701 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5702 gimple *mul_stmt, *pow_stmt;
5704 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5705 target. */
5706 if (!powi_fndecl)
5707 return NULL_TREE;
5709 /* Allocate the repeated factor vector. */
5710 repeat_factor_vec.create (10);
5712 /* Scan the OPS vector for all SSA names in the product and build
5713 up a vector of occurrence counts for each factor. */
5714 FOR_EACH_VEC_ELT (*ops, i, oe)
5716 if (TREE_CODE (oe->op) == SSA_NAME)
5718 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5720 if (rf1->factor == oe->op)
5722 rf1->count += oe->count;
5723 break;
5727 if (j >= repeat_factor_vec.length ())
5729 rfnew.factor = oe->op;
5730 rfnew.rank = oe->rank;
5731 rfnew.count = oe->count;
5732 rfnew.repr = NULL_TREE;
5733 repeat_factor_vec.safe_push (rfnew);
5738 /* Sort the repeated factor vector by (a) increasing occurrence count,
5739 and (b) decreasing rank. */
5740 repeat_factor_vec.qsort (compare_repeat_factors);
5742 /* It is generally best to combine as many base factors as possible
5743 into a product before applying __builtin_powi to the result.
5744 However, the sort order chosen for the repeated factor vector
5745 allows us to cache partial results for the product of the base
5746 factors for subsequent use. When we already have a cached partial
5747 result from a previous iteration, it is best to make use of it
5748 before looking for another __builtin_pow opportunity.
5750 As an example, consider x * x * y * y * y * z * z * z * z.
5751 We want to first compose the product x * y * z, raise it to the
5752 second power, then multiply this by y * z, and finally multiply
5753 by z. This can be done in 5 multiplies provided we cache y * z
5754 for use in both expressions:
5756 t1 = y * z
5757 t2 = t1 * x
5758 t3 = t2 * t2
5759 t4 = t1 * t3
5760 result = t4 * z
5762 If we instead ignored the cached y * z and first multiplied by
5763 the __builtin_pow opportunity z * z, we would get the inferior:
5765 t1 = y * z
5766 t2 = t1 * x
5767 t3 = t2 * t2
5768 t4 = z * z
5769 t5 = t3 * t4
5770 result = t5 * y */
5772 vec_len = repeat_factor_vec.length ();
5774 /* Repeatedly look for opportunities to create a builtin_powi call. */
5775 while (true)
5777 HOST_WIDE_INT power;
5779 /* First look for the largest cached product of factors from
5780 preceding iterations. If found, create a builtin_powi for
5781 it if the minimum occurrence count for its factors is at
5782 least 2, or just use this cached product as our next
5783 multiplicand if the minimum occurrence count is 1. */
5784 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5786 if (rf1->repr && rf1->count > 0)
5787 break;
5790 if (j < vec_len)
5792 power = rf1->count;
5794 if (power == 1)
5796 iter_result = rf1->repr;
5798 if (dump_file && (dump_flags & TDF_DETAILS))
5800 unsigned elt;
5801 repeat_factor *rf;
5802 fputs ("Multiplying by cached product ", dump_file);
5803 for (elt = j; elt < vec_len; elt++)
5805 rf = &repeat_factor_vec[elt];
5806 print_generic_expr (dump_file, rf->factor);
5807 if (elt < vec_len - 1)
5808 fputs (" * ", dump_file);
5810 fputs ("\n", dump_file);
5813 else
5815 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5816 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5817 build_int_cst (integer_type_node,
5818 power));
5819 gimple_call_set_lhs (pow_stmt, iter_result);
5820 gimple_set_location (pow_stmt, gimple_location (stmt));
5821 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5822 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5824 if (dump_file && (dump_flags & TDF_DETAILS))
5826 unsigned elt;
5827 repeat_factor *rf;
5828 fputs ("Building __builtin_pow call for cached product (",
5829 dump_file);
5830 for (elt = j; elt < vec_len; elt++)
5832 rf = &repeat_factor_vec[elt];
5833 print_generic_expr (dump_file, rf->factor);
5834 if (elt < vec_len - 1)
5835 fputs (" * ", dump_file);
5837 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5838 power);
5842 else
5844 /* Otherwise, find the first factor in the repeated factor
5845 vector whose occurrence count is at least 2. If no such
5846 factor exists, there are no builtin_powi opportunities
5847 remaining. */
5848 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5850 if (rf1->count >= 2)
5851 break;
5854 if (j >= vec_len)
5855 break;
5857 power = rf1->count;
5859 if (dump_file && (dump_flags & TDF_DETAILS))
5861 unsigned elt;
5862 repeat_factor *rf;
5863 fputs ("Building __builtin_pow call for (", dump_file);
5864 for (elt = j; elt < vec_len; elt++)
5866 rf = &repeat_factor_vec[elt];
5867 print_generic_expr (dump_file, rf->factor);
5868 if (elt < vec_len - 1)
5869 fputs (" * ", dump_file);
5871 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5874 reassociate_stats.pows_created++;
5876 /* Visit each element of the vector in reverse order (so that
5877 high-occurrence elements are visited first, and within the
5878 same occurrence count, lower-ranked elements are visited
5879 first). Form a linear product of all elements in this order
5880 whose occurrence count is at least that of element J.
5881 Record the SSA name representing the product of each element
5882 with all subsequent elements in the vector. */
5883 if (j == vec_len - 1)
5884 rf1->repr = rf1->factor;
5885 else
5887 for (ii = vec_len - 2; ii >= (int)j; ii--)
5889 tree op1, op2;
5891 rf1 = &repeat_factor_vec[ii];
5892 rf2 = &repeat_factor_vec[ii + 1];
5894 /* Init the last factor's representative to be itself. */
5895 if (!rf2->repr)
5896 rf2->repr = rf2->factor;
5898 op1 = rf1->factor;
5899 op2 = rf2->repr;
5901 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5902 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5903 op1, op2);
5904 gimple_set_location (mul_stmt, gimple_location (stmt));
5905 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5906 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5907 rf1->repr = target_ssa;
5909 /* Don't reprocess the multiply we just introduced. */
5910 gimple_set_visited (mul_stmt, true);
5914 /* Form a call to __builtin_powi for the maximum product
5915 just formed, raised to the power obtained earlier. */
5916 rf1 = &repeat_factor_vec[j];
5917 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5918 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5919 build_int_cst (integer_type_node,
5920 power));
5921 gimple_call_set_lhs (pow_stmt, iter_result);
5922 gimple_set_location (pow_stmt, gimple_location (stmt));
5923 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5924 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5927 /* If we previously formed at least one other builtin_powi call,
5928 form the product of this one and those others. */
5929 if (result)
5931 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5932 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5933 result, iter_result);
5934 gimple_set_location (mul_stmt, gimple_location (stmt));
5935 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5936 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5937 gimple_set_visited (mul_stmt, true);
5938 result = new_result;
5940 else
5941 result = iter_result;
5943 /* Decrement the occurrence count of each element in the product
5944 by the count found above, and remove this many copies of each
5945 factor from OPS. */
5946 for (i = j; i < vec_len; i++)
5948 unsigned k = power;
5949 unsigned n;
5951 rf1 = &repeat_factor_vec[i];
5952 rf1->count -= power;
5954 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5956 if (oe->op == rf1->factor)
5958 if (oe->count <= k)
5960 ops->ordered_remove (n);
5961 k -= oe->count;
5963 if (k == 0)
5964 break;
5966 else
5968 oe->count -= k;
5969 break;
5976 /* At this point all elements in the repeated factor vector have a
5977 remaining occurrence count of 0 or 1, and those with a count of 1
5978 don't have cached representatives. Re-sort the ops vector and
5979 clean up. */
5980 ops->qsort (sort_by_operand_rank);
5981 repeat_factor_vec.release ();
5983 /* Return the final product computed herein. Note that there may
5984 still be some elements with single occurrence count left in OPS;
5985 those will be handled by the normal reassociation logic. */
5986 return result;
5989 /* Attempt to optimize
5990 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5991 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
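/* For example (a hypothetical input, SSA details omitted): given

     t = __builtin_copysign (3.0, y);
     r = -2.0 * t * a;

   the constant is folded into a fresh copysign call, roughly

     t2 = __builtin_copysign (-6.0, y);
     t3 = -t2;
     r  = t3 * a;

   and the original call is left for DCE to clean up.  */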
5993 static void
5994 attempt_builtin_copysign (vec<operand_entry *> *ops)
5996 operand_entry *oe;
5997 unsigned int i;
5998 unsigned int length = ops->length ();
5999 tree cst = ops->last ()->op;
6001 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6002 return;
6004 FOR_EACH_VEC_ELT (*ops, i, oe)
6006 if (TREE_CODE (oe->op) == SSA_NAME
6007 && has_single_use (oe->op))
6009 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6010 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6012 tree arg0, arg1;
6013 switch (gimple_call_combined_fn (old_call))
6015 CASE_CFN_COPYSIGN:
6016 CASE_CFN_COPYSIGN_FN:
6017 arg0 = gimple_call_arg (old_call, 0);
6018 arg1 = gimple_call_arg (old_call, 1);
6019 /* The first argument of copysign must be a constant,
6020 otherwise there's nothing to do. */
6021 if (TREE_CODE (arg0) == REAL_CST)
6023 tree type = TREE_TYPE (arg0);
6024 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6025 /* If we couldn't fold to a single constant, skip it.
6026 That happens e.g. for inexact multiplication when
6027 -frounding-math. */
6028 if (mul == NULL_TREE)
6029 break;
6030 /* Instead of adjusting OLD_CALL, let's build a new
6031 call to not leak the LHS and prevent keeping bogus
6032 debug statements. DCE will clean up the old call. */
6033 gcall *new_call;
6034 if (gimple_call_internal_p (old_call))
6035 new_call = gimple_build_call_internal
6036 (IFN_COPYSIGN, 2, mul, arg1);
6037 else
6038 new_call = gimple_build_call
6039 (gimple_call_fndecl (old_call), 2, mul, arg1);
6040 tree lhs = make_ssa_name (type);
6041 gimple_call_set_lhs (new_call, lhs);
6042 gimple_set_location (new_call,
6043 gimple_location (old_call));
6044 insert_stmt_after (new_call, old_call);
6045 /* We've used the constant, get rid of it. */
6046 ops->pop ();
6047 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6048 /* Handle the CST1 < 0 case by negating the result. */
6049 if (cst1_neg)
6051 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6052 gimple *negate_stmt
6053 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6054 insert_stmt_after (negate_stmt, new_call);
6055 oe->op = negrhs;
6057 else
6058 oe->op = lhs;
6059 if (dump_file && (dump_flags & TDF_DETAILS))
6061 fprintf (dump_file, "Optimizing copysign: ");
6062 print_generic_expr (dump_file, cst);
6063 fprintf (dump_file, " * COPYSIGN (");
6064 print_generic_expr (dump_file, arg0);
6065 fprintf (dump_file, ", ");
6066 print_generic_expr (dump_file, arg1);
6067 fprintf (dump_file, ") into %sCOPYSIGN (",
6068 cst1_neg ? "-" : "");
6069 print_generic_expr (dump_file, mul);
6070 fprintf (dump_file, ", ");
6071 print_generic_expr (dump_file, arg1);
6072 fprintf (dump_file, ")\n");
6074 return;
6076 break;
6077 default:
6078 break;
6085 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6087 static void
6088 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6090 tree rhs1;
6092 if (dump_file && (dump_flags & TDF_DETAILS))
6094 fprintf (dump_file, "Transforming ");
6095 print_gimple_stmt (dump_file, stmt, 0);
6098 rhs1 = gimple_assign_rhs1 (stmt);
6099 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6100 update_stmt (stmt);
6101 remove_visited_stmt_chain (rhs1);
6103 if (dump_file && (dump_flags & TDF_DETAILS))
6105 fprintf (dump_file, " into ");
6106 print_gimple_stmt (dump_file, stmt, 0);
6110 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6112 static void
6113 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6114 tree rhs1, tree rhs2)
6116 if (dump_file && (dump_flags & TDF_DETAILS))
6118 fprintf (dump_file, "Transforming ");
6119 print_gimple_stmt (dump_file, stmt, 0);
6122 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6123 update_stmt (gsi_stmt (*gsi));
6124 remove_visited_stmt_chain (rhs1);
6126 if (dump_file && (dump_flags & TDF_DETAILS))
6128 fprintf (dump_file, " into ");
6129 print_gimple_stmt (dump_file, stmt, 0);
6133 /* Reassociate expressions in basic block BB and in its children
6134 in the post-dominator tree.
6136 Bubble up return status from maybe_optimize_range_tests. */
6138 static bool
6139 reassociate_bb (basic_block bb)
6141 gimple_stmt_iterator gsi;
6142 basic_block son;
6143 gimple *stmt = last_stmt (bb);
6144 bool cfg_cleanup_needed = false;
6146 if (stmt && !gimple_visited_p (stmt))
6147 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6149 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
6151 stmt = gsi_stmt (gsi);
6153 if (is_gimple_assign (stmt)
6154 && !stmt_could_throw_p (cfun, stmt))
6156 tree lhs, rhs1, rhs2;
6157 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6159 /* If this was part of an already processed statement,
6160 we don't need to touch it again. */
6161 if (gimple_visited_p (stmt))
6163 /* This statement might have become dead because of previous
6164 reassociations. */
6165 if (has_zero_uses (gimple_get_lhs (stmt)))
6167 reassoc_remove_stmt (&gsi);
6168 release_defs (stmt);
6169 /* We might end up removing the last stmt above, which
6170 leaves the iterator at the end of the sequence.
6171 Reset it to the last stmt of the block in that case;
6172 if the block is now empty, the iterator is still at
6173 the end of the sequence and we need to bail out. */
6175 if (gsi_end_p (gsi))
6177 gsi = gsi_last_bb (bb);
6178 if (gsi_end_p (gsi))
6179 break;
6182 continue;
6185 /* If this is not a gimple binary expression, there is
6186 nothing for us to do with it. */
6187 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6188 continue;
6190 lhs = gimple_assign_lhs (stmt);
6191 rhs1 = gimple_assign_rhs1 (stmt);
6192 rhs2 = gimple_assign_rhs2 (stmt);
6194 /* For non-bit or min/max operations we can't associate
6195 all types. Verify that here. */
6196 if (rhs_code != BIT_IOR_EXPR
6197 && rhs_code != BIT_AND_EXPR
6198 && rhs_code != BIT_XOR_EXPR
6199 && rhs_code != MIN_EXPR
6200 && rhs_code != MAX_EXPR
6201 && (!can_reassociate_p (lhs)
6202 || !can_reassociate_p (rhs1)
6203 || !can_reassociate_p (rhs2)))
6204 continue;
6206 if (associative_tree_code (rhs_code))
6208 auto_vec<operand_entry *> ops;
6209 tree powi_result = NULL_TREE;
6210 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6212 /* There may be no immediate uses left by the time we
6213 get here because we may have eliminated them all. */
6214 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6215 continue;
6217 gimple_set_visited (stmt, true);
6218 linearize_expr_tree (&ops, stmt, true, true);
6219 ops.qsort (sort_by_operand_rank);
6220 int orig_len = ops.length ();
6221 optimize_ops_list (rhs_code, &ops);
6222 if (undistribute_ops_list (rhs_code, &ops,
6223 loop_containing_stmt (stmt)))
6225 ops.qsort (sort_by_operand_rank);
6226 optimize_ops_list (rhs_code, &ops);
6228 if (undistribute_bitref_for_vector (rhs_code, &ops,
6229 loop_containing_stmt (stmt)))
6231 ops.qsort (sort_by_operand_rank);
6232 optimize_ops_list (rhs_code, &ops);
6234 if (rhs_code == PLUS_EXPR
6235 && transform_add_to_multiply (&ops))
6236 ops.qsort (sort_by_operand_rank);
6238 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6240 if (is_vector)
6241 optimize_vec_cond_expr (rhs_code, &ops);
6242 else
6243 optimize_range_tests (rhs_code, &ops, NULL);
6246 if (rhs_code == MULT_EXPR && !is_vector)
6248 attempt_builtin_copysign (&ops);
6250 if (reassoc_insert_powi_p
6251 && flag_unsafe_math_optimizations)
6252 powi_result = attempt_builtin_powi (stmt, &ops);
6255 operand_entry *last;
6256 bool negate_result = false;
6257 if (ops.length () > 1
6258 && rhs_code == MULT_EXPR)
6260 last = ops.last ();
6261 if ((integer_minus_onep (last->op)
6262 || real_minus_onep (last->op))
6263 && !HONOR_SNANS (TREE_TYPE (lhs))
6264 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6265 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6267 ops.pop ();
6268 negate_result = true;
6272 tree new_lhs = lhs;
6273 /* If the operand vector is now empty, all operands were
6274 consumed by the __builtin_powi optimization. */
6275 if (ops.length () == 0)
6276 transform_stmt_to_copy (&gsi, stmt, powi_result);
6277 else if (ops.length () == 1)
6279 tree last_op = ops.last ()->op;
6281 /* If the stmt that defines operand has to be inserted, insert it
6282 before the use. */
6283 if (ops.last ()->stmt_to_insert)
6284 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6285 if (powi_result)
6286 transform_stmt_to_multiply (&gsi, stmt, last_op,
6287 powi_result);
6288 else
6289 transform_stmt_to_copy (&gsi, stmt, last_op);
6291 else
6293 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6294 int ops_num = ops.length ();
6295 int width;
6297 /* For binary bit operations, if there are at least 3
6298 operands and the last operand in OPS is a constant,
6299 move it to the front. This helps ensure that we generate
6300 (X & Y) & C rather than (X & C) & Y. The former will
6301 often match a canonical bit test when we get to RTL. */
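/* E.g. for a source expression like x & y & 0x1000 the operand
   vector at this point is roughly { x, y, 0x1000 }; moving the
   constant to the front makes the rewritten tree (x & y) & 0x1000,
   the form that tends to match a bit-test pattern at RTL.  The
   constant here is purely illustrative.  */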
6302 if (ops.length () > 2
6303 && (rhs_code == BIT_AND_EXPR
6304 || rhs_code == BIT_IOR_EXPR
6305 || rhs_code == BIT_XOR_EXPR)
6306 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6307 std::swap (*ops[0], *ops[ops_num - 1]);
6309 /* Only rewrite the expression tree into a parallel form in the
6310 last reassoc pass, to avoid useless back-and-forth work with
6311 the initial linearization. */
6312 if (!reassoc_insert_powi_p
6313 && ops.length () > 3
6314 && (width = get_reassociation_width (ops_num, rhs_code,
6315 mode)) > 1)
6317 if (dump_file && (dump_flags & TDF_DETAILS))
6318 fprintf (dump_file,
6319 "Width = %d was chosen for reassociation\n",
6320 width);
6321 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6322 width, ops);
6324 else
6326 /* When there are three operands left, we want
6327 to make sure the ones that get the double
6328 binary op are chosen wisely. */
6329 int len = ops.length ();
6330 if (len >= 3)
6331 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6333 new_lhs = rewrite_expr_tree (stmt, 0, ops,
6334 powi_result != NULL
6335 || negate_result,
6336 len != orig_len);
6339 /* If we combined some repeated factors into a
6340 __builtin_powi call, multiply that result by the
6341 reassociated operands. */
6342 if (powi_result)
6344 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6345 tree type = TREE_TYPE (lhs);
6346 tree target_ssa = make_temp_ssa_name (type, NULL,
6347 "reassocpow");
6348 gimple_set_lhs (lhs_stmt, target_ssa);
6349 update_stmt (lhs_stmt);
6350 if (lhs != new_lhs)
6352 target_ssa = new_lhs;
6353 new_lhs = lhs;
6355 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6356 powi_result, target_ssa);
6357 gimple_set_location (mul_stmt, gimple_location (stmt));
6358 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6359 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6363 if (negate_result)
6365 stmt = SSA_NAME_DEF_STMT (lhs);
6366 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6367 gimple_set_lhs (stmt, tmp);
6368 if (lhs != new_lhs)
6369 tmp = new_lhs;
6370 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6371 tmp);
6372 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6373 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6374 update_stmt (stmt);
6379 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6380 son;
6381 son = next_dom_son (CDI_POST_DOMINATORS, son))
6382 cfg_cleanup_needed |= reassociate_bb (son);
6384 return cfg_cleanup_needed;
6387 /* Add jumps around shifts for range tests turned into bit tests.
6388 For each SSA_NAME VAR we have code like:
6389 VAR = ...; // final stmt of range comparison
6390 // bit test here...;
6391 OTHERVAR = ...; // final stmt of the bit test sequence
6392 RES = VAR | OTHERVAR;
6393 Turn the above into:
6394 VAR = ...;
6395 if (VAR != 0)
6396 goto <l3>;
6397 else
6398 goto <l2>;
6399 <l2>:
6400 // bit test here...;
6401 OTHERVAR = ...;
6402 <l3>:
6403 # RES = PHI<1(l1), OTHERVAR(l2)>; */
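/* As a purely illustrative sketch of the input, a test chain such as

     if (x == 10 || x == 12 || x == 26 || x == 28)

   may have been split by the range-test optimization into a range
   comparison VAR and a bit-test sequence (which contains a shift),
   combined as RES = VAR | OTHERVAR.  The rewrite below replaces the
   '|' with control flow so that the shift is only executed on the
   path where VAR is false.  */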
6405 static void
6406 branch_fixup (void)
6408 tree var;
6409 unsigned int i;
6411 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6413 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6414 gimple *use_stmt;
6415 use_operand_p use;
6416 bool ok = single_imm_use (var, &use, &use_stmt);
6417 gcc_assert (ok
6418 && is_gimple_assign (use_stmt)
6419 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6420 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6422 basic_block cond_bb = gimple_bb (def_stmt);
6423 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6424 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6426 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6427 gimple *g = gimple_build_cond (NE_EXPR, var,
6428 build_zero_cst (TREE_TYPE (var)),
6429 NULL_TREE, NULL_TREE);
6430 location_t loc = gimple_location (use_stmt);
6431 gimple_set_location (g, loc);
6432 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6434 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6435 etrue->probability = profile_probability::even ();
6436 edge efalse = find_edge (cond_bb, then_bb);
6437 efalse->flags = EDGE_FALSE_VALUE;
6438 efalse->probability -= etrue->probability;
6439 then_bb->count -= etrue->count ();
6441 tree othervar = NULL_TREE;
6442 if (gimple_assign_rhs1 (use_stmt) == var)
6443 othervar = gimple_assign_rhs2 (use_stmt);
6444 else if (gimple_assign_rhs2 (use_stmt) == var)
6445 othervar = gimple_assign_rhs1 (use_stmt);
6446 else
6447 gcc_unreachable ();
6448 tree lhs = gimple_assign_lhs (use_stmt);
6449 gphi *phi = create_phi_node (lhs, merge_bb);
6450 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6451 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6452 gsi = gsi_for_stmt (use_stmt);
6453 gsi_remove (&gsi, true);
6455 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6456 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6458 reassoc_branch_fixups.release ();
6461 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6462 void debug_ops_vector (vec<operand_entry *> ops);
6464 /* Dump the operand entry vector OPS to FILE. */
6466 void
6467 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6469 operand_entry *oe;
6470 unsigned int i;
6472 FOR_EACH_VEC_ELT (ops, i, oe)
6474 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6475 print_generic_expr (file, oe->op);
6476 fprintf (file, "\n");
6480 /* Dump the operand entry vector OPS to STDERR. */
6482 DEBUG_FUNCTION void
6483 debug_ops_vector (vec<operand_entry *> ops)
6485 dump_ops_vector (stderr, ops);
6488 /* Bubble up return status from reassociate_bb. */
6490 static bool
6491 do_reassoc (void)
6493 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6494 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6497 /* Initialize the reassociation pass. */
6499 static void
6500 init_reassoc (void)
6502 int i;
6503 long rank = 2;
6504 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6506 /* Find the loops, so that we can prevent moving calculations in
6507 them. */
6508 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6510 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6512 next_operand_entry_id = 0;
6514 /* Reverse RPO (Reverse Post Order) will give us something where
6515 deeper loops come later. */
6516 pre_and_rev_post_order_compute (NULL, bbs, false);
6517 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6518 operand_rank = new hash_map<tree, long>;
6520 /* Give each default definition a distinct rank. This includes
6521 parameters and the static chain. Walk backwards over all
6522 SSA names so that we get proper rank ordering according
6523 to tree_swap_operands_p. */
6524 for (i = num_ssa_names - 1; i > 0; --i)
6526 tree name = ssa_name (i);
6527 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6528 insert_operand_rank (name, ++rank);
6531 /* Set up rank for each BB */
6532 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6533 bb_rank[bbs[i]] = ++rank << 16;
6535 free (bbs);
6536 calculate_dominance_info (CDI_POST_DOMINATORS);
6537 plus_negates = vNULL;
6540 /* Cleanup after the reassociation pass, and print stats if
6541 requested. */
6543 static void
6544 fini_reassoc (void)
6546 statistics_counter_event (cfun, "Linearized",
6547 reassociate_stats.linearized);
6548 statistics_counter_event (cfun, "Constants eliminated",
6549 reassociate_stats.constants_eliminated);
6550 statistics_counter_event (cfun, "Ops eliminated",
6551 reassociate_stats.ops_eliminated);
6552 statistics_counter_event (cfun, "Statements rewritten",
6553 reassociate_stats.rewritten);
6554 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6555 reassociate_stats.pows_encountered);
6556 statistics_counter_event (cfun, "Built-in powi calls created",
6557 reassociate_stats.pows_created);
6559 delete operand_rank;
6560 operand_entry_pool.release ();
6561 free (bb_rank);
6562 plus_negates.release ();
6563 free_dominance_info (CDI_POST_DOMINATORS);
6564 loop_optimizer_finalize ();
6567 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6568 insertion of __builtin_powi calls.
6570 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6571 optimization of a gimple conditional. Otherwise returns zero. */
6573 static unsigned int
6574 execute_reassoc (bool insert_powi_p)
6576 reassoc_insert_powi_p = insert_powi_p;
6578 init_reassoc ();
6580 bool cfg_cleanup_needed;
6581 cfg_cleanup_needed = do_reassoc ();
6582 repropagate_negates ();
6583 branch_fixup ();
6585 fini_reassoc ();
6586 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6589 namespace {
6591 const pass_data pass_data_reassoc =
6593 GIMPLE_PASS, /* type */
6594 "reassoc", /* name */
6595 OPTGROUP_NONE, /* optinfo_flags */
6596 TV_TREE_REASSOC, /* tv_id */
6597 ( PROP_cfg | PROP_ssa ), /* properties_required */
6598 0, /* properties_provided */
6599 0, /* properties_destroyed */
6600 0, /* todo_flags_start */
6601 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6604 class pass_reassoc : public gimple_opt_pass
6606 public:
6607 pass_reassoc (gcc::context *ctxt)
6608 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6611 /* opt_pass methods: */
6612 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6613 void set_pass_param (unsigned int n, bool param)
6615 gcc_assert (n == 0);
6616 insert_powi_p = param;
6618 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6619 virtual unsigned int execute (function *)
6620 { return execute_reassoc (insert_powi_p); }
6622 private:
6623 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6624 point 3a in the pass header comment. */
6625 bool insert_powi_p;
6626 }; // class pass_reassoc
6628 } // anon namespace
6630 gimple_opt_pass *
6631 make_pass_reassoc (gcc::context *ctxt)
6633 return new pass_reassoc (ctxt);
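/* The pass is instantiated more than once from the pass pipeline, with
   the bool parameter supplied through set_pass_param.  Illustratively
   (the authoritative entries live in passes.def):

     NEXT_PASS (pass_reassoc, true);    insert_powi_p
     ...
     NEXT_PASS (pass_reassoc, false);   insert_powi_p

   The instance run with true inserts __builtin_powi calls; the final
   instance, run with false, instead allows the parallel rewrite in
   reassociate_bb (see the !reassoc_insert_powi_p check there).  */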