re PR fortran/87318 (gfortran.dg/dtio_1.f90 is invalid)
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob6b0bf5c03548cc0b49b73a406f0d14c7608ec07b
1 /* Reassociation for trees.
2 Copyright (C) 2005-2018 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
218 all basic blocks the CFG should be adjusted - basic blocks
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advatage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
480 static inline int
481 constant_type (tree t)
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484 return INTEGER_CONST_TYPE;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
487 /* Sort -1.0 and 1.0 constants last, while in some cases
488 const_binop can't optimize some inexact operations, multiplication
489 by -1.0 or 1.0 can be always merged with others. */
490 if (real_onep (t) || real_minus_onep (t))
491 return FLOAT_ONE_CONST_TYPE;
492 return FLOAT_CONST_TYPE;
494 else
495 return OTHER_CONST_TYPE;
498 /* qsort comparison function to sort operand entries PA and PB by rank
499 so that the sorted array is ordered by rank in decreasing order. */
500 static int
501 sort_by_operand_rank (const void *pa, const void *pb)
503 const operand_entry *oea = *(const operand_entry *const *)pa;
504 const operand_entry *oeb = *(const operand_entry *const *)pb;
506 if (oeb->rank != oea->rank)
507 return oeb->rank > oea->rank ? 1 : -1;
509 /* It's nicer for optimize_expression if constants that are likely
510 to fold when added/multiplied/whatever are put next to each
511 other. Since all constants have rank 0, order them by type. */
512 if (oea->rank == 0)
514 if (constant_type (oeb->op) != constant_type (oea->op))
515 return constant_type (oea->op) - constant_type (oeb->op);
516 else
517 /* To make sorting result stable, we use unique IDs to determine
518 order. */
519 return oeb->id > oea->id ? 1 : -1;
522 if (TREE_CODE (oea->op) != SSA_NAME)
524 if (TREE_CODE (oeb->op) != SSA_NAME)
525 return oeb->id > oea->id ? 1 : -1;
526 else
527 return 1;
529 else if (TREE_CODE (oeb->op) != SSA_NAME)
530 return -1;
532 /* Lastly, make sure the versions that are the same go next to each
533 other. */
534 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
536 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
537 versions of removed SSA_NAMEs, so if possible, prefer to sort
538 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539 See PR60418. */
540 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542 basic_block bba = gimple_bb (stmta);
543 basic_block bbb = gimple_bb (stmtb);
544 if (bbb != bba)
546 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 but the other might not. */
548 if (!bba)
549 return 1;
550 if (!bbb)
551 return -1;
552 /* If neither is, compare bb_rank. */
553 if (bb_rank[bbb->index] != bb_rank[bba->index])
554 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
557 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559 if (da != db)
560 return da ? 1 : -1;
562 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
565 return oeb->id > oea->id ? 1 : -1;
568 /* Add an operand entry to *OPS for the tree operand OP. */
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
573 operand_entry *oe = operand_entry_pool.allocate ();
575 oe->op = op;
576 oe->rank = get_rank (op);
577 oe->id = next_operand_entry_id++;
578 oe->count = 1;
579 oe->stmt_to_insert = stmt_to_insert;
580 ops->safe_push (oe);
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584 count REPEAT. */
586 static void
587 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
588 HOST_WIDE_INT repeat)
590 operand_entry *oe = operand_entry_pool.allocate ();
592 oe->op = op;
593 oe->rank = get_rank (op);
594 oe->id = next_operand_entry_id++;
595 oe->count = repeat;
596 oe->stmt_to_insert = NULL;
597 ops->safe_push (oe);
599 reassociate_stats.pows_encountered++;
602 /* Return true if STMT is reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */
605 static bool
606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
608 basic_block bb = gimple_bb (stmt);
610 if (gimple_bb (stmt) == NULL)
611 return false;
613 if (!flow_bb_inside_loop_p (loop, bb))
614 return false;
616 if (is_gimple_assign (stmt)
617 && gimple_assign_rhs_code (stmt) == code
618 && has_single_use (gimple_assign_lhs (stmt)))
620 tree rhs1 = gimple_assign_rhs1 (stmt);
621 tree rhs2 = gimple_assign_rhs2 (stmt);
622 if (TREE_CODE (rhs1) == SSA_NAME
623 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 return false;
625 if (rhs2
626 && TREE_CODE (rhs2) == SSA_NAME
627 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 return false;
629 return true;
632 return false;
636 /* Return true if STMT is a nop-conversion. */
638 static bool
639 gimple_nop_conversion_p (gimple *stmt)
641 if (gassign *ass = dyn_cast <gassign *> (stmt))
643 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 TREE_TYPE (gimple_assign_rhs1 (ass))))
646 return true;
648 return false;
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652 operand of the negate operation. Otherwise, return NULL. */
654 static tree
655 get_unary_op (tree name, enum tree_code opcode)
657 gimple *stmt = SSA_NAME_DEF_STMT (name);
659 /* Look through nop conversions (sign changes). */
660 if (gimple_nop_conversion_p (stmt)
661 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
664 if (!is_gimple_assign (stmt))
665 return NULL_TREE;
667 if (gimple_assign_rhs_code (stmt) == opcode)
668 return gimple_assign_rhs1 (stmt);
669 return NULL_TREE;
672 /* Return true if OP1 and OP2 have the same value if casted to either type. */
674 static bool
675 ops_equal_values_p (tree op1, tree op2)
677 if (op1 == op2)
678 return true;
680 tree orig_op1 = op1;
681 if (TREE_CODE (op1) == SSA_NAME)
683 gimple *stmt = SSA_NAME_DEF_STMT (op1);
684 if (gimple_nop_conversion_p (stmt))
686 op1 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2)
688 return true;
692 if (TREE_CODE (op2) == SSA_NAME)
694 gimple *stmt = SSA_NAME_DEF_STMT (op2);
695 if (gimple_nop_conversion_p (stmt))
697 op2 = gimple_assign_rhs1 (stmt);
698 if (op1 == op2
699 || orig_op1 == op2)
700 return true;
704 return false;
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709 eliminate through equivalences, do so, remove them from OPS, and
710 return true. Otherwise, return false. */
712 static bool
713 eliminate_duplicate_pair (enum tree_code opcode,
714 vec<operand_entry *> *ops,
715 bool *all_done,
716 unsigned int i,
717 operand_entry *curr,
718 operand_entry *last)
721 /* If we have two of the same op, and the opcode is & |, min, or max,
722 we can eliminate one of them.
723 If we have two of the same op, and the opcode is ^, we can
724 eliminate both of them. */
726 if (last && last->op == curr->op)
728 switch (opcode)
730 case MAX_EXPR:
731 case MIN_EXPR:
732 case BIT_IOR_EXPR:
733 case BIT_AND_EXPR:
734 if (dump_file && (dump_flags & TDF_DETAILS))
736 fprintf (dump_file, "Equivalence: ");
737 print_generic_expr (dump_file, curr->op);
738 fprintf (dump_file, " [&|minmax] ");
739 print_generic_expr (dump_file, last->op);
740 fprintf (dump_file, " -> ");
741 print_generic_stmt (dump_file, last->op);
744 ops->ordered_remove (i);
745 reassociate_stats.ops_eliminated ++;
747 return true;
749 case BIT_XOR_EXPR:
750 if (dump_file && (dump_flags & TDF_DETAILS))
752 fprintf (dump_file, "Equivalence: ");
753 print_generic_expr (dump_file, curr->op);
754 fprintf (dump_file, " ^ ");
755 print_generic_expr (dump_file, last->op);
756 fprintf (dump_file, " -> nothing\n");
759 reassociate_stats.ops_eliminated += 2;
761 if (ops->length () == 2)
763 ops->truncate (0);
764 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765 *all_done = true;
767 else
769 ops->ordered_remove (i-1);
770 ops->ordered_remove (i-1);
773 return true;
775 default:
776 break;
779 return false;
782 static vec<tree> plus_negates;
784 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
785 expression, look in OPS for a corresponding positive operation to cancel
786 it out. If we find one, remove the other from OPS, replace
787 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
788 return false. */
790 static bool
791 eliminate_plus_minus_pair (enum tree_code opcode,
792 vec<operand_entry *> *ops,
793 unsigned int currindex,
794 operand_entry *curr)
796 tree negateop;
797 tree notop;
798 unsigned int i;
799 operand_entry *oe;
801 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802 return false;
804 negateop = get_unary_op (curr->op, NEGATE_EXPR);
805 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806 if (negateop == NULL_TREE && notop == NULL_TREE)
807 return false;
809 /* Any non-negated version will have a rank that is one less than
810 the current rank. So once we hit those ranks, if we don't find
811 one, we can stop. */
813 for (i = currindex + 1;
814 ops->iterate (i, &oe)
815 && oe->rank >= curr->rank - 1 ;
816 i++)
818 if (negateop
819 && ops_equal_values_p (oe->op, negateop))
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, negateop);
825 fprintf (dump_file, " + -");
826 print_generic_expr (dump_file, oe->op);
827 fprintf (dump_file, " -> 0\n");
830 ops->ordered_remove (i);
831 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832 ops->ordered_remove (currindex);
833 reassociate_stats.ops_eliminated ++;
835 return true;
837 else if (notop
838 && ops_equal_values_p (oe->op, notop))
840 tree op_type = TREE_TYPE (oe->op);
842 if (dump_file && (dump_flags & TDF_DETAILS))
844 fprintf (dump_file, "Equivalence: ");
845 print_generic_expr (dump_file, notop);
846 fprintf (dump_file, " + ~");
847 print_generic_expr (dump_file, oe->op);
848 fprintf (dump_file, " -> -1\n");
851 ops->ordered_remove (i);
852 add_to_ops_vec (ops, build_all_ones_cst (op_type));
853 ops->ordered_remove (currindex);
854 reassociate_stats.ops_eliminated ++;
856 return true;
860 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861 save it for later inspection in repropagate_negates(). */
862 if (negateop != NULL_TREE
863 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864 plus_negates.safe_push (curr->op);
866 return false;
869 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
870 bitwise not expression, look in OPS for a corresponding operand to
871 cancel it out. If we find one, remove the other from OPS, replace
872 OPS[CURRINDEX] with 0, and return true. Otherwise, return
873 false. */
875 static bool
876 eliminate_not_pairs (enum tree_code opcode,
877 vec<operand_entry *> *ops,
878 unsigned int currindex,
879 operand_entry *curr)
881 tree notop;
882 unsigned int i;
883 operand_entry *oe;
885 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886 || TREE_CODE (curr->op) != SSA_NAME)
887 return false;
889 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890 if (notop == NULL_TREE)
891 return false;
893 /* Any non-not version will have a rank that is one less than
894 the current rank. So once we hit those ranks, if we don't find
895 one, we can stop. */
897 for (i = currindex + 1;
898 ops->iterate (i, &oe)
899 && oe->rank >= curr->rank - 1;
900 i++)
902 if (oe->op == notop)
904 if (dump_file && (dump_flags & TDF_DETAILS))
906 fprintf (dump_file, "Equivalence: ");
907 print_generic_expr (dump_file, notop);
908 if (opcode == BIT_AND_EXPR)
909 fprintf (dump_file, " & ~");
910 else if (opcode == BIT_IOR_EXPR)
911 fprintf (dump_file, " | ~");
912 print_generic_expr (dump_file, oe->op);
913 if (opcode == BIT_AND_EXPR)
914 fprintf (dump_file, " -> 0\n");
915 else if (opcode == BIT_IOR_EXPR)
916 fprintf (dump_file, " -> -1\n");
919 if (opcode == BIT_AND_EXPR)
920 oe->op = build_zero_cst (TREE_TYPE (oe->op));
921 else if (opcode == BIT_IOR_EXPR)
922 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
924 reassociate_stats.ops_eliminated += ops->length () - 1;
925 ops->truncate (0);
926 ops->quick_push (oe);
927 return true;
931 return false;
934 /* Use constant value that may be present in OPS to try to eliminate
935 operands. Note that this function is only really used when we've
936 eliminated ops for other reasons, or merged constants. Across
937 single statements, fold already does all of this, plus more. There
938 is little point in duplicating logic, so I've only included the
939 identities that I could ever construct testcases to trigger. */
941 static void
942 eliminate_using_constants (enum tree_code opcode,
943 vec<operand_entry *> *ops)
945 operand_entry *oelast = ops->last ();
946 tree type = TREE_TYPE (oelast->op);
948 if (oelast->rank == 0
949 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
951 switch (opcode)
953 case BIT_AND_EXPR:
954 if (integer_zerop (oelast->op))
956 if (ops->length () != 1)
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found & 0, removing all other ops\n");
961 reassociate_stats.ops_eliminated += ops->length () - 1;
963 ops->truncate (0);
964 ops->quick_push (oelast);
965 return;
968 else if (integer_all_onesp (oelast->op))
970 if (ops->length () != 1)
972 if (dump_file && (dump_flags & TDF_DETAILS))
973 fprintf (dump_file, "Found & -1, removing\n");
974 ops->pop ();
975 reassociate_stats.ops_eliminated++;
978 break;
979 case BIT_IOR_EXPR:
980 if (integer_all_onesp (oelast->op))
982 if (ops->length () != 1)
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 fprintf (dump_file, "Found | -1, removing all other ops\n");
987 reassociate_stats.ops_eliminated += ops->length () - 1;
989 ops->truncate (0);
990 ops->quick_push (oelast);
991 return;
994 else if (integer_zerop (oelast->op))
996 if (ops->length () != 1)
998 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "Found | 0, removing\n");
1000 ops->pop ();
1001 reassociate_stats.ops_eliminated++;
1004 break;
1005 case MULT_EXPR:
1006 if (integer_zerop (oelast->op)
1007 || (FLOAT_TYPE_P (type)
1008 && !HONOR_NANS (type)
1009 && !HONOR_SIGNED_ZEROS (type)
1010 && real_zerop (oelast->op)))
1012 if (ops->length () != 1)
1014 if (dump_file && (dump_flags & TDF_DETAILS))
1015 fprintf (dump_file, "Found * 0, removing all other ops\n");
1017 reassociate_stats.ops_eliminated += ops->length () - 1;
1018 ops->truncate (1);
1019 ops->quick_push (oelast);
1020 return;
1023 else if (integer_onep (oelast->op)
1024 || (FLOAT_TYPE_P (type)
1025 && !HONOR_SNANS (type)
1026 && real_onep (oelast->op)))
1028 if (ops->length () != 1)
1030 if (dump_file && (dump_flags & TDF_DETAILS))
1031 fprintf (dump_file, "Found * 1, removing\n");
1032 ops->pop ();
1033 reassociate_stats.ops_eliminated++;
1034 return;
1037 break;
1038 case BIT_XOR_EXPR:
1039 case PLUS_EXPR:
1040 case MINUS_EXPR:
1041 if (integer_zerop (oelast->op)
1042 || (FLOAT_TYPE_P (type)
1043 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044 && fold_real_zero_addition_p (type, oelast->op,
1045 opcode == MINUS_EXPR)))
1047 if (ops->length () != 1)
1049 if (dump_file && (dump_flags & TDF_DETAILS))
1050 fprintf (dump_file, "Found [|^+] 0, removing\n");
1051 ops->pop ();
1052 reassociate_stats.ops_eliminated++;
1053 return;
1056 break;
1057 default:
1058 break;
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065 bool, bool);
1067 /* Structure for tracking and counting operands. */
1068 struct oecount {
1069 unsigned int cnt;
1070 unsigned int id;
1071 enum tree_code oecode;
1072 tree op;
1076 /* The heap for the oecount hashtable and the sorted list of operands. */
1077 static vec<oecount> cvec;
1080 /* Oecount hashtable helpers. */
1082 struct oecount_hasher : int_hash <int, 0, 1>
1084 static inline hashval_t hash (int);
1085 static inline bool equal (int, int);
1088 /* Hash function for oecount. */
1090 inline hashval_t
1091 oecount_hasher::hash (int p)
1093 const oecount *c = &cvec[p - 42];
1094 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1097 /* Comparison function for oecount. */
1099 inline bool
1100 oecount_hasher::equal (int p1, int p2)
1102 const oecount *c1 = &cvec[p1 - 42];
1103 const oecount *c2 = &cvec[p2 - 42];
1104 return c1->oecode == c2->oecode && c1->op == c2->op;
1107 /* Comparison function for qsort sorting oecount elements by count. */
1109 static int
1110 oecount_cmp (const void *p1, const void *p2)
1112 const oecount *c1 = (const oecount *)p1;
1113 const oecount *c2 = (const oecount *)p2;
1114 if (c1->cnt != c2->cnt)
1115 return c1->cnt > c2->cnt ? 1 : -1;
1116 else
1117 /* If counts are identical, use unique IDs to stabilize qsort. */
1118 return c1->id > c2->id ? 1 : -1;
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122 to some exponent. */
1124 static bool
1125 stmt_is_power_of_op (gimple *stmt, tree op)
1127 if (!is_gimple_call (stmt))
1128 return false;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 CASE_CFN_POWI:
1134 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1136 default:
1137 return false;
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142 in place and return the result. Assumes that stmt_is_power_of_op
1143 was previously called for STMT and returned TRUE. */
1145 static HOST_WIDE_INT
1146 decrement_power (gimple *stmt)
1148 REAL_VALUE_TYPE c, cint;
1149 HOST_WIDE_INT power;
1150 tree arg1;
1152 switch (gimple_call_combined_fn (stmt))
1154 CASE_CFN_POW:
1155 arg1 = gimple_call_arg (stmt, 1);
1156 c = TREE_REAL_CST (arg1);
1157 power = real_to_integer (&c) - 1;
1158 real_from_integer (&cint, VOIDmode, power, SIGNED);
1159 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160 return power;
1162 CASE_CFN_POWI:
1163 arg1 = gimple_call_arg (stmt, 1);
1164 power = TREE_INT_CST_LOW (arg1) - 1;
1165 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166 return power;
1168 default:
1169 gcc_unreachable ();
1173 /* Replace SSA defined by STMT and replace all its uses with new
1174 SSA. Also return the new SSA. */
1176 static tree
1177 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1179 gimple *use_stmt;
1180 use_operand_p use;
1181 imm_use_iterator iter;
1182 tree new_lhs, new_debug_lhs = NULL_TREE;
1183 tree lhs = gimple_get_lhs (stmt);
1185 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186 gimple_set_lhs (stmt, new_lhs);
1188 /* Also need to update GIMPLE_DEBUGs. */
1189 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1191 tree repl = new_lhs;
1192 if (is_gimple_debug (use_stmt))
1194 if (new_debug_lhs == NULL_TREE)
1196 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197 gdebug *def_temp
1198 = gimple_build_debug_bind (new_debug_lhs,
1199 build2 (opcode, TREE_TYPE (lhs),
1200 new_lhs, op),
1201 stmt);
1202 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205 gimple_set_uid (def_temp, gimple_uid (stmt));
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1209 repl = new_debug_lhs;
1211 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212 SET_USE (use, repl);
1213 update_stmt (use_stmt);
1215 return new_lhs;
1218 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1219 uses with new SSAs. Also do this for the stmt that defines DEF
1220 if *DEF is not OP. */
1222 static void
1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224 vec<gimple *> &stmts_to_fix)
1226 unsigned i;
1227 gimple *stmt;
1229 if (*def != op
1230 && TREE_CODE (*def) == SSA_NAME
1231 && (stmt = SSA_NAME_DEF_STMT (*def))
1232 && gimple_code (stmt) != GIMPLE_NOP)
1233 *def = make_new_ssa_for_def (stmt, opcode, op);
1235 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236 make_new_ssa_for_def (stmt, opcode, op);
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1241 replace *DEF with OP as well. */
1243 static void
1244 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1246 tree lhs;
1247 gimple *use_stmt;
1248 use_operand_p use;
1249 gimple_stmt_iterator gsi;
1251 if (is_gimple_call (stmt))
1252 lhs = gimple_call_lhs (stmt);
1253 else
1254 lhs = gimple_assign_lhs (stmt);
1256 gcc_assert (has_single_use (lhs));
1257 single_imm_use (lhs, &use, &use_stmt);
1258 if (lhs == *def)
1259 *def = op;
1260 SET_USE (use, op);
1261 if (TREE_CODE (op) != SSA_NAME)
1262 update_stmt (use_stmt);
1263 gsi = gsi_for_stmt (stmt);
1264 unlink_stmt_vdef (stmt);
1265 reassoc_remove_stmt (&gsi);
1266 release_defs (stmt);
1269 /* Walks the linear chain with result *DEF searching for an operation
1270 with operand OP and code OPCODE removing that from the chain. *DEF
1271 is updated if there is only one operand but no operation left. */
1273 static void
1274 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1276 tree orig_def = *def;
1277 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1278 /* PR72835 - Record the stmt chain that has to be updated such that
1279 we dont use the same LHS when the values computed are different. */
1280 auto_vec<gimple *, 64> stmts_to_fix;
1284 tree name;
1286 if (opcode == MULT_EXPR)
1288 if (stmt_is_power_of_op (stmt, op))
1290 if (decrement_power (stmt) == 1)
1292 if (stmts_to_fix.length () > 0)
1293 stmts_to_fix.pop ();
1294 propagate_op_to_single_use (op, stmt, def);
1296 break;
1298 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1300 if (gimple_assign_rhs1 (stmt) == op)
1302 tree cst = build_minus_one_cst (TREE_TYPE (op));
1303 if (stmts_to_fix.length () > 0)
1304 stmts_to_fix.pop ();
1305 propagate_op_to_single_use (cst, stmt, def);
1306 break;
1308 else if (integer_minus_onep (op)
1309 || real_minus_onep (op))
1311 gimple_assign_set_rhs_code
1312 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313 break;
1318 name = gimple_assign_rhs1 (stmt);
1320 /* If this is the operation we look for and one of the operands
1321 is ours simply propagate the other operand into the stmts
1322 single use. */
1323 if (gimple_assign_rhs_code (stmt) == opcode
1324 && (name == op
1325 || gimple_assign_rhs2 (stmt) == op))
1327 if (name == op)
1328 name = gimple_assign_rhs2 (stmt);
1329 if (stmts_to_fix.length () > 0)
1330 stmts_to_fix.pop ();
1331 propagate_op_to_single_use (name, stmt, def);
1332 break;
1335 /* We might have a multiply of two __builtin_pow* calls, and
1336 the operand might be hiding in the rightmost one. Likewise
1337 this can happen for a negate. */
1338 if (opcode == MULT_EXPR
1339 && gimple_assign_rhs_code (stmt) == opcode
1340 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341 && has_single_use (gimple_assign_rhs2 (stmt)))
1343 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344 if (stmt_is_power_of_op (stmt2, op))
1346 if (decrement_power (stmt2) == 1)
1347 propagate_op_to_single_use (op, stmt2, def);
1348 else
1349 stmts_to_fix.safe_push (stmt2);
1350 break;
1352 else if (is_gimple_assign (stmt2)
1353 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1355 if (gimple_assign_rhs1 (stmt2) == op)
1357 tree cst = build_minus_one_cst (TREE_TYPE (op));
1358 propagate_op_to_single_use (cst, stmt2, def);
1359 break;
1361 else if (integer_minus_onep (op)
1362 || real_minus_onep (op))
1364 stmts_to_fix.safe_push (stmt2);
1365 gimple_assign_set_rhs_code
1366 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367 break;
1372 /* Continue walking the chain. */
1373 gcc_assert (name != op
1374 && TREE_CODE (name) == SSA_NAME);
1375 stmt = SSA_NAME_DEF_STMT (name);
1376 stmts_to_fix.safe_push (stmt);
1378 while (1);
1380 if (stmts_to_fix.length () > 0 || *def == orig_def)
1381 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1384 /* Returns true if statement S1 dominates statement S2. Like
1385 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1387 static bool
1388 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1390 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1392 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1393 SSA_NAME. Assume it lives at the beginning of function and
1394 thus dominates everything. */
1395 if (!bb1 || s1 == s2)
1396 return true;
1398 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1399 if (!bb2)
1400 return false;
1402 if (bb1 == bb2)
1404 /* PHIs in the same basic block are assumed to be
1405 executed all in parallel, if only one stmt is a PHI,
1406 it dominates the other stmt in the same basic block. */
1407 if (gimple_code (s1) == GIMPLE_PHI)
1408 return true;
1410 if (gimple_code (s2) == GIMPLE_PHI)
1411 return false;
1413 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1415 if (gimple_uid (s1) < gimple_uid (s2))
1416 return true;
1418 if (gimple_uid (s1) > gimple_uid (s2))
1419 return false;
1421 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422 unsigned int uid = gimple_uid (s1);
1423 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1425 gimple *s = gsi_stmt (gsi);
1426 if (gimple_uid (s) != uid)
1427 break;
1428 if (s == s2)
1429 return true;
1432 return false;
1435 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1438 /* Insert STMT after INSERT_POINT. */
1440 static void
1441 insert_stmt_after (gimple *stmt, gimple *insert_point)
1443 gimple_stmt_iterator gsi;
1444 basic_block bb;
1446 if (gimple_code (insert_point) == GIMPLE_PHI)
1447 bb = gimple_bb (insert_point);
1448 else if (!stmt_ends_bb_p (insert_point))
1450 gsi = gsi_for_stmt (insert_point);
1451 gimple_set_uid (stmt, gimple_uid (insert_point));
1452 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453 return;
1455 else
1456 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457 thus if it must end a basic block, it should be a call that can
1458 throw, or some assignment that can throw. If it throws, the LHS
1459 of it will not be initialized though, so only valid places using
1460 the SSA_NAME should be dominated by the fallthru edge. */
1461 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462 gsi = gsi_after_labels (bb);
1463 if (gsi_end_p (gsi))
1465 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466 gimple_set_uid (stmt,
1467 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1469 else
1470 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1474 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1475 the result. Places the statement after the definition of either
1476 OP1 or OP2. Returns the new statement. */
1478 static gimple *
1479 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1481 gimple *op1def = NULL, *op2def = NULL;
1482 gimple_stmt_iterator gsi;
1483 tree op;
1484 gassign *sum;
1486 /* Create the addition statement. */
1487 op = make_ssa_name (type);
1488 sum = gimple_build_assign (op, opcode, op1, op2);
1490 /* Find an insertion place and insert. */
1491 if (TREE_CODE (op1) == SSA_NAME)
1492 op1def = SSA_NAME_DEF_STMT (op1);
1493 if (TREE_CODE (op2) == SSA_NAME)
1494 op2def = SSA_NAME_DEF_STMT (op2);
1495 if ((!op1def || gimple_nop_p (op1def))
1496 && (!op2def || gimple_nop_p (op2def)))
1498 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499 if (gsi_end_p (gsi))
1501 gimple_stmt_iterator gsi2
1502 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503 gimple_set_uid (sum,
1504 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1506 else
1507 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1510 else
1512 gimple *insert_point;
1513 if ((!op1def || gimple_nop_p (op1def))
1514 || (op2def && !gimple_nop_p (op2def)
1515 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 insert_point = op2def;
1517 else
1518 insert_point = op1def;
1519 insert_stmt_after (sum, insert_point);
1521 update_stmt (sum);
1523 return sum;
1526 /* Perform un-distribution of divisions and multiplications.
1527 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528 to (A + B) / X for real X.
1530 The algorithm is organized as follows.
1532 - First we walk the addition chain *OPS looking for summands that
1533 are defined by a multiplication or a real division. This results
1534 in the candidates bitmap with relevant indices into *OPS.
1536 - Second we build the chains of multiplications or divisions for
1537 these candidates, counting the number of occurrences of (operand, code)
1538 pairs in all of the candidates chains.
1540 - Third we sort the (operand, code) pairs by number of occurrence and
1541 process them starting with the pair with the most uses.
1543 * For each such pair we walk the candidates again to build a
1544 second candidate bitmap noting all multiplication/division chains
1545 that have at least one occurrence of (operand, code).
1547 * We build an alternate addition chain only covering these
1548 candidates with one (operand, code) operation removed from their
1549 multiplication/division chain.
1551 * The first candidate gets replaced by the alternate addition chain
1552 multiplied/divided by the operand.
1554 * All candidate chains get disabled for further processing and
1555 processing of (operand, code) pairs continues.
1557 The alternate addition chains built are re-processed by the main
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x
1559 to (a + b ) * x * y in one invocation of the reassociation pass. */
1561 static bool
1562 undistribute_ops_list (enum tree_code opcode,
1563 vec<operand_entry *> *ops, struct loop *loop)
1565 unsigned int length = ops->length ();
1566 operand_entry *oe1;
1567 unsigned i, j;
1568 unsigned nr_candidates, nr_candidates2;
1569 sbitmap_iterator sbi0;
1570 vec<operand_entry *> *subops;
1571 bool changed = false;
1572 unsigned int next_oecount_id = 0;
1574 if (length <= 1
1575 || opcode != PLUS_EXPR)
1576 return false;
1578 /* Build a list of candidates to process. */
1579 auto_sbitmap candidates (length);
1580 bitmap_clear (candidates);
1581 nr_candidates = 0;
1582 FOR_EACH_VEC_ELT (*ops, i, oe1)
1584 enum tree_code dcode;
1585 gimple *oe1def;
1587 if (TREE_CODE (oe1->op) != SSA_NAME)
1588 continue;
1589 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590 if (!is_gimple_assign (oe1def))
1591 continue;
1592 dcode = gimple_assign_rhs_code (oe1def);
1593 if ((dcode != MULT_EXPR
1594 && dcode != RDIV_EXPR)
1595 || !is_reassociable_op (oe1def, dcode, loop))
1596 continue;
1598 bitmap_set_bit (candidates, i);
1599 nr_candidates++;
1602 if (nr_candidates < 2)
1603 return false;
1605 if (dump_file && (dump_flags & TDF_DETAILS))
1607 fprintf (dump_file, "searching for un-distribute opportunities ");
1608 print_generic_expr (dump_file,
1609 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1610 fprintf (dump_file, " %d\n", nr_candidates);
1613 /* Build linearized sub-operand lists and the counting table. */
1614 cvec.create (0);
1616 hash_table<oecount_hasher> ctable (15);
1618 /* ??? Macro arguments cannot have multi-argument template types in
1619 them. This typedef is needed to workaround that limitation. */
1620 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1624 gimple *oedef;
1625 enum tree_code oecode;
1626 unsigned j;
1628 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629 oecode = gimple_assign_rhs_code (oedef);
1630 linearize_expr_tree (&subops[i], oedef,
1631 associative_tree_code (oecode), false);
1633 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1635 oecount c;
1636 int *slot;
1637 int idx;
1638 c.oecode = oecode;
1639 c.cnt = 1;
1640 c.id = next_oecount_id++;
1641 c.op = oe1->op;
1642 cvec.safe_push (c);
1643 idx = cvec.length () + 41;
1644 slot = ctable.find_slot (idx, INSERT);
1645 if (!*slot)
1647 *slot = idx;
1649 else
1651 cvec.pop ();
1652 cvec[*slot - 42].cnt++;
1657 /* Sort the counting table. */
1658 cvec.qsort (oecount_cmp);
1660 if (dump_file && (dump_flags & TDF_DETAILS))
1662 oecount *c;
1663 fprintf (dump_file, "Candidates:\n");
1664 FOR_EACH_VEC_ELT (cvec, j, c)
1666 fprintf (dump_file, " %u %s: ", c->cnt,
1667 c->oecode == MULT_EXPR
1668 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669 print_generic_expr (dump_file, c->op);
1670 fprintf (dump_file, "\n");
1674 /* Process the (operand, code) pairs in order of most occurrence. */
1675 auto_sbitmap candidates2 (length);
1676 while (!cvec.is_empty ())
1678 oecount *c = &cvec.last ();
1679 if (c->cnt < 2)
1680 break;
1682 /* Now collect the operands in the outer chain that contain
1683 the common operand in their inner chain. */
1684 bitmap_clear (candidates2);
1685 nr_candidates2 = 0;
1686 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1688 gimple *oedef;
1689 enum tree_code oecode;
1690 unsigned j;
1691 tree op = (*ops)[i]->op;
1693 /* If we undistributed in this chain already this may be
1694 a constant. */
1695 if (TREE_CODE (op) != SSA_NAME)
1696 continue;
1698 oedef = SSA_NAME_DEF_STMT (op);
1699 oecode = gimple_assign_rhs_code (oedef);
1700 if (oecode != c->oecode)
1701 continue;
1703 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1705 if (oe1->op == c->op)
1707 bitmap_set_bit (candidates2, i);
1708 ++nr_candidates2;
1709 break;
1714 if (nr_candidates2 >= 2)
1716 operand_entry *oe1, *oe2;
1717 gimple *prod;
1718 int first = bitmap_first_set_bit (candidates2);
1720 /* Build the new addition chain. */
1721 oe1 = (*ops)[first];
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1724 fprintf (dump_file, "Building (");
1725 print_generic_expr (dump_file, oe1->op);
1727 zero_one_operation (&oe1->op, c->oecode, c->op);
1728 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1730 gimple *sum;
1731 oe2 = (*ops)[i];
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1734 fprintf (dump_file, " + ");
1735 print_generic_expr (dump_file, oe2->op);
1737 zero_one_operation (&oe2->op, c->oecode, c->op);
1738 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739 oe1->op, oe2->op, opcode);
1740 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741 oe2->rank = 0;
1742 oe1->op = gimple_get_lhs (sum);
1745 /* Apply the multiplication/division. */
1746 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747 oe1->op, c->op, c->oecode);
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1750 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751 print_generic_expr (dump_file, c->op);
1752 fprintf (dump_file, "\n");
1755 /* Record it in the addition chain and disable further
1756 undistribution with this op. */
1757 oe1->op = gimple_assign_lhs (prod);
1758 oe1->rank = get_rank (oe1->op);
1759 subops[first].release ();
1761 changed = true;
1764 cvec.pop ();
1767 for (i = 0; i < ops->length (); ++i)
1768 subops[i].release ();
1769 free (subops);
1770 cvec.release ();
1772 return changed;
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776 expression, examine the other OPS to see if any of them are comparisons
1777 of the same values, which we may be able to combine or eliminate.
1778 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1780 static bool
1781 eliminate_redundant_comparison (enum tree_code opcode,
1782 vec<operand_entry *> *ops,
1783 unsigned int currindex,
1784 operand_entry *curr)
1786 tree op1, op2;
1787 enum tree_code lcode, rcode;
1788 gimple *def1, *def2;
1789 int i;
1790 operand_entry *oe;
1792 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1793 return false;
1795 /* Check that CURR is a comparison. */
1796 if (TREE_CODE (curr->op) != SSA_NAME)
1797 return false;
1798 def1 = SSA_NAME_DEF_STMT (curr->op);
1799 if (!is_gimple_assign (def1))
1800 return false;
1801 lcode = gimple_assign_rhs_code (def1);
1802 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1803 return false;
1804 op1 = gimple_assign_rhs1 (def1);
1805 op2 = gimple_assign_rhs2 (def1);
1807 /* Now look for a similar comparison in the remaining OPS. */
1808 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1810 tree t;
1812 if (TREE_CODE (oe->op) != SSA_NAME)
1813 continue;
1814 def2 = SSA_NAME_DEF_STMT (oe->op);
1815 if (!is_gimple_assign (def2))
1816 continue;
1817 rcode = gimple_assign_rhs_code (def2);
1818 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1819 continue;
1821 /* If we got here, we have a match. See if we can combine the
1822 two comparisons. */
1823 if (opcode == BIT_IOR_EXPR)
1824 t = maybe_fold_or_comparisons (lcode, op1, op2,
1825 rcode, gimple_assign_rhs1 (def2),
1826 gimple_assign_rhs2 (def2));
1827 else
1828 t = maybe_fold_and_comparisons (lcode, op1, op2,
1829 rcode, gimple_assign_rhs1 (def2),
1830 gimple_assign_rhs2 (def2));
1831 if (!t)
1832 continue;
1834 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1835 always give us a boolean_type_node value back. If the original
1836 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1837 we need to convert. */
1838 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1839 t = fold_convert (TREE_TYPE (curr->op), t);
1841 if (TREE_CODE (t) != INTEGER_CST
1842 && !operand_equal_p (t, curr->op, 0))
1844 enum tree_code subcode;
1845 tree newop1, newop2;
1846 if (!COMPARISON_CLASS_P (t))
1847 continue;
1848 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1849 STRIP_USELESS_TYPE_CONVERSION (newop1);
1850 STRIP_USELESS_TYPE_CONVERSION (newop2);
1851 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1852 continue;
1855 if (dump_file && (dump_flags & TDF_DETAILS))
1857 fprintf (dump_file, "Equivalence: ");
1858 print_generic_expr (dump_file, curr->op);
1859 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1860 print_generic_expr (dump_file, oe->op);
1861 fprintf (dump_file, " -> ");
1862 print_generic_expr (dump_file, t);
1863 fprintf (dump_file, "\n");
1866 /* Now we can delete oe, as it has been subsumed by the new combined
1867 expression t. */
1868 ops->ordered_remove (i);
1869 reassociate_stats.ops_eliminated ++;
1871 /* If t is the same as curr->op, we're done. Otherwise we must
1872 replace curr->op with t. Special case is if we got a constant
1873 back, in which case we add it to the end instead of in place of
1874 the current entry. */
1875 if (TREE_CODE (t) == INTEGER_CST)
1877 ops->ordered_remove (currindex);
1878 add_to_ops_vec (ops, t);
1880 else if (!operand_equal_p (t, curr->op, 0))
1882 gimple *sum;
1883 enum tree_code subcode;
1884 tree newop1;
1885 tree newop2;
1886 gcc_assert (COMPARISON_CLASS_P (t));
1887 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1888 STRIP_USELESS_TYPE_CONVERSION (newop1);
1889 STRIP_USELESS_TYPE_CONVERSION (newop2);
1890 gcc_checking_assert (is_gimple_val (newop1)
1891 && is_gimple_val (newop2));
1892 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1893 curr->op = gimple_get_lhs (sum);
1895 return true;
1898 return false;
1902 /* Transform repeated addition of same values into multiply with
1903 constant. */
1904 static bool
1905 transform_add_to_multiply (vec<operand_entry *> *ops)
1907 operand_entry *oe;
1908 tree op = NULL_TREE;
1909 int j;
1910 int i, start = -1, end = 0, count = 0;
1911 auto_vec<std::pair <int, int> > indxs;
1912 bool changed = false;
1914 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1915 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1916 || !flag_unsafe_math_optimizations))
1917 return false;
1919 /* Look for repeated operands. */
1920 FOR_EACH_VEC_ELT (*ops, i, oe)
1922 if (start == -1)
1924 count = 1;
1925 op = oe->op;
1926 start = i;
1928 else if (operand_equal_p (oe->op, op, 0))
1930 count++;
1931 end = i;
1933 else
1935 if (count > 1)
1936 indxs.safe_push (std::make_pair (start, end));
1937 count = 1;
1938 op = oe->op;
1939 start = i;
1943 if (count > 1)
1944 indxs.safe_push (std::make_pair (start, end));
1946 for (j = indxs.length () - 1; j >= 0; --j)
1948 /* Convert repeated operand addition to multiplication. */
1949 start = indxs[j].first;
1950 end = indxs[j].second;
1951 op = (*ops)[start]->op;
1952 count = end - start + 1;
1953 for (i = end; i >= start; --i)
1954 ops->unordered_remove (i);
1955 tree tmp = make_ssa_name (TREE_TYPE (op));
1956 tree cst = build_int_cst (integer_type_node, count);
1957 gassign *mul_stmt
1958 = gimple_build_assign (tmp, MULT_EXPR,
1959 op, fold_convert (TREE_TYPE (op), cst));
1960 gimple_set_visited (mul_stmt, true);
1961 add_to_ops_vec (ops, tmp, mul_stmt);
1962 changed = true;
1965 return changed;
1969 /* Perform various identities and other optimizations on the list of
1970 operand entries, stored in OPS. The tree code for the binary
1971 operation between all the operands is OPCODE. */
1973 static void
1974 optimize_ops_list (enum tree_code opcode,
1975 vec<operand_entry *> *ops)
1977 unsigned int length = ops->length ();
1978 unsigned int i;
1979 operand_entry *oe;
1980 operand_entry *oelast = NULL;
1981 bool iterate = false;
1983 if (length == 1)
1984 return;
1986 oelast = ops->last ();
1988 /* If the last two are constants, pop the constants off, merge them
1989 and try the next two. */
1990 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1992 operand_entry *oelm1 = (*ops)[length - 2];
1994 if (oelm1->rank == 0
1995 && is_gimple_min_invariant (oelm1->op)
1996 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1997 TREE_TYPE (oelast->op)))
1999 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2000 oelm1->op, oelast->op);
2002 if (folded && is_gimple_min_invariant (folded))
2004 if (dump_file && (dump_flags & TDF_DETAILS))
2005 fprintf (dump_file, "Merging constants\n");
2007 ops->pop ();
2008 ops->pop ();
2010 add_to_ops_vec (ops, folded);
2011 reassociate_stats.constants_eliminated++;
2013 optimize_ops_list (opcode, ops);
2014 return;
2019 eliminate_using_constants (opcode, ops);
2020 oelast = NULL;
2022 for (i = 0; ops->iterate (i, &oe);)
2024 bool done = false;
2026 if (eliminate_not_pairs (opcode, ops, i, oe))
2027 return;
2028 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2029 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2030 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2032 if (done)
2033 return;
2034 iterate = true;
2035 oelast = NULL;
2036 continue;
2038 oelast = oe;
2039 i++;
2042 length = ops->length ();
2043 oelast = ops->last ();
2045 if (iterate)
2046 optimize_ops_list (opcode, ops);
2049 /* The following functions are subroutines to optimize_range_tests and allow
2050 it to try to change a logical combination of comparisons into a range
2051 test.
2053 For example, both
2054 X == 2 || X == 5 || X == 3 || X == 4
2056 X >= 2 && X <= 5
2057 are converted to
2058 (unsigned) (X - 2) <= 3
2060 For more information see comments above fold_test_range in fold-const.c,
2061 this implementation is for GIMPLE. */
2063 struct range_entry
2065 tree exp;
2066 tree low;
2067 tree high;
2068 bool in_p;
2069 bool strict_overflow_p;
2070 unsigned int idx, next;
2073 /* This is similar to make_range in fold-const.c, but on top of
2074 GIMPLE instead of trees. If EXP is non-NULL, it should be
2075 an SSA_NAME and STMT argument is ignored, otherwise STMT
2076 argument should be a GIMPLE_COND. */
2078 static void
2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2081 int in_p;
2082 tree low, high;
2083 bool is_bool, strict_overflow_p;
2085 r->exp = NULL_TREE;
2086 r->in_p = false;
2087 r->strict_overflow_p = false;
2088 r->low = NULL_TREE;
2089 r->high = NULL_TREE;
2090 if (exp != NULL_TREE
2091 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2092 return;
2094 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2095 and see if we can refine the range. Some of the cases below may not
2096 happen, but it doesn't seem worth worrying about this. We "continue"
2097 the outer loop when we've changed something; otherwise we "break"
2098 the switch, which will "break" the while. */
2099 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2100 high = low;
2101 in_p = 0;
2102 strict_overflow_p = false;
2103 is_bool = false;
2104 if (exp == NULL_TREE)
2105 is_bool = true;
2106 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2108 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2109 is_bool = true;
2110 else
2111 return;
2113 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2114 is_bool = true;
2116 while (1)
2118 enum tree_code code;
2119 tree arg0, arg1, exp_type;
2120 tree nexp;
2121 location_t loc;
2123 if (exp != NULL_TREE)
2125 if (TREE_CODE (exp) != SSA_NAME
2126 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2127 break;
2129 stmt = SSA_NAME_DEF_STMT (exp);
2130 if (!is_gimple_assign (stmt))
2131 break;
2133 code = gimple_assign_rhs_code (stmt);
2134 arg0 = gimple_assign_rhs1 (stmt);
2135 arg1 = gimple_assign_rhs2 (stmt);
2136 exp_type = TREE_TYPE (exp);
2138 else
2140 code = gimple_cond_code (stmt);
2141 arg0 = gimple_cond_lhs (stmt);
2142 arg1 = gimple_cond_rhs (stmt);
2143 exp_type = boolean_type_node;
2146 if (TREE_CODE (arg0) != SSA_NAME)
2147 break;
2148 loc = gimple_location (stmt);
2149 switch (code)
2151 case BIT_NOT_EXPR:
2152 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2153 /* Ensure the range is either +[-,0], +[0,0],
2154 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2155 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2156 or similar expression of unconditional true or
2157 false, it should not be negated. */
2158 && ((high && integer_zerop (high))
2159 || (low && integer_onep (low))))
2161 in_p = !in_p;
2162 exp = arg0;
2163 continue;
2165 break;
2166 case SSA_NAME:
2167 exp = arg0;
2168 continue;
2169 CASE_CONVERT:
2170 if (is_bool)
2172 if ((TYPE_PRECISION (exp_type) == 1
2173 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2174 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2175 return;
2177 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2179 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2180 is_bool = true;
2181 else
2182 return;
2184 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2185 is_bool = true;
2186 goto do_default;
2187 case EQ_EXPR:
2188 case NE_EXPR:
2189 case LT_EXPR:
2190 case LE_EXPR:
2191 case GE_EXPR:
2192 case GT_EXPR:
2193 is_bool = true;
2194 /* FALLTHRU */
2195 default:
2196 if (!is_bool)
2197 return;
2198 do_default:
2199 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2200 &low, &high, &in_p,
2201 &strict_overflow_p);
2202 if (nexp != NULL_TREE)
2204 exp = nexp;
2205 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2206 continue;
2208 break;
2210 break;
2212 if (is_bool)
2214 r->exp = exp;
2215 r->in_p = in_p;
2216 r->low = low;
2217 r->high = high;
2218 r->strict_overflow_p = strict_overflow_p;
2222 /* Comparison function for qsort. Sort entries
2223 without SSA_NAME exp first, then with SSA_NAMEs sorted
2224 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2225 by increasing ->low and if ->low is the same, by increasing
2226 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2227 maximum. */
2229 static int
2230 range_entry_cmp (const void *a, const void *b)
2232 const struct range_entry *p = (const struct range_entry *) a;
2233 const struct range_entry *q = (const struct range_entry *) b;
2235 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2237 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2239 /* Group range_entries for the same SSA_NAME together. */
2240 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2241 return -1;
2242 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2243 return 1;
2244 /* If ->low is different, NULL low goes first, then by
2245 ascending low. */
2246 if (p->low != NULL_TREE)
2248 if (q->low != NULL_TREE)
2250 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2251 p->low, q->low);
2252 if (tem && integer_onep (tem))
2253 return -1;
2254 tem = fold_binary (GT_EXPR, boolean_type_node,
2255 p->low, q->low);
2256 if (tem && integer_onep (tem))
2257 return 1;
2259 else
2260 return 1;
2262 else if (q->low != NULL_TREE)
2263 return -1;
2264 /* If ->high is different, NULL high goes last, before that by
2265 ascending high. */
2266 if (p->high != NULL_TREE)
2268 if (q->high != NULL_TREE)
2270 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2271 p->high, q->high);
2272 if (tem && integer_onep (tem))
2273 return -1;
2274 tem = fold_binary (GT_EXPR, boolean_type_node,
2275 p->high, q->high);
2276 if (tem && integer_onep (tem))
2277 return 1;
2279 else
2280 return -1;
2282 else if (q->high != NULL_TREE)
2283 return 1;
2284 /* If both ranges are the same, sort below by ascending idx. */
2286 else
2287 return 1;
2289 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2290 return -1;
2292 if (p->idx < q->idx)
2293 return -1;
2294 else
2296 gcc_checking_assert (p->idx > q->idx);
2297 return 1;
2301 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2302 insert needed statements BEFORE or after GSI. */
2304 static tree
2305 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2307 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2308 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2309 if (TREE_CODE (ret) != SSA_NAME)
2311 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2312 if (before)
2313 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2314 else
2315 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2316 ret = gimple_assign_lhs (g);
2318 return ret;
2321 /* Helper routine of optimize_range_test.
2322 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2323 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2324 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2325 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2326 an array of COUNT pointers to other ranges. Return
2327 true if the range merge has been successful.
2328 If OPCODE is ERROR_MARK, this is called from within
2329 maybe_optimize_range_tests and is performing inter-bb range optimization.
2330 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2331 oe->rank. */
2333 static bool
2334 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2335 struct range_entry **otherrangep,
2336 unsigned int count, enum tree_code opcode,
2337 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2338 bool in_p, tree low, tree high, bool strict_overflow_p)
2340 operand_entry *oe = (*ops)[range->idx];
2341 tree op = oe->op;
2342 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2343 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2344 location_t loc = gimple_location (stmt);
2345 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2346 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2347 in_p, low, high);
2348 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2349 gimple_stmt_iterator gsi;
2350 unsigned int i, uid;
2352 if (tem == NULL_TREE)
2353 return false;
2355 /* If op is default def SSA_NAME, there is no place to insert the
2356 new comparison. Give up, unless we can use OP itself as the
2357 range test. */
2358 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2360 if (op == range->exp
2361 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2362 || TREE_CODE (optype) == BOOLEAN_TYPE)
2363 && (op == tem
2364 || (TREE_CODE (tem) == EQ_EXPR
2365 && TREE_OPERAND (tem, 0) == op
2366 && integer_onep (TREE_OPERAND (tem, 1))))
2367 && opcode != BIT_IOR_EXPR
2368 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2370 stmt = NULL;
2371 tem = op;
2373 else
2374 return false;
2377 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2378 warning_at (loc, OPT_Wstrict_overflow,
2379 "assuming signed overflow does not occur "
2380 "when simplifying range test");
2382 if (dump_file && (dump_flags & TDF_DETAILS))
2384 struct range_entry *r;
2385 fprintf (dump_file, "Optimizing range tests ");
2386 print_generic_expr (dump_file, range->exp);
2387 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2388 print_generic_expr (dump_file, range->low);
2389 fprintf (dump_file, ", ");
2390 print_generic_expr (dump_file, range->high);
2391 fprintf (dump_file, "]");
2392 for (i = 0; i < count; i++)
2394 if (otherrange)
2395 r = otherrange + i;
2396 else
2397 r = otherrangep[i];
2398 if (r->exp
2399 && r->exp != range->exp
2400 && TREE_CODE (r->exp) == SSA_NAME)
2402 fprintf (dump_file, " and ");
2403 print_generic_expr (dump_file, r->exp);
2405 else
2406 fprintf (dump_file, " and");
2407 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2408 print_generic_expr (dump_file, r->low);
2409 fprintf (dump_file, ", ");
2410 print_generic_expr (dump_file, r->high);
2411 fprintf (dump_file, "]");
2413 fprintf (dump_file, "\n into ");
2414 print_generic_expr (dump_file, tem);
2415 fprintf (dump_file, "\n");
2418 if (opcode == BIT_IOR_EXPR
2419 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2420 tem = invert_truthvalue_loc (loc, tem);
2422 tem = fold_convert_loc (loc, optype, tem);
2423 if (stmt)
2425 gsi = gsi_for_stmt (stmt);
2426 uid = gimple_uid (stmt);
2428 else
2430 gsi = gsi_none ();
2431 uid = 0;
2433 if (stmt == NULL)
2434 gcc_checking_assert (tem == op);
2435 /* In rare cases range->exp can be equal to lhs of stmt.
2436 In that case we have to insert after the stmt rather then before
2437 it. If stmt is a PHI, insert it at the start of the basic block. */
2438 else if (op != range->exp)
2440 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2441 tem = force_into_ssa_name (&gsi, tem, true);
2442 gsi_prev (&gsi);
2444 else if (gimple_code (stmt) != GIMPLE_PHI)
2446 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2447 tem = force_into_ssa_name (&gsi, tem, false);
2449 else
2451 gsi = gsi_after_labels (gimple_bb (stmt));
2452 if (!gsi_end_p (gsi))
2453 uid = gimple_uid (gsi_stmt (gsi));
2454 else
2456 gsi = gsi_start_bb (gimple_bb (stmt));
2457 uid = 1;
2458 while (!gsi_end_p (gsi))
2460 uid = gimple_uid (gsi_stmt (gsi));
2461 gsi_next (&gsi);
2464 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2465 tem = force_into_ssa_name (&gsi, tem, true);
2466 if (gsi_end_p (gsi))
2467 gsi = gsi_last_bb (gimple_bb (stmt));
2468 else
2469 gsi_prev (&gsi);
2471 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2472 if (gimple_uid (gsi_stmt (gsi)))
2473 break;
2474 else
2475 gimple_set_uid (gsi_stmt (gsi), uid);
2477 oe->op = tem;
2478 range->exp = exp;
2479 range->low = low;
2480 range->high = high;
2481 range->in_p = in_p;
2482 range->strict_overflow_p = false;
2484 for (i = 0; i < count; i++)
2486 if (otherrange)
2487 range = otherrange + i;
2488 else
2489 range = otherrangep[i];
2490 oe = (*ops)[range->idx];
2491 /* Now change all the other range test immediate uses, so that
2492 those tests will be optimized away. */
2493 if (opcode == ERROR_MARK)
2495 if (oe->op)
2496 oe->op = build_int_cst (TREE_TYPE (oe->op),
2497 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2498 else
2499 oe->op = (oe->rank == BIT_IOR_EXPR
2500 ? boolean_false_node : boolean_true_node);
2502 else
2503 oe->op = error_mark_node;
2504 range->exp = NULL_TREE;
2505 range->low = NULL_TREE;
2506 range->high = NULL_TREE;
2508 return true;
2511 /* Optimize X == CST1 || X == CST2
2512 if popcount (CST1 ^ CST2) == 1 into
2513 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2514 Similarly for ranges. E.g.
2515 X != 2 && X != 3 && X != 10 && X != 11
2516 will be transformed by the previous optimization into
2517 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2518 and this loop can transform that into
2519 !(((X & ~8) - 2U) <= 1U). */
2521 static bool
2522 optimize_range_tests_xor (enum tree_code opcode, tree type,
2523 tree lowi, tree lowj, tree highi, tree highj,
2524 vec<operand_entry *> *ops,
2525 struct range_entry *rangei,
2526 struct range_entry *rangej)
2528 tree lowxor, highxor, tem, exp;
2529 /* Check lowi ^ lowj == highi ^ highj and
2530 popcount (lowi ^ lowj) == 1. */
2531 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2532 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2533 return false;
2534 if (!integer_pow2p (lowxor))
2535 return false;
2536 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2537 if (!tree_int_cst_equal (lowxor, highxor))
2538 return false;
2540 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2541 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2542 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2543 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2544 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2545 NULL, rangei->in_p, lowj, highj,
2546 rangei->strict_overflow_p
2547 || rangej->strict_overflow_p))
2548 return true;
2549 return false;
2552 /* Optimize X == CST1 || X == CST2
2553 if popcount (CST2 - CST1) == 1 into
2554 ((X - CST1) & ~(CST2 - CST1)) == 0.
2555 Similarly for ranges. E.g.
2556 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2557 || X == 75 || X == 45
2558 will be transformed by the previous optimization into
2559 (X - 43U) <= 3U || (X - 75U) <= 3U
2560 and this loop can transform that into
2561 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2562 static bool
2563 optimize_range_tests_diff (enum tree_code opcode, tree type,
2564 tree lowi, tree lowj, tree highi, tree highj,
2565 vec<operand_entry *> *ops,
2566 struct range_entry *rangei,
2567 struct range_entry *rangej)
2569 tree tem1, tem2, mask;
2570 /* Check highi - lowi == highj - lowj. */
2571 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2572 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2573 return false;
2574 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2575 if (!tree_int_cst_equal (tem1, tem2))
2576 return false;
2577 /* Check popcount (lowj - lowi) == 1. */
2578 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2579 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2580 return false;
2581 if (!integer_pow2p (tem1))
2582 return false;
2584 type = unsigned_type_for (type);
2585 tem1 = fold_convert (type, tem1);
2586 tem2 = fold_convert (type, tem2);
2587 lowi = fold_convert (type, lowi);
2588 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2589 tem1 = fold_build2 (MINUS_EXPR, type,
2590 fold_convert (type, rangei->exp), lowi);
2591 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2592 lowj = build_int_cst (type, 0);
2593 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2594 NULL, rangei->in_p, lowj, tem2,
2595 rangei->strict_overflow_p
2596 || rangej->strict_overflow_p))
2597 return true;
2598 return false;
2601 /* It does some common checks for function optimize_range_tests_xor and
2602 optimize_range_tests_diff.
2603 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2604 Else it calls optimize_range_tests_diff. */
2606 static bool
2607 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2608 bool optimize_xor, vec<operand_entry *> *ops,
2609 struct range_entry *ranges)
2611 int i, j;
2612 bool any_changes = false;
2613 for (i = first; i < length; i++)
2615 tree lowi, highi, lowj, highj, type, tem;
2617 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2618 continue;
2619 type = TREE_TYPE (ranges[i].exp);
2620 if (!INTEGRAL_TYPE_P (type))
2621 continue;
2622 lowi = ranges[i].low;
2623 if (lowi == NULL_TREE)
2624 lowi = TYPE_MIN_VALUE (type);
2625 highi = ranges[i].high;
2626 if (highi == NULL_TREE)
2627 continue;
2628 for (j = i + 1; j < length && j < i + 64; j++)
2630 bool changes;
2631 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2632 continue;
2633 lowj = ranges[j].low;
2634 if (lowj == NULL_TREE)
2635 continue;
2636 highj = ranges[j].high;
2637 if (highj == NULL_TREE)
2638 highj = TYPE_MAX_VALUE (type);
2639 /* Check lowj > highi. */
2640 tem = fold_binary (GT_EXPR, boolean_type_node,
2641 lowj, highi);
2642 if (tem == NULL_TREE || !integer_onep (tem))
2643 continue;
2644 if (optimize_xor)
2645 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2646 highi, highj, ops,
2647 ranges + i, ranges + j);
2648 else
2649 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2650 highi, highj, ops,
2651 ranges + i, ranges + j);
2652 if (changes)
2654 any_changes = true;
2655 break;
2659 return any_changes;
2662 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2663 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2664 EXP on success, NULL otherwise. */
2666 static tree
2667 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2668 wide_int *mask, tree *totallowp)
2670 tree tem = int_const_binop (MINUS_EXPR, high, low);
2671 if (tem == NULL_TREE
2672 || TREE_CODE (tem) != INTEGER_CST
2673 || TREE_OVERFLOW (tem)
2674 || tree_int_cst_sgn (tem) == -1
2675 || compare_tree_int (tem, prec) != -1)
2676 return NULL_TREE;
2678 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2679 *mask = wi::shifted_mask (0, max, false, prec);
2680 if (TREE_CODE (exp) == BIT_AND_EXPR
2681 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2683 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2684 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2685 if (wi::popcount (msk) == 1
2686 && wi::ltu_p (msk, prec - max))
2688 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2689 max += msk.to_uhwi ();
2690 exp = TREE_OPERAND (exp, 0);
2691 if (integer_zerop (low)
2692 && TREE_CODE (exp) == PLUS_EXPR
2693 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2695 tree ret = TREE_OPERAND (exp, 0);
2696 STRIP_NOPS (ret);
2697 widest_int bias
2698 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2699 TYPE_PRECISION (TREE_TYPE (low))));
2700 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2701 if (totallowp)
2703 *totallowp = tbias;
2704 return ret;
2706 else if (!tree_int_cst_lt (totallow, tbias))
2707 return NULL_TREE;
2708 bias = wi::to_widest (tbias);
2709 bias -= wi::to_widest (totallow);
2710 if (bias >= 0 && bias < prec - max)
2712 *mask = wi::lshift (*mask, bias);
2713 return ret;
2718 if (totallowp)
2719 return exp;
2720 if (!tree_int_cst_lt (totallow, low))
2721 return exp;
2722 tem = int_const_binop (MINUS_EXPR, low, totallow);
2723 if (tem == NULL_TREE
2724 || TREE_CODE (tem) != INTEGER_CST
2725 || TREE_OVERFLOW (tem)
2726 || compare_tree_int (tem, prec - max) == 1)
2727 return NULL_TREE;
2729 *mask = wi::lshift (*mask, wi::to_widest (tem));
2730 return exp;
2733 /* Attempt to optimize small range tests using bit test.
2734 E.g.
2735 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2736 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2737 has been by earlier optimizations optimized into:
2738 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2739 As all the 43 through 82 range is less than 64 numbers,
2740 for 64-bit word targets optimize that into:
2741 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2743 static bool
2744 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2745 vec<operand_entry *> *ops,
2746 struct range_entry *ranges)
2748 int i, j;
2749 bool any_changes = false;
2750 int prec = GET_MODE_BITSIZE (word_mode);
2751 auto_vec<struct range_entry *, 64> candidates;
2753 for (i = first; i < length - 2; i++)
2755 tree lowi, highi, lowj, highj, type;
2757 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2758 continue;
2759 type = TREE_TYPE (ranges[i].exp);
2760 if (!INTEGRAL_TYPE_P (type))
2761 continue;
2762 lowi = ranges[i].low;
2763 if (lowi == NULL_TREE)
2764 lowi = TYPE_MIN_VALUE (type);
2765 highi = ranges[i].high;
2766 if (highi == NULL_TREE)
2767 continue;
2768 wide_int mask;
2769 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2770 highi, &mask, &lowi);
2771 if (exp == NULL_TREE)
2772 continue;
2773 bool strict_overflow_p = ranges[i].strict_overflow_p;
2774 candidates.truncate (0);
2775 int end = MIN (i + 64, length);
2776 for (j = i + 1; j < end; j++)
2778 tree exp2;
2779 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2780 continue;
2781 if (ranges[j].exp == exp)
2783 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2785 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2786 if (exp2 == exp)
2788 else if (TREE_CODE (exp2) == PLUS_EXPR)
2790 exp2 = TREE_OPERAND (exp2, 0);
2791 STRIP_NOPS (exp2);
2792 if (exp2 != exp)
2793 continue;
2795 else
2796 continue;
2798 else
2799 continue;
2800 lowj = ranges[j].low;
2801 if (lowj == NULL_TREE)
2802 continue;
2803 highj = ranges[j].high;
2804 if (highj == NULL_TREE)
2805 highj = TYPE_MAX_VALUE (type);
2806 wide_int mask2;
2807 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2808 highj, &mask2, NULL);
2809 if (exp2 != exp)
2810 continue;
2811 mask |= mask2;
2812 strict_overflow_p |= ranges[j].strict_overflow_p;
2813 candidates.safe_push (&ranges[j]);
2816 /* If we need otherwise 3 or more comparisons, use a bit test. */
2817 if (candidates.length () >= 2)
2819 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2820 wi::to_widest (lowi)
2821 + prec - 1 - wi::clz (mask));
2822 operand_entry *oe = (*ops)[ranges[i].idx];
2823 tree op = oe->op;
2824 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2825 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2826 location_t loc = gimple_location (stmt);
2827 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2829 /* See if it isn't cheaper to pretend the minimum value of the
2830 range is 0, if maximum value is small enough.
2831 We can avoid then subtraction of the minimum value, but the
2832 mask constant could be perhaps more expensive. */
2833 if (compare_tree_int (lowi, 0) > 0
2834 && compare_tree_int (high, prec) < 0)
2836 int cost_diff;
2837 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2838 rtx reg = gen_raw_REG (word_mode, 10000);
2839 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2840 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2841 GEN_INT (-m)), speed_p);
2842 rtx r = immed_wide_int_const (mask, word_mode);
2843 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2844 word_mode, speed_p);
2845 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2846 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2847 word_mode, speed_p);
2848 if (cost_diff > 0)
2850 mask = wi::lshift (mask, m);
2851 lowi = build_zero_cst (TREE_TYPE (lowi));
2855 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2856 false, lowi, high);
2857 if (tem == NULL_TREE || is_gimple_val (tem))
2858 continue;
2859 tree etype = unsigned_type_for (TREE_TYPE (exp));
2860 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2861 fold_convert_loc (loc, etype, exp),
2862 fold_convert_loc (loc, etype, lowi));
2863 exp = fold_convert_loc (loc, integer_type_node, exp);
2864 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2865 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2866 build_int_cst (word_type, 1), exp);
2867 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2868 wide_int_to_tree (word_type, mask));
2869 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2870 build_zero_cst (word_type));
2871 if (is_gimple_val (exp))
2872 continue;
2874 /* The shift might have undefined behavior if TEM is true,
2875 but reassociate_bb isn't prepared to have basic blocks
2876 split when it is running. So, temporarily emit a code
2877 with BIT_IOR_EXPR instead of &&, and fix it up in
2878 branch_fixup. */
2879 gimple_seq seq;
2880 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2881 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2882 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2883 gimple_seq seq2;
2884 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2885 gimple_seq_add_seq_without_update (&seq, seq2);
2886 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2887 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2888 gimple *g = gimple_build_assign (make_ssa_name (optype),
2889 BIT_IOR_EXPR, tem, exp);
2890 gimple_set_location (g, loc);
2891 gimple_seq_add_stmt_without_update (&seq, g);
2892 exp = gimple_assign_lhs (g);
2893 tree val = build_zero_cst (optype);
2894 if (update_range_test (&ranges[i], NULL, candidates.address (),
2895 candidates.length (), opcode, ops, exp,
2896 seq, false, val, val, strict_overflow_p))
2898 any_changes = true;
2899 reassoc_branch_fixups.safe_push (tem);
2901 else
2902 gimple_seq_discard (seq);
2905 return any_changes;
2908 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
2909 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. */
2911 static bool
2912 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2913 vec<operand_entry *> *ops,
2914 struct range_entry *ranges)
2916 int i;
2917 unsigned int b;
2918 bool any_changes = false;
2919 auto_vec<int, 128> buckets;
2920 auto_vec<int, 32> chains;
2921 auto_vec<struct range_entry *, 32> candidates;
2923 for (i = first; i < length; i++)
2925 if (ranges[i].exp == NULL_TREE
2926 || TREE_CODE (ranges[i].exp) != SSA_NAME
2927 || !ranges[i].in_p
2928 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2929 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2930 || ranges[i].low == NULL_TREE
2931 || ranges[i].low != ranges[i].high)
2932 continue;
2934 bool zero_p = integer_zerop (ranges[i].low);
2935 if (!zero_p && !integer_all_onesp (ranges[i].low))
2936 continue;
2938 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2939 if (buckets.length () <= b)
2940 buckets.safe_grow_cleared (b + 1);
2941 if (chains.length () <= (unsigned) i)
2942 chains.safe_grow (i + 1);
2943 chains[i] = buckets[b];
2944 buckets[b] = i + 1;
2947 FOR_EACH_VEC_ELT (buckets, b, i)
2948 if (i && chains[i - 1])
2950 int j, k = i;
2951 for (j = chains[i - 1]; j; j = chains[j - 1])
2953 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2954 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2955 if (reassoc_stmt_dominates_stmt_p (gk, gj))
2956 k = j;
2958 tree type1 = TREE_TYPE (ranges[k - 1].exp);
2959 tree type2 = NULL_TREE;
2960 bool strict_overflow_p = false;
2961 candidates.truncate (0);
2962 for (j = i; j; j = chains[j - 1])
2964 tree type = TREE_TYPE (ranges[j - 1].exp);
2965 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2966 if (j == k
2967 || useless_type_conversion_p (type1, type))
2969 else if (type2 == NULL_TREE
2970 || useless_type_conversion_p (type2, type))
2972 if (type2 == NULL_TREE)
2973 type2 = type;
2974 candidates.safe_push (&ranges[j - 1]);
2977 unsigned l = candidates.length ();
2978 for (j = i; j; j = chains[j - 1])
2980 tree type = TREE_TYPE (ranges[j - 1].exp);
2981 if (j == k)
2982 continue;
2983 if (useless_type_conversion_p (type1, type))
2985 else if (type2 == NULL_TREE
2986 || useless_type_conversion_p (type2, type))
2987 continue;
2988 candidates.safe_push (&ranges[j - 1]);
2990 gimple_seq seq = NULL;
2991 tree op = NULL_TREE;
2992 unsigned int id;
2993 struct range_entry *r;
2994 candidates.safe_push (&ranges[k - 1]);
2995 FOR_EACH_VEC_ELT (candidates, id, r)
2997 gimple *g;
2998 if (id == 0)
3000 op = r->exp;
3001 continue;
3003 if (id == l)
3005 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3006 gimple_seq_add_stmt_without_update (&seq, g);
3007 op = gimple_assign_lhs (g);
3009 tree type = TREE_TYPE (r->exp);
3010 tree exp = r->exp;
3011 if (id >= l && !useless_type_conversion_p (type1, type))
3013 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3014 gimple_seq_add_stmt_without_update (&seq, g);
3015 exp = gimple_assign_lhs (g);
3017 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3018 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3019 op, exp);
3020 gimple_seq_add_stmt_without_update (&seq, g);
3021 op = gimple_assign_lhs (g);
3023 candidates.pop ();
3024 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3025 candidates.length (), opcode, ops, op,
3026 seq, true, ranges[k - 1].low,
3027 ranges[k - 1].low, strict_overflow_p))
3028 any_changes = true;
3029 else
3030 gimple_seq_discard (seq);
3033 return any_changes;
3036 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3037 a >= 0 && a < b into (unsigned) a < (unsigned) b
3038 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3040 static bool
3041 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3042 vec<operand_entry *> *ops,
3043 struct range_entry *ranges,
3044 basic_block first_bb)
3046 int i;
3047 bool any_changes = false;
3048 hash_map<tree, int> *map = NULL;
3050 for (i = first; i < length; i++)
3052 if (ranges[i].exp == NULL_TREE
3053 || TREE_CODE (ranges[i].exp) != SSA_NAME
3054 || !ranges[i].in_p)
3055 continue;
3057 tree type = TREE_TYPE (ranges[i].exp);
3058 if (!INTEGRAL_TYPE_P (type)
3059 || TYPE_UNSIGNED (type)
3060 || ranges[i].low == NULL_TREE
3061 || !integer_zerop (ranges[i].low)
3062 || ranges[i].high != NULL_TREE)
3063 continue;
3064 /* EXP >= 0 here. */
3065 if (map == NULL)
3066 map = new hash_map <tree, int>;
3067 map->put (ranges[i].exp, i);
3070 if (map == NULL)
3071 return false;
3073 for (i = 0; i < length; i++)
3075 bool in_p = ranges[i].in_p;
3076 if (ranges[i].low == NULL_TREE
3077 || ranges[i].high == NULL_TREE)
3078 continue;
3079 if (!integer_zerop (ranges[i].low)
3080 || !integer_zerop (ranges[i].high))
3082 if (ranges[i].exp
3083 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3084 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3085 && integer_onep (ranges[i].low)
3086 && integer_onep (ranges[i].high))
3087 in_p = !in_p;
3088 else
3089 continue;
3092 gimple *stmt;
3093 tree_code ccode;
3094 tree rhs1, rhs2;
3095 if (ranges[i].exp)
3097 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3098 continue;
3099 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3100 if (!is_gimple_assign (stmt))
3101 continue;
3102 ccode = gimple_assign_rhs_code (stmt);
3103 rhs1 = gimple_assign_rhs1 (stmt);
3104 rhs2 = gimple_assign_rhs2 (stmt);
3106 else
3108 operand_entry *oe = (*ops)[ranges[i].idx];
3109 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3110 if (gimple_code (stmt) != GIMPLE_COND)
3111 continue;
3112 ccode = gimple_cond_code (stmt);
3113 rhs1 = gimple_cond_lhs (stmt);
3114 rhs2 = gimple_cond_rhs (stmt);
3117 if (TREE_CODE (rhs1) != SSA_NAME
3118 || rhs2 == NULL_TREE
3119 || TREE_CODE (rhs2) != SSA_NAME)
3120 continue;
3122 switch (ccode)
3124 case GT_EXPR:
3125 case GE_EXPR:
3126 case LT_EXPR:
3127 case LE_EXPR:
3128 break;
3129 default:
3130 continue;
3132 if (in_p)
3133 ccode = invert_tree_comparison (ccode, false);
3134 switch (ccode)
3136 case GT_EXPR:
3137 case GE_EXPR:
3138 std::swap (rhs1, rhs2);
3139 ccode = swap_tree_comparison (ccode);
3140 break;
3141 case LT_EXPR:
3142 case LE_EXPR:
3143 break;
3144 default:
3145 gcc_unreachable ();
3148 int *idx = map->get (rhs1);
3149 if (idx == NULL)
3150 continue;
3152 /* maybe_optimize_range_tests allows statements without side-effects
3153 in the basic blocks as long as they are consumed in the same bb.
3154 Make sure rhs2's def stmt is not among them, otherwise we can't
3155 use safely get_nonzero_bits on it. E.g. in:
3156 # RANGE [-83, 1] NONZERO 173
3157 # k_32 = PHI <k_47(13), k_12(9)>
3159 if (k_32 >= 0)
3160 goto <bb 5>; [26.46%]
3161 else
3162 goto <bb 9>; [73.54%]
3164 <bb 5> [local count: 140323371]:
3165 # RANGE [0, 1] NONZERO 1
3166 _5 = (int) k_32;
3167 # RANGE [0, 4] NONZERO 4
3168 _21 = _5 << 2;
3169 # RANGE [0, 4] NONZERO 4
3170 iftmp.0_44 = (char) _21;
3171 if (k_32 < iftmp.0_44)
3172 goto <bb 6>; [84.48%]
3173 else
3174 goto <bb 9>; [15.52%]
3175 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3176 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3177 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3178 those stmts even for negative k_32 and the value ranges would be no
3179 longer guaranteed and so the optimization would be invalid. */
3180 while (opcode == ERROR_MARK)
3182 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3183 basic_block bb2 = gimple_bb (g);
3184 if (bb2
3185 && bb2 != first_bb
3186 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3188 /* As an exception, handle a few common cases. */
3189 if (gimple_assign_cast_p (g)
3190 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3192 tree op0 = gimple_assign_rhs1 (g);
3193 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3194 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3195 > TYPE_PRECISION (TREE_TYPE (op0))))
3196 /* Zero-extension is always ok. */
3197 break;
3198 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3199 == TYPE_PRECISION (TREE_TYPE (op0))
3200 && TREE_CODE (op0) == SSA_NAME)
3202 /* Cast from signed to unsigned or vice versa. Retry
3203 with the op0 as new rhs2. */
3204 rhs2 = op0;
3205 continue;
3208 else if (is_gimple_assign (g)
3209 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3210 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3211 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3212 /* Masking with INTEGER_CST with MSB clear is always ok
3213 too. */
3214 break;
3215 rhs2 = NULL_TREE;
3217 break;
3219 if (rhs2 == NULL_TREE)
3220 continue;
3222 wide_int nz = get_nonzero_bits (rhs2);
3223 if (wi::neg_p (nz))
3224 continue;
3226 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3227 and RHS2 is known to be RHS2 >= 0. */
3228 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3230 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3231 if ((ranges[*idx].strict_overflow_p
3232 || ranges[i].strict_overflow_p)
3233 && issue_strict_overflow_warning (wc))
3234 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3235 "assuming signed overflow does not occur "
3236 "when simplifying range test");
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3240 struct range_entry *r = &ranges[*idx];
3241 fprintf (dump_file, "Optimizing range test ");
3242 print_generic_expr (dump_file, r->exp);
3243 fprintf (dump_file, " +[");
3244 print_generic_expr (dump_file, r->low);
3245 fprintf (dump_file, ", ");
3246 print_generic_expr (dump_file, r->high);
3247 fprintf (dump_file, "] and comparison ");
3248 print_generic_expr (dump_file, rhs1);
3249 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3250 print_generic_expr (dump_file, rhs2);
3251 fprintf (dump_file, "\n into (");
3252 print_generic_expr (dump_file, utype);
3253 fprintf (dump_file, ") ");
3254 print_generic_expr (dump_file, rhs1);
3255 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3256 print_generic_expr (dump_file, utype);
3257 fprintf (dump_file, ") ");
3258 print_generic_expr (dump_file, rhs2);
3259 fprintf (dump_file, "\n");
3262 operand_entry *oe = (*ops)[ranges[i].idx];
3263 ranges[i].in_p = 0;
3264 if (opcode == BIT_IOR_EXPR
3265 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3267 ranges[i].in_p = 1;
3268 ccode = invert_tree_comparison (ccode, false);
3271 unsigned int uid = gimple_uid (stmt);
3272 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3273 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3274 gimple_set_uid (g, uid);
3275 rhs1 = gimple_assign_lhs (g);
3276 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3277 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3279 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3280 gimple_set_uid (g, uid);
3281 rhs2 = gimple_assign_lhs (g);
3282 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3284 if (tree_swap_operands_p (rhs1, rhs2))
3286 std::swap (rhs1, rhs2);
3287 ccode = swap_tree_comparison (ccode);
3289 if (gimple_code (stmt) == GIMPLE_COND)
3291 gcond *c = as_a <gcond *> (stmt);
3292 gimple_cond_set_code (c, ccode);
3293 gimple_cond_set_lhs (c, rhs1);
3294 gimple_cond_set_rhs (c, rhs2);
3295 update_stmt (stmt);
3297 else
3299 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3300 if (!INTEGRAL_TYPE_P (ctype)
3301 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3302 && TYPE_PRECISION (ctype) != 1))
3303 ctype = boolean_type_node;
3304 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3305 gimple_set_uid (g, uid);
3306 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3307 if (oe->op && ctype != TREE_TYPE (oe->op))
3309 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3310 NOP_EXPR, gimple_assign_lhs (g));
3311 gimple_set_uid (g, uid);
3312 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3314 ranges[i].exp = gimple_assign_lhs (g);
3315 oe->op = ranges[i].exp;
3316 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3317 ranges[i].high = ranges[i].low;
3319 ranges[i].strict_overflow_p = false;
3320 oe = (*ops)[ranges[*idx].idx];
3321 /* Now change all the other range test immediate uses, so that
3322 those tests will be optimized away. */
3323 if (opcode == ERROR_MARK)
3325 if (oe->op)
3326 oe->op = build_int_cst (TREE_TYPE (oe->op),
3327 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3328 else
3329 oe->op = (oe->rank == BIT_IOR_EXPR
3330 ? boolean_false_node : boolean_true_node);
3332 else
3333 oe->op = error_mark_node;
3334 ranges[*idx].exp = NULL_TREE;
3335 ranges[*idx].low = NULL_TREE;
3336 ranges[*idx].high = NULL_TREE;
3337 any_changes = true;
3340 delete map;
3341 return any_changes;
3344 /* Optimize range tests, similarly how fold_range_test optimizes
3345 it on trees. The tree code for the binary
3346 operation between all the operands is OPCODE.
3347 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3348 maybe_optimize_range_tests for inter-bb range optimization.
3349 In that case if oe->op is NULL, oe->id is bb->index whose
3350 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3351 the actual opcode.
3352 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3354 static bool
3355 optimize_range_tests (enum tree_code opcode,
3356 vec<operand_entry *> *ops, basic_block first_bb)
3358 unsigned int length = ops->length (), i, j, first;
3359 operand_entry *oe;
3360 struct range_entry *ranges;
3361 bool any_changes = false;
3363 if (length == 1)
3364 return false;
3366 ranges = XNEWVEC (struct range_entry, length);
3367 for (i = 0; i < length; i++)
3369 oe = (*ops)[i];
3370 ranges[i].idx = i;
3371 init_range_entry (ranges + i, oe->op,
3372 oe->op
3373 ? NULL
3374 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3375 /* For | invert it now, we will invert it again before emitting
3376 the optimized expression. */
3377 if (opcode == BIT_IOR_EXPR
3378 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3379 ranges[i].in_p = !ranges[i].in_p;
3382 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3383 for (i = 0; i < length; i++)
3384 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3385 break;
3387 /* Try to merge ranges. */
3388 for (first = i; i < length; i++)
3390 tree low = ranges[i].low;
3391 tree high = ranges[i].high;
3392 int in_p = ranges[i].in_p;
3393 bool strict_overflow_p = ranges[i].strict_overflow_p;
3394 int update_fail_count = 0;
3396 for (j = i + 1; j < length; j++)
3398 if (ranges[i].exp != ranges[j].exp)
3399 break;
3400 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3401 ranges[j].in_p, ranges[j].low, ranges[j].high))
3402 break;
3403 strict_overflow_p |= ranges[j].strict_overflow_p;
3406 if (j == i + 1)
3407 continue;
3409 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3410 opcode, ops, ranges[i].exp, NULL, in_p,
3411 low, high, strict_overflow_p))
3413 i = j - 1;
3414 any_changes = true;
3416 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3417 while update_range_test would fail. */
3418 else if (update_fail_count == 64)
3419 i = j - 1;
3420 else
3421 ++update_fail_count;
3424 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3425 ops, ranges);
3427 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3428 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3429 ops, ranges);
3430 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3431 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3432 ops, ranges);
3433 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3434 ops, ranges);
3435 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3436 ranges, first_bb);
3438 if (any_changes && opcode != ERROR_MARK)
3440 j = 0;
3441 FOR_EACH_VEC_ELT (*ops, i, oe)
3443 if (oe->op == error_mark_node)
3444 continue;
3445 else if (i != j)
3446 (*ops)[j] = oe;
3447 j++;
3449 ops->truncate (j);
3452 XDELETEVEC (ranges);
3453 return any_changes;
3456 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3457 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3458 otherwise the comparison code. */
3460 static tree_code
3461 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3463 if (TREE_CODE (var) != SSA_NAME)
3464 return ERROR_MARK;
3466 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3467 if (stmt == NULL)
3468 return ERROR_MARK;
3470 /* ??? If we start creating more COND_EXPR, we could perform
3471 this same optimization with them. For now, simplify. */
3472 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3473 return ERROR_MARK;
3475 tree cond = gimple_assign_rhs1 (stmt);
3476 tree_code cmp = TREE_CODE (cond);
3477 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3478 return ERROR_MARK;
3480 /* ??? For now, allow only canonical true and false result vectors.
3481 We could expand this to other constants should the need arise,
3482 but at the moment we don't create them. */
3483 tree t = gimple_assign_rhs2 (stmt);
3484 tree f = gimple_assign_rhs3 (stmt);
3485 bool inv;
3486 if (integer_all_onesp (t))
3487 inv = false;
3488 else if (integer_all_onesp (f))
3490 cmp = invert_tree_comparison (cmp, false);
3491 inv = true;
3493 else
3494 return ERROR_MARK;
3495 if (!integer_zerop (f))
3496 return ERROR_MARK;
3498 /* Success! */
3499 if (rets)
3500 *rets = stmt;
3501 if (reti)
3502 *reti = inv;
3503 return cmp;
3506 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3507 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3509 static bool
3510 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3512 unsigned int length = ops->length (), i, j;
3513 bool any_changes = false;
3515 if (length == 1)
3516 return false;
3518 for (i = 0; i < length; ++i)
3520 tree elt0 = (*ops)[i]->op;
3522 gassign *stmt0;
3523 bool invert;
3524 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3525 if (cmp0 == ERROR_MARK)
3526 continue;
3528 for (j = i + 1; j < length; ++j)
3530 tree &elt1 = (*ops)[j]->op;
3532 gassign *stmt1;
3533 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3534 if (cmp1 == ERROR_MARK)
3535 continue;
3537 tree cond0 = gimple_assign_rhs1 (stmt0);
3538 tree x0 = TREE_OPERAND (cond0, 0);
3539 tree y0 = TREE_OPERAND (cond0, 1);
3541 tree cond1 = gimple_assign_rhs1 (stmt1);
3542 tree x1 = TREE_OPERAND (cond1, 0);
3543 tree y1 = TREE_OPERAND (cond1, 1);
3545 tree comb;
3546 if (opcode == BIT_AND_EXPR)
3547 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3548 else if (opcode == BIT_IOR_EXPR)
3549 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3550 else
3551 gcc_unreachable ();
3552 if (comb == NULL)
3553 continue;
3555 /* Success! */
3556 if (dump_file && (dump_flags & TDF_DETAILS))
3558 fprintf (dump_file, "Transforming ");
3559 print_generic_expr (dump_file, cond0);
3560 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3561 print_generic_expr (dump_file, cond1);
3562 fprintf (dump_file, " into ");
3563 print_generic_expr (dump_file, comb);
3564 fputc ('\n', dump_file);
3567 gimple_assign_set_rhs1 (stmt0, comb);
3568 if (invert)
3569 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3570 *gimple_assign_rhs3_ptr (stmt0));
3571 update_stmt (stmt0);
3573 elt1 = error_mark_node;
3574 any_changes = true;
3578 if (any_changes)
3580 operand_entry *oe;
3581 j = 0;
3582 FOR_EACH_VEC_ELT (*ops, i, oe)
3584 if (oe->op == error_mark_node)
3585 continue;
3586 else if (i != j)
3587 (*ops)[j] = oe;
3588 j++;
3590 ops->truncate (j);
3593 return any_changes;
3596 /* Return true if STMT is a cast like:
3597 <bb N>:
3599 _123 = (int) _234;
3601 <bb M>:
3602 # _345 = PHI <_123(N), 1(...), 1(...)>
3603 where _234 has bool type, _123 has single use and
3604 bb N has a single successor M. This is commonly used in
3605 the last block of a range test.
3607 Also Return true if STMT is tcc_compare like:
3608 <bb N>:
3610 _234 = a_2(D) == 2;
3612 <bb M>:
3613 # _345 = PHI <_234(N), 1(...), 1(...)>
3614 _346 = (int) _345;
3615 where _234 has booltype, single use and
3616 bb N has a single successor M. This is commonly used in
3617 the last block of a range test. */
3619 static bool
3620 final_range_test_p (gimple *stmt)
3622 basic_block bb, rhs_bb, lhs_bb;
3623 edge e;
3624 tree lhs, rhs;
3625 use_operand_p use_p;
3626 gimple *use_stmt;
3628 if (!gimple_assign_cast_p (stmt)
3629 && (!is_gimple_assign (stmt)
3630 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3631 != tcc_comparison)))
3632 return false;
3633 bb = gimple_bb (stmt);
3634 if (!single_succ_p (bb))
3635 return false;
3636 e = single_succ_edge (bb);
3637 if (e->flags & EDGE_COMPLEX)
3638 return false;
3640 lhs = gimple_assign_lhs (stmt);
3641 rhs = gimple_assign_rhs1 (stmt);
3642 if (gimple_assign_cast_p (stmt)
3643 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3644 || TREE_CODE (rhs) != SSA_NAME
3645 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3646 return false;
3648 if (!gimple_assign_cast_p (stmt)
3649 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3650 return false;
3652 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3653 if (!single_imm_use (lhs, &use_p, &use_stmt))
3654 return false;
3656 if (gimple_code (use_stmt) != GIMPLE_PHI
3657 || gimple_bb (use_stmt) != e->dest)
3658 return false;
3660 /* And that the rhs is defined in the same loop. */
3661 if (gimple_assign_cast_p (stmt))
3663 if (TREE_CODE (rhs) != SSA_NAME
3664 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3665 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3666 return false;
3668 else
3670 if (TREE_CODE (lhs) != SSA_NAME
3671 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3672 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3673 return false;
3676 return true;
3679 /* Return true if BB is suitable basic block for inter-bb range test
3680 optimization. If BACKWARD is true, BB should be the only predecessor
3681 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3682 or compared with to find a common basic block to which all conditions
3683 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3684 be the only predecessor of BB. */
3686 static bool
3687 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3688 bool backward)
3690 edge_iterator ei, ei2;
3691 edge e, e2;
3692 gimple *stmt;
3693 gphi_iterator gsi;
3694 bool other_edge_seen = false;
3695 bool is_cond;
3697 if (test_bb == bb)
3698 return false;
3699 /* Check last stmt first. */
3700 stmt = last_stmt (bb);
3701 if (stmt == NULL
3702 || (gimple_code (stmt) != GIMPLE_COND
3703 && (backward || !final_range_test_p (stmt)))
3704 || gimple_visited_p (stmt)
3705 || stmt_could_throw_p (stmt)
3706 || *other_bb == bb)
3707 return false;
3708 is_cond = gimple_code (stmt) == GIMPLE_COND;
3709 if (is_cond)
3711 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3712 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3713 to *OTHER_BB (if not set yet, try to find it out). */
3714 if (EDGE_COUNT (bb->succs) != 2)
3715 return false;
3716 FOR_EACH_EDGE (e, ei, bb->succs)
3718 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3719 return false;
3720 if (e->dest == test_bb)
3722 if (backward)
3723 continue;
3724 else
3725 return false;
3727 if (e->dest == bb)
3728 return false;
3729 if (*other_bb == NULL)
3731 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3732 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3733 return false;
3734 else if (e->dest == e2->dest)
3735 *other_bb = e->dest;
3736 if (*other_bb == NULL)
3737 return false;
3739 if (e->dest == *other_bb)
3740 other_edge_seen = true;
3741 else if (backward)
3742 return false;
3744 if (*other_bb == NULL || !other_edge_seen)
3745 return false;
3747 else if (single_succ (bb) != *other_bb)
3748 return false;
3750 /* Now check all PHIs of *OTHER_BB. */
3751 e = find_edge (bb, *other_bb);
3752 e2 = find_edge (test_bb, *other_bb);
3753 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3755 gphi *phi = gsi.phi ();
3756 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3757 corresponding to BB and TEST_BB predecessor must be the same. */
3758 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3759 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3761 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3762 one of the PHIs should have the lhs of the last stmt in
3763 that block as PHI arg and that PHI should have 0 or 1
3764 corresponding to it in all other range test basic blocks
3765 considered. */
3766 if (!is_cond)
3768 if (gimple_phi_arg_def (phi, e->dest_idx)
3769 == gimple_assign_lhs (stmt)
3770 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3771 || integer_onep (gimple_phi_arg_def (phi,
3772 e2->dest_idx))))
3773 continue;
3775 else
3777 gimple *test_last = last_stmt (test_bb);
3778 if (gimple_code (test_last) != GIMPLE_COND
3779 && gimple_phi_arg_def (phi, e2->dest_idx)
3780 == gimple_assign_lhs (test_last)
3781 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3782 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3783 continue;
3786 return false;
3789 return true;
3792 /* Return true if BB doesn't have side-effects that would disallow
3793 range test optimization, all SSA_NAMEs set in the bb are consumed
3794 in the bb and there are no PHIs. */
3796 static bool
3797 no_side_effect_bb (basic_block bb)
3799 gimple_stmt_iterator gsi;
3800 gimple *last;
3802 if (!gimple_seq_empty_p (phi_nodes (bb)))
3803 return false;
3804 last = last_stmt (bb);
3805 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3807 gimple *stmt = gsi_stmt (gsi);
3808 tree lhs;
3809 imm_use_iterator imm_iter;
3810 use_operand_p use_p;
3812 if (is_gimple_debug (stmt))
3813 continue;
3814 if (gimple_has_side_effects (stmt))
3815 return false;
3816 if (stmt == last)
3817 return true;
3818 if (!is_gimple_assign (stmt))
3819 return false;
3820 lhs = gimple_assign_lhs (stmt);
3821 if (TREE_CODE (lhs) != SSA_NAME)
3822 return false;
3823 if (gimple_assign_rhs_could_trap_p (stmt))
3824 return false;
3825 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3827 gimple *use_stmt = USE_STMT (use_p);
3828 if (is_gimple_debug (use_stmt))
3829 continue;
3830 if (gimple_bb (use_stmt) != bb)
3831 return false;
3834 return false;
3837 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3838 return true and fill in *OPS recursively. */
3840 static bool
3841 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3842 struct loop *loop)
3844 gimple *stmt = SSA_NAME_DEF_STMT (var);
3845 tree rhs[2];
3846 int i;
3848 if (!is_reassociable_op (stmt, code, loop))
3849 return false;
3851 rhs[0] = gimple_assign_rhs1 (stmt);
3852 rhs[1] = gimple_assign_rhs2 (stmt);
3853 gimple_set_visited (stmt, true);
3854 for (i = 0; i < 2; i++)
3855 if (TREE_CODE (rhs[i]) == SSA_NAME
3856 && !get_ops (rhs[i], code, ops, loop)
3857 && has_single_use (rhs[i]))
3859 operand_entry *oe = operand_entry_pool.allocate ();
3861 oe->op = rhs[i];
3862 oe->rank = code;
3863 oe->id = 0;
3864 oe->count = 1;
3865 oe->stmt_to_insert = NULL;
3866 ops->safe_push (oe);
3868 return true;
3871 /* Find the ops that were added by get_ops starting from VAR, see if
3872 they were changed during update_range_test and if yes, create new
3873 stmts. */
3875 static tree
3876 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3877 unsigned int *pidx, struct loop *loop)
3879 gimple *stmt = SSA_NAME_DEF_STMT (var);
3880 tree rhs[4];
3881 int i;
3883 if (!is_reassociable_op (stmt, code, loop))
3884 return NULL;
3886 rhs[0] = gimple_assign_rhs1 (stmt);
3887 rhs[1] = gimple_assign_rhs2 (stmt);
3888 rhs[2] = rhs[0];
3889 rhs[3] = rhs[1];
3890 for (i = 0; i < 2; i++)
3891 if (TREE_CODE (rhs[i]) == SSA_NAME)
3893 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3894 if (rhs[2 + i] == NULL_TREE)
3896 if (has_single_use (rhs[i]))
3897 rhs[2 + i] = ops[(*pidx)++]->op;
3898 else
3899 rhs[2 + i] = rhs[i];
3902 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3903 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3905 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3906 var = make_ssa_name (TREE_TYPE (var));
3907 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3908 rhs[2], rhs[3]);
3909 gimple_set_uid (g, gimple_uid (stmt));
3910 gimple_set_visited (g, true);
3911 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3913 return var;
3916 /* Structure to track the initial value passed to get_ops and
3917 the range in the ops vector for each basic block. */
3919 struct inter_bb_range_test_entry
3921 tree op;
3922 unsigned int first_idx, last_idx;
3925 /* Inter-bb range test optimization.
3927 Returns TRUE if a gimple conditional is optimized to a true/false,
3928 otherwise return FALSE.
3930 This indicates to the caller that it should run a CFG cleanup pass
3931 once reassociation is completed. */
3933 static bool
3934 maybe_optimize_range_tests (gimple *stmt)
3936 basic_block first_bb = gimple_bb (stmt);
3937 basic_block last_bb = first_bb;
3938 basic_block other_bb = NULL;
3939 basic_block bb;
3940 edge_iterator ei;
3941 edge e;
3942 auto_vec<operand_entry *> ops;
3943 auto_vec<inter_bb_range_test_entry> bbinfo;
3944 bool any_changes = false;
3945 bool cfg_cleanup_needed = false;
3947 /* Consider only basic blocks that end with GIMPLE_COND or
3948 a cast statement satisfying final_range_test_p. All
3949 but the last bb in the first_bb .. last_bb range
3950 should end with GIMPLE_COND. */
3951 if (gimple_code (stmt) == GIMPLE_COND)
3953 if (EDGE_COUNT (first_bb->succs) != 2)
3954 return cfg_cleanup_needed;
3956 else if (final_range_test_p (stmt))
3957 other_bb = single_succ (first_bb);
3958 else
3959 return cfg_cleanup_needed;
3961 if (stmt_could_throw_p (stmt))
3962 return cfg_cleanup_needed;
3964 /* As relative ordering of post-dominator sons isn't fixed,
3965 maybe_optimize_range_tests can be called first on any
3966 bb in the range we want to optimize. So, start searching
3967 backwards, if first_bb can be set to a predecessor. */
3968 while (single_pred_p (first_bb))
3970 basic_block pred_bb = single_pred (first_bb);
3971 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3972 break;
3973 if (!no_side_effect_bb (first_bb))
3974 break;
3975 first_bb = pred_bb;
3977 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3978 Before starting forward search in last_bb successors, find
3979 out the other_bb. */
3980 if (first_bb == last_bb)
3982 other_bb = NULL;
3983 /* As non-GIMPLE_COND last stmt always terminates the range,
3984 if forward search didn't discover anything, just give up. */
3985 if (gimple_code (stmt) != GIMPLE_COND)
3986 return cfg_cleanup_needed;
3987 /* Look at both successors. Either it ends with a GIMPLE_COND
3988 and satisfies suitable_cond_bb, or ends with a cast and
3989 other_bb is that cast's successor. */
3990 FOR_EACH_EDGE (e, ei, first_bb->succs)
3991 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3992 || e->dest == first_bb)
3993 return cfg_cleanup_needed;
3994 else if (single_pred_p (e->dest))
3996 stmt = last_stmt (e->dest);
3997 if (stmt
3998 && gimple_code (stmt) == GIMPLE_COND
3999 && EDGE_COUNT (e->dest->succs) == 2)
4001 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4002 break;
4003 else
4004 other_bb = NULL;
4006 else if (stmt
4007 && final_range_test_p (stmt)
4008 && find_edge (first_bb, single_succ (e->dest)))
4010 other_bb = single_succ (e->dest);
4011 if (other_bb == first_bb)
4012 other_bb = NULL;
4015 if (other_bb == NULL)
4016 return cfg_cleanup_needed;
4018 /* Now do the forward search, moving last_bb to successor bbs
4019 that aren't other_bb. */
4020 while (EDGE_COUNT (last_bb->succs) == 2)
4022 FOR_EACH_EDGE (e, ei, last_bb->succs)
4023 if (e->dest != other_bb)
4024 break;
4025 if (e == NULL)
4026 break;
4027 if (!single_pred_p (e->dest))
4028 break;
4029 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4030 break;
4031 if (!no_side_effect_bb (e->dest))
4032 break;
4033 last_bb = e->dest;
4035 if (first_bb == last_bb)
4036 return cfg_cleanup_needed;
4037 /* Here basic blocks first_bb through last_bb's predecessor
4038 end with GIMPLE_COND, all of them have one of the edges to
4039 other_bb and another to another block in the range,
4040 all blocks except first_bb don't have side-effects and
4041 last_bb ends with either GIMPLE_COND, or cast satisfying
4042 final_range_test_p. */
4043 for (bb = last_bb; ; bb = single_pred (bb))
4045 enum tree_code code;
4046 tree lhs, rhs;
4047 inter_bb_range_test_entry bb_ent;
4049 bb_ent.op = NULL_TREE;
4050 bb_ent.first_idx = ops.length ();
4051 bb_ent.last_idx = bb_ent.first_idx;
4052 e = find_edge (bb, other_bb);
4053 stmt = last_stmt (bb);
4054 gimple_set_visited (stmt, true);
4055 if (gimple_code (stmt) != GIMPLE_COND)
4057 use_operand_p use_p;
4058 gimple *phi;
4059 edge e2;
4060 unsigned int d;
4062 lhs = gimple_assign_lhs (stmt);
4063 rhs = gimple_assign_rhs1 (stmt);
4064 gcc_assert (bb == last_bb);
4066 /* stmt is
4067 _123 = (int) _234;
4069 _234 = a_2(D) == 2;
4071 followed by:
4072 <bb M>:
4073 # _345 = PHI <_123(N), 1(...), 1(...)>
4075 or 0 instead of 1. If it is 0, the _234
4076 range test is anded together with all the
4077 other range tests, if it is 1, it is ored with
4078 them. */
4079 single_imm_use (lhs, &use_p, &phi);
4080 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4081 e2 = find_edge (first_bb, other_bb);
4082 d = e2->dest_idx;
4083 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4084 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4085 code = BIT_AND_EXPR;
4086 else
4088 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4089 code = BIT_IOR_EXPR;
4092 /* If _234 SSA_NAME_DEF_STMT is
4093 _234 = _567 | _789;
4094 (or &, corresponding to 1/0 in the phi arguments,
4095 push into ops the individual range test arguments
4096 of the bitwise or resp. and, recursively. */
4097 if (TREE_CODE (rhs) == SSA_NAME
4098 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4099 != tcc_comparison)
4100 && !get_ops (rhs, code, &ops,
4101 loop_containing_stmt (stmt))
4102 && has_single_use (rhs))
4104 /* Otherwise, push the _234 range test itself. */
4105 operand_entry *oe = operand_entry_pool.allocate ();
4107 oe->op = rhs;
4108 oe->rank = code;
4109 oe->id = 0;
4110 oe->count = 1;
4111 oe->stmt_to_insert = NULL;
4112 ops.safe_push (oe);
4113 bb_ent.last_idx++;
4114 bb_ent.op = rhs;
4116 else if (is_gimple_assign (stmt)
4117 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4118 == tcc_comparison)
4119 && !get_ops (lhs, code, &ops,
4120 loop_containing_stmt (stmt))
4121 && has_single_use (lhs))
4123 operand_entry *oe = operand_entry_pool.allocate ();
4124 oe->op = lhs;
4125 oe->rank = code;
4126 oe->id = 0;
4127 oe->count = 1;
4128 ops.safe_push (oe);
4129 bb_ent.last_idx++;
4130 bb_ent.op = lhs;
4132 else
4134 bb_ent.last_idx = ops.length ();
4135 bb_ent.op = rhs;
4137 bbinfo.safe_push (bb_ent);
4138 continue;
4140 /* Otherwise stmt is GIMPLE_COND. */
4141 code = gimple_cond_code (stmt);
4142 lhs = gimple_cond_lhs (stmt);
4143 rhs = gimple_cond_rhs (stmt);
4144 if (TREE_CODE (lhs) == SSA_NAME
4145 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4146 && ((code != EQ_EXPR && code != NE_EXPR)
4147 || rhs != boolean_false_node
4148 /* Either push into ops the individual bitwise
4149 or resp. and operands, depending on which
4150 edge is other_bb. */
4151 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4152 ^ (code == EQ_EXPR))
4153 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4154 loop_containing_stmt (stmt))))
4156 /* Or push the GIMPLE_COND stmt itself. */
4157 operand_entry *oe = operand_entry_pool.allocate ();
4159 oe->op = NULL;
4160 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4161 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4162 /* oe->op = NULL signs that there is no SSA_NAME
4163 for the range test, and oe->id instead is the
4164 basic block number, at which's end the GIMPLE_COND
4165 is. */
4166 oe->id = bb->index;
4167 oe->count = 1;
4168 oe->stmt_to_insert = NULL;
4169 ops.safe_push (oe);
4170 bb_ent.op = NULL;
4171 bb_ent.last_idx++;
4173 else if (ops.length () > bb_ent.first_idx)
4175 bb_ent.op = lhs;
4176 bb_ent.last_idx = ops.length ();
4178 bbinfo.safe_push (bb_ent);
4179 if (bb == first_bb)
4180 break;
4182 if (ops.length () > 1)
4183 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4184 if (any_changes)
4186 unsigned int idx, max_idx = 0;
4187 /* update_ops relies on has_single_use predicates returning the
4188 same values as it did during get_ops earlier. Additionally it
4189 never removes statements, only adds new ones and it should walk
4190 from the single imm use and check the predicate already before
4191 making those changes.
4192 On the other side, the handling of GIMPLE_COND directly can turn
4193 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4194 it needs to be done in a separate loop afterwards. */
4195 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4197 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4198 && bbinfo[idx].op != NULL_TREE)
4200 tree new_op;
4202 max_idx = idx;
4203 stmt = last_stmt (bb);
4204 new_op = update_ops (bbinfo[idx].op,
4205 (enum tree_code)
4206 ops[bbinfo[idx].first_idx]->rank,
4207 ops, &bbinfo[idx].first_idx,
4208 loop_containing_stmt (stmt));
4209 if (new_op == NULL_TREE)
4211 gcc_assert (bb == last_bb);
4212 new_op = ops[bbinfo[idx].first_idx++]->op;
4214 if (bbinfo[idx].op != new_op)
4216 imm_use_iterator iter;
4217 use_operand_p use_p;
4218 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4220 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4221 if (is_gimple_debug (use_stmt))
4222 continue;
4223 else if (gimple_code (use_stmt) == GIMPLE_COND
4224 || gimple_code (use_stmt) == GIMPLE_PHI)
4225 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4226 SET_USE (use_p, new_op);
4227 else if ((is_gimple_assign (use_stmt)
4228 && (TREE_CODE_CLASS
4229 (gimple_assign_rhs_code (use_stmt))
4230 == tcc_comparison)))
4231 cast_or_tcc_cmp_stmt = use_stmt;
4232 else if (gimple_assign_cast_p (use_stmt))
4233 cast_or_tcc_cmp_stmt = use_stmt;
4234 else
4235 gcc_unreachable ();
4237 if (cast_or_tcc_cmp_stmt)
4239 gcc_assert (bb == last_bb);
4240 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4241 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4242 enum tree_code rhs_code
4243 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4244 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4245 : CONVERT_EXPR;
4246 gassign *g;
4247 if (is_gimple_min_invariant (new_op))
4249 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4250 g = gimple_build_assign (new_lhs, new_op);
4252 else
4253 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4254 gimple_stmt_iterator gsi
4255 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4256 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4257 gimple_set_visited (g, true);
4258 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4259 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4260 if (is_gimple_debug (use_stmt))
4261 continue;
4262 else if (gimple_code (use_stmt) == GIMPLE_COND
4263 || gimple_code (use_stmt) == GIMPLE_PHI)
4264 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4265 SET_USE (use_p, new_lhs);
4266 else
4267 gcc_unreachable ();
4271 if (bb == first_bb)
4272 break;
4274 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4276 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4277 && bbinfo[idx].op == NULL_TREE
4278 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4280 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4282 if (idx > max_idx)
4283 max_idx = idx;
4285 /* If we collapse the conditional to a true/false
4286 condition, then bubble that knowledge up to our caller. */
4287 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4289 gimple_cond_make_false (cond_stmt);
4290 cfg_cleanup_needed = true;
4292 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4294 gimple_cond_make_true (cond_stmt);
4295 cfg_cleanup_needed = true;
4297 else
4299 gimple_cond_set_code (cond_stmt, NE_EXPR);
4300 gimple_cond_set_lhs (cond_stmt,
4301 ops[bbinfo[idx].first_idx]->op);
4302 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4304 update_stmt (cond_stmt);
4306 if (bb == first_bb)
4307 break;
4310 /* The above changes could result in basic blocks after the first
4311 modified one, up to and including last_bb, to be executed even if
4312 they would not be in the original program. If the value ranges of
4313 assignment lhs' in those bbs were dependent on the conditions
4314 guarding those basic blocks which now can change, the VRs might
4315 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4316 are only used within the same bb, it should be not a big deal if
4317 we just reset all the VRs in those bbs. See PR68671. */
4318 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4319 reset_flow_sensitive_info_in_bb (bb);
4321 return cfg_cleanup_needed;
4324 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4325 of STMT in it's operands. This is also known as a "destructive
4326 update" operation. */
4328 static bool
4329 is_phi_for_stmt (gimple *stmt, tree operand)
4331 gimple *def_stmt;
4332 gphi *def_phi;
4333 tree lhs;
4334 use_operand_p arg_p;
4335 ssa_op_iter i;
4337 if (TREE_CODE (operand) != SSA_NAME)
4338 return false;
4340 lhs = gimple_assign_lhs (stmt);
4342 def_stmt = SSA_NAME_DEF_STMT (operand);
4343 def_phi = dyn_cast <gphi *> (def_stmt);
4344 if (!def_phi)
4345 return false;
4347 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4348 if (lhs == USE_FROM_PTR (arg_p))
4349 return true;
4350 return false;
4353 /* Remove def stmt of VAR if VAR has zero uses and recurse
4354 on rhs1 operand if so. */
4356 static void
4357 remove_visited_stmt_chain (tree var)
4359 gimple *stmt;
4360 gimple_stmt_iterator gsi;
4362 while (1)
4364 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4365 return;
4366 stmt = SSA_NAME_DEF_STMT (var);
4367 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4369 var = gimple_assign_rhs1 (stmt);
4370 gsi = gsi_for_stmt (stmt);
4371 reassoc_remove_stmt (&gsi);
4372 release_defs (stmt);
4374 else
4375 return;
4379 /* This function checks three consequtive operands in
4380 passed operands vector OPS starting from OPINDEX and
4381 swaps two operands if it is profitable for binary operation
4382 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4384 We pair ops with the same rank if possible.
4386 The alternative we try is to see if STMT is a destructive
4387 update style statement, which is like:
4388 b = phi (a, ...)
4389 a = c + b;
4390 In that case, we want to use the destructive update form to
4391 expose the possible vectorizer sum reduction opportunity.
4392 In that case, the third operand will be the phi node. This
4393 check is not performed if STMT is null.
4395 We could, of course, try to be better as noted above, and do a
4396 lot of work to try to find these opportunities in >3 operand
4397 cases, but it is unlikely to be worth it. */
4399 static void
4400 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4401 unsigned int opindex, gimple *stmt)
4403 operand_entry *oe1, *oe2, *oe3;
4405 oe1 = ops[opindex];
4406 oe2 = ops[opindex + 1];
4407 oe3 = ops[opindex + 2];
4409 if ((oe1->rank == oe2->rank
4410 && oe2->rank != oe3->rank)
4411 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4412 && !is_phi_for_stmt (stmt, oe1->op)
4413 && !is_phi_for_stmt (stmt, oe2->op)))
4414 std::swap (*oe1, *oe3);
4415 else if ((oe1->rank == oe3->rank
4416 && oe2->rank != oe3->rank)
4417 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4418 && !is_phi_for_stmt (stmt, oe1->op)
4419 && !is_phi_for_stmt (stmt, oe3->op)))
4420 std::swap (*oe1, *oe2);
4423 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4424 two definitions, otherwise return STMT. */
4426 static inline gimple *
4427 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4429 if (TREE_CODE (rhs1) == SSA_NAME
4430 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4431 stmt = SSA_NAME_DEF_STMT (rhs1);
4432 if (TREE_CODE (rhs2) == SSA_NAME
4433 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4434 stmt = SSA_NAME_DEF_STMT (rhs2);
4435 return stmt;
4438 /* If the stmt that defines operand has to be inserted, insert it
4439 before the use. */
4440 static void
4441 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4443 gcc_assert (is_gimple_assign (stmt_to_insert));
4444 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4445 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4446 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4447 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4448 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4450 /* If the insert point is not stmt, then insert_point would be
4451 the point where operand rhs1 or rhs2 is defined. In this case,
4452 stmt_to_insert has to be inserted afterwards. This would
4453 only happen when the stmt insertion point is flexible. */
4454 if (stmt == insert_point)
4455 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4456 else
4457 insert_stmt_after (stmt_to_insert, insert_point);
4461 /* Recursively rewrite our linearized statements so that the operators
4462 match those in OPS[OPINDEX], putting the computation in rank
4463 order. Return new lhs.
4464 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4465 the current stmt and during recursive invocations.
4466 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4467 recursive invocations. */
4469 static tree
4470 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4471 vec<operand_entry *> ops, bool changed, bool next_changed)
4473 tree rhs1 = gimple_assign_rhs1 (stmt);
4474 tree rhs2 = gimple_assign_rhs2 (stmt);
4475 tree lhs = gimple_assign_lhs (stmt);
4476 operand_entry *oe;
4478 /* The final recursion case for this function is that you have
4479 exactly two operations left.
4480 If we had exactly one op in the entire list to start with, we
4481 would have never called this function, and the tail recursion
4482 rewrites them one at a time. */
4483 if (opindex + 2 == ops.length ())
4485 operand_entry *oe1, *oe2;
4487 oe1 = ops[opindex];
4488 oe2 = ops[opindex + 1];
4490 if (rhs1 != oe1->op || rhs2 != oe2->op)
4492 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4493 unsigned int uid = gimple_uid (stmt);
4495 if (dump_file && (dump_flags & TDF_DETAILS))
4497 fprintf (dump_file, "Transforming ");
4498 print_gimple_stmt (dump_file, stmt, 0);
4501 /* If the stmt that defines operand has to be inserted, insert it
4502 before the use. */
4503 if (oe1->stmt_to_insert)
4504 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4505 if (oe2->stmt_to_insert)
4506 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4507 /* Even when changed is false, reassociation could have e.g. removed
4508 some redundant operations, so unless we are just swapping the
4509 arguments or unless there is no change at all (then we just
4510 return lhs), force creation of a new SSA_NAME. */
4511 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4513 gimple *insert_point
4514 = find_insert_point (stmt, oe1->op, oe2->op);
4515 lhs = make_ssa_name (TREE_TYPE (lhs));
4516 stmt
4517 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4518 oe1->op, oe2->op);
4519 gimple_set_uid (stmt, uid);
4520 gimple_set_visited (stmt, true);
4521 if (insert_point == gsi_stmt (gsi))
4522 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4523 else
4524 insert_stmt_after (stmt, insert_point);
4526 else
4528 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4529 == stmt);
4530 gimple_assign_set_rhs1 (stmt, oe1->op);
4531 gimple_assign_set_rhs2 (stmt, oe2->op);
4532 update_stmt (stmt);
4535 if (rhs1 != oe1->op && rhs1 != oe2->op)
4536 remove_visited_stmt_chain (rhs1);
4538 if (dump_file && (dump_flags & TDF_DETAILS))
4540 fprintf (dump_file, " into ");
4541 print_gimple_stmt (dump_file, stmt, 0);
4544 return lhs;
4547 /* If we hit here, we should have 3 or more ops left. */
4548 gcc_assert (opindex + 2 < ops.length ());
4550 /* Rewrite the next operator. */
4551 oe = ops[opindex];
4553 /* If the stmt that defines operand has to be inserted, insert it
4554 before the use. */
4555 if (oe->stmt_to_insert)
4556 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4558 /* Recurse on the LHS of the binary operator, which is guaranteed to
4559 be the non-leaf side. */
4560 tree new_rhs1
4561 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4562 changed || oe->op != rhs2 || next_changed,
4563 false);
4565 if (oe->op != rhs2 || new_rhs1 != rhs1)
4567 if (dump_file && (dump_flags & TDF_DETAILS))
4569 fprintf (dump_file, "Transforming ");
4570 print_gimple_stmt (dump_file, stmt, 0);
4573 /* If changed is false, this is either opindex == 0
4574 or all outer rhs2's were equal to corresponding oe->op,
4575 and powi_result is NULL.
4576 That means lhs is equivalent before and after reassociation.
4577 Otherwise ensure the old lhs SSA_NAME is not reused and
4578 create a new stmt as well, so that any debug stmts will be
4579 properly adjusted. */
4580 if (changed)
4582 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4583 unsigned int uid = gimple_uid (stmt);
4584 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4586 lhs = make_ssa_name (TREE_TYPE (lhs));
4587 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4588 new_rhs1, oe->op);
4589 gimple_set_uid (stmt, uid);
4590 gimple_set_visited (stmt, true);
4591 if (insert_point == gsi_stmt (gsi))
4592 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4593 else
4594 insert_stmt_after (stmt, insert_point);
4596 else
4598 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4599 == stmt);
4600 gimple_assign_set_rhs1 (stmt, new_rhs1);
4601 gimple_assign_set_rhs2 (stmt, oe->op);
4602 update_stmt (stmt);
4605 if (dump_file && (dump_flags & TDF_DETAILS))
4607 fprintf (dump_file, " into ");
4608 print_gimple_stmt (dump_file, stmt, 0);
4611 return lhs;
4614 /* Find out how many cycles we need to compute statements chain.
4615 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
4616 maximum number of independent statements we may execute per cycle. */
4618 static int
4619 get_required_cycles (int ops_num, int cpu_width)
4621 int res;
4622 int elog;
4623 unsigned int rest;
4625 /* While we have more than 2 * cpu_width operands
4626 we may reduce number of operands by cpu_width
4627 per cycle. */
4628 res = ops_num / (2 * cpu_width);
4630 /* Remained operands count may be reduced twice per cycle
4631 until we have only one operand. */
4632 rest = (unsigned)(ops_num - res * cpu_width);
4633 elog = exact_log2 (rest);
4634 if (elog >= 0)
4635 res += elog;
4636 else
4637 res += floor_log2 (rest) + 1;
4639 return res;
4642 /* Returns an optimal number of registers to use for computation of
4643 given statements. */
4645 static int
4646 get_reassociation_width (int ops_num, enum tree_code opc,
4647 machine_mode mode)
4649 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4650 int width;
4651 int width_min;
4652 int cycles_best;
4654 if (param_width > 0)
4655 width = param_width;
4656 else
4657 width = targetm.sched.reassociation_width (opc, mode);
4659 if (width == 1)
4660 return width;
4662 /* Get the minimal time required for sequence computation. */
4663 cycles_best = get_required_cycles (ops_num, width);
4665 /* Check if we may use less width and still compute sequence for
4666 the same time. It will allow us to reduce registers usage.
4667 get_required_cycles is monotonically increasing with lower width
4668 so we can perform a binary search for the minimal width that still
4669 results in the optimal cycle count. */
4670 width_min = 1;
4671 while (width > width_min)
4673 int width_mid = (width + width_min) / 2;
4675 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4676 width = width_mid;
4677 else if (width_min < width_mid)
4678 width_min = width_mid;
4679 else
4680 break;
4683 return width;
4686 /* Recursively rewrite our linearized statements so that the operators
4687 match those in OPS[OPINDEX], putting the computation in rank
4688 order and trying to allow operations to be executed in
4689 parallel. */
4691 static void
4692 rewrite_expr_tree_parallel (gassign *stmt, int width,
4693 vec<operand_entry *> ops)
4695 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4696 int op_num = ops.length ();
4697 gcc_assert (op_num > 0);
4698 int stmt_num = op_num - 1;
4699 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4700 int op_index = op_num - 1;
4701 int stmt_index = 0;
4702 int ready_stmts_end = 0;
4703 int i = 0;
4704 gimple *stmt1 = NULL, *stmt2 = NULL;
4705 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4707 /* We start expression rewriting from the top statements.
4708 So, in this loop we create a full list of statements
4709 we will work with. */
4710 stmts[stmt_num - 1] = stmt;
4711 for (i = stmt_num - 2; i >= 0; i--)
4712 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4714 for (i = 0; i < stmt_num; i++)
4716 tree op1, op2;
4718 /* Determine whether we should use results of
4719 already handled statements or not. */
4720 if (ready_stmts_end == 0
4721 && (i - stmt_index >= width || op_index < 1))
4722 ready_stmts_end = i;
4724 /* Now we choose operands for the next statement. Non zero
4725 value in ready_stmts_end means here that we should use
4726 the result of already generated statements as new operand. */
4727 if (ready_stmts_end > 0)
4729 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4730 if (ready_stmts_end > stmt_index)
4731 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4732 else if (op_index >= 0)
4734 operand_entry *oe = ops[op_index--];
4735 stmt2 = oe->stmt_to_insert;
4736 op2 = oe->op;
4738 else
4740 gcc_assert (stmt_index < i);
4741 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4744 if (stmt_index >= ready_stmts_end)
4745 ready_stmts_end = 0;
4747 else
4749 if (op_index > 1)
4750 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4751 operand_entry *oe2 = ops[op_index--];
4752 operand_entry *oe1 = ops[op_index--];
4753 op2 = oe2->op;
4754 stmt2 = oe2->stmt_to_insert;
4755 op1 = oe1->op;
4756 stmt1 = oe1->stmt_to_insert;
4759 /* If we emit the last statement then we should put
4760 operands into the last statement. It will also
4761 break the loop. */
4762 if (op_index < 0 && stmt_index == i)
4763 i = stmt_num - 1;
4765 if (dump_file && (dump_flags & TDF_DETAILS))
4767 fprintf (dump_file, "Transforming ");
4768 print_gimple_stmt (dump_file, stmts[i], 0);
4771 /* If the stmt that defines operand has to be inserted, insert it
4772 before the use. */
4773 if (stmt1)
4774 insert_stmt_before_use (stmts[i], stmt1);
4775 if (stmt2)
4776 insert_stmt_before_use (stmts[i], stmt2);
4777 stmt1 = stmt2 = NULL;
4779 /* We keep original statement only for the last one. All
4780 others are recreated. */
4781 if (i == stmt_num - 1)
4783 gimple_assign_set_rhs1 (stmts[i], op1);
4784 gimple_assign_set_rhs2 (stmts[i], op2);
4785 update_stmt (stmts[i]);
4787 else
4789 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4791 if (dump_file && (dump_flags & TDF_DETAILS))
4793 fprintf (dump_file, " into ");
4794 print_gimple_stmt (dump_file, stmts[i], 0);
4798 remove_visited_stmt_chain (last_rhs1);
4801 /* Transform STMT, which is really (A +B) + (C + D) into the left
4802 linear form, ((A+B)+C)+D.
4803 Recurse on D if necessary. */
4805 static void
4806 linearize_expr (gimple *stmt)
4808 gimple_stmt_iterator gsi;
4809 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4810 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4811 gimple *oldbinrhs = binrhs;
4812 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4813 gimple *newbinrhs = NULL;
4814 struct loop *loop = loop_containing_stmt (stmt);
4815 tree lhs = gimple_assign_lhs (stmt);
4817 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4818 && is_reassociable_op (binrhs, rhscode, loop));
4820 gsi = gsi_for_stmt (stmt);
4822 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4823 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4824 gimple_assign_rhs_code (binrhs),
4825 gimple_assign_lhs (binlhs),
4826 gimple_assign_rhs2 (binrhs));
4827 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4828 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4829 gimple_set_uid (binrhs, gimple_uid (stmt));
4831 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4832 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4834 if (dump_file && (dump_flags & TDF_DETAILS))
4836 fprintf (dump_file, "Linearized: ");
4837 print_gimple_stmt (dump_file, stmt, 0);
4840 reassociate_stats.linearized++;
4841 update_stmt (stmt);
4843 gsi = gsi_for_stmt (oldbinrhs);
4844 reassoc_remove_stmt (&gsi);
4845 release_defs (oldbinrhs);
4847 gimple_set_visited (stmt, true);
4848 gimple_set_visited (binlhs, true);
4849 gimple_set_visited (binrhs, true);
4851 /* Tail recurse on the new rhs if it still needs reassociation. */
4852 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4853 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4854 want to change the algorithm while converting to tuples. */
4855 linearize_expr (stmt);
4858 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4859 it. Otherwise, return NULL. */
4861 static gimple *
4862 get_single_immediate_use (tree lhs)
4864 use_operand_p immuse;
4865 gimple *immusestmt;
4867 if (TREE_CODE (lhs) == SSA_NAME
4868 && single_imm_use (lhs, &immuse, &immusestmt)
4869 && is_gimple_assign (immusestmt))
4870 return immusestmt;
4872 return NULL;
4875 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4876 representing the negated value. Insertions of any necessary
4877 instructions go before GSI.
4878 This function is recursive in that, if you hand it "a_5" as the
4879 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4880 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4882 static tree
4883 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4885 gimple *negatedefstmt = NULL;
4886 tree resultofnegate;
4887 gimple_stmt_iterator gsi;
4888 unsigned int uid;
4890 /* If we are trying to negate a name, defined by an add, negate the
4891 add operands instead. */
4892 if (TREE_CODE (tonegate) == SSA_NAME)
4893 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4894 if (TREE_CODE (tonegate) == SSA_NAME
4895 && is_gimple_assign (negatedefstmt)
4896 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4897 && has_single_use (gimple_assign_lhs (negatedefstmt))
4898 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4900 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4901 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4902 tree lhs = gimple_assign_lhs (negatedefstmt);
4903 gimple *g;
4905 gsi = gsi_for_stmt (negatedefstmt);
4906 rhs1 = negate_value (rhs1, &gsi);
4908 gsi = gsi_for_stmt (negatedefstmt);
4909 rhs2 = negate_value (rhs2, &gsi);
4911 gsi = gsi_for_stmt (negatedefstmt);
4912 lhs = make_ssa_name (TREE_TYPE (lhs));
4913 gimple_set_visited (negatedefstmt, true);
4914 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4915 gimple_set_uid (g, gimple_uid (negatedefstmt));
4916 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4917 return lhs;
4920 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4921 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4922 NULL_TREE, true, GSI_SAME_STMT);
4923 gsi = *gsip;
4924 uid = gimple_uid (gsi_stmt (gsi));
4925 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4927 gimple *stmt = gsi_stmt (gsi);
4928 if (gimple_uid (stmt) != 0)
4929 break;
4930 gimple_set_uid (stmt, uid);
4932 return resultofnegate;
4935 /* Return true if we should break up the subtract in STMT into an add
4936 with negate. This is true when we the subtract operands are really
4937 adds, or the subtract itself is used in an add expression. In
4938 either case, breaking up the subtract into an add with negate
4939 exposes the adds to reassociation. */
4941 static bool
4942 should_break_up_subtract (gimple *stmt)
4944 tree lhs = gimple_assign_lhs (stmt);
4945 tree binlhs = gimple_assign_rhs1 (stmt);
4946 tree binrhs = gimple_assign_rhs2 (stmt);
4947 gimple *immusestmt;
4948 struct loop *loop = loop_containing_stmt (stmt);
4950 if (TREE_CODE (binlhs) == SSA_NAME
4951 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4952 return true;
4954 if (TREE_CODE (binrhs) == SSA_NAME
4955 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4956 return true;
4958 if (TREE_CODE (lhs) == SSA_NAME
4959 && (immusestmt = get_single_immediate_use (lhs))
4960 && is_gimple_assign (immusestmt)
4961 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4962 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4963 && gimple_assign_rhs1 (immusestmt) == lhs)
4964 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4965 return true;
4966 return false;
4969 /* Transform STMT from A - B into A + -B. */
4971 static void
4972 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4974 tree rhs1 = gimple_assign_rhs1 (stmt);
4975 tree rhs2 = gimple_assign_rhs2 (stmt);
4977 if (dump_file && (dump_flags & TDF_DETAILS))
4979 fprintf (dump_file, "Breaking up subtract ");
4980 print_gimple_stmt (dump_file, stmt, 0);
4983 rhs2 = negate_value (rhs2, gsip);
4984 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4985 update_stmt (stmt);
4988 /* Determine whether STMT is a builtin call that raises an SSA name
4989 to an integer power and has only one use. If so, and this is early
4990 reassociation and unsafe math optimizations are permitted, place
4991 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4992 If any of these conditions does not hold, return FALSE. */
4994 static bool
4995 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4997 tree arg1;
4998 REAL_VALUE_TYPE c, cint;
5000 switch (gimple_call_combined_fn (stmt))
5002 CASE_CFN_POW:
5003 if (flag_errno_math)
5004 return false;
5006 *base = gimple_call_arg (stmt, 0);
5007 arg1 = gimple_call_arg (stmt, 1);
5009 if (TREE_CODE (arg1) != REAL_CST)
5010 return false;
5012 c = TREE_REAL_CST (arg1);
5014 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5015 return false;
5017 *exponent = real_to_integer (&c);
5018 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5019 if (!real_identical (&c, &cint))
5020 return false;
5022 break;
5024 CASE_CFN_POWI:
5025 *base = gimple_call_arg (stmt, 0);
5026 arg1 = gimple_call_arg (stmt, 1);
5028 if (!tree_fits_shwi_p (arg1))
5029 return false;
5031 *exponent = tree_to_shwi (arg1);
5032 break;
5034 default:
5035 return false;
5038 /* Expanding negative exponents is generally unproductive, so we don't
5039 complicate matters with those. Exponents of zero and one should
5040 have been handled by expression folding. */
5041 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5042 return false;
5044 return true;
5047 /* Try to derive and add operand entry for OP to *OPS. Return false if
5048 unsuccessful. */
5050 static bool
5051 try_special_add_to_ops (vec<operand_entry *> *ops,
5052 enum tree_code code,
5053 tree op, gimple* def_stmt)
5055 tree base = NULL_TREE;
5056 HOST_WIDE_INT exponent = 0;
5058 if (TREE_CODE (op) != SSA_NAME
5059 || ! has_single_use (op))
5060 return false;
5062 if (code == MULT_EXPR
5063 && reassoc_insert_powi_p
5064 && flag_unsafe_math_optimizations
5065 && is_gimple_call (def_stmt)
5066 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5068 add_repeat_to_ops_vec (ops, base, exponent);
5069 gimple_set_visited (def_stmt, true);
5070 return true;
5072 else if (code == MULT_EXPR
5073 && is_gimple_assign (def_stmt)
5074 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5075 && !HONOR_SNANS (TREE_TYPE (op))
5076 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5077 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5079 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5080 tree cst = build_minus_one_cst (TREE_TYPE (op));
5081 add_to_ops_vec (ops, rhs1);
5082 add_to_ops_vec (ops, cst);
5083 gimple_set_visited (def_stmt, true);
5084 return true;
5087 return false;
5090 /* Recursively linearize a binary expression that is the RHS of STMT.
5091 Place the operands of the expression tree in the vector named OPS. */
5093 static void
5094 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5095 bool is_associative, bool set_visited)
5097 tree binlhs = gimple_assign_rhs1 (stmt);
5098 tree binrhs = gimple_assign_rhs2 (stmt);
5099 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5100 bool binlhsisreassoc = false;
5101 bool binrhsisreassoc = false;
5102 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5103 struct loop *loop = loop_containing_stmt (stmt);
5105 if (set_visited)
5106 gimple_set_visited (stmt, true);
5108 if (TREE_CODE (binlhs) == SSA_NAME)
5110 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5111 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5112 && !stmt_could_throw_p (binlhsdef));
5115 if (TREE_CODE (binrhs) == SSA_NAME)
5117 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5118 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5119 && !stmt_could_throw_p (binrhsdef));
5122 /* If the LHS is not reassociable, but the RHS is, we need to swap
5123 them. If neither is reassociable, there is nothing we can do, so
5124 just put them in the ops vector. If the LHS is reassociable,
5125 linearize it. If both are reassociable, then linearize the RHS
5126 and the LHS. */
5128 if (!binlhsisreassoc)
5130 /* If this is not a associative operation like division, give up. */
5131 if (!is_associative)
5133 add_to_ops_vec (ops, binrhs);
5134 return;
5137 if (!binrhsisreassoc)
5139 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5140 add_to_ops_vec (ops, binrhs);
5142 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5143 add_to_ops_vec (ops, binlhs);
5145 return;
5148 if (dump_file && (dump_flags & TDF_DETAILS))
5150 fprintf (dump_file, "swapping operands of ");
5151 print_gimple_stmt (dump_file, stmt, 0);
5154 swap_ssa_operands (stmt,
5155 gimple_assign_rhs1_ptr (stmt),
5156 gimple_assign_rhs2_ptr (stmt));
5157 update_stmt (stmt);
5159 if (dump_file && (dump_flags & TDF_DETAILS))
5161 fprintf (dump_file, " is now ");
5162 print_gimple_stmt (dump_file, stmt, 0);
5165 /* We want to make it so the lhs is always the reassociative op,
5166 so swap. */
5167 std::swap (binlhs, binrhs);
5169 else if (binrhsisreassoc)
5171 linearize_expr (stmt);
5172 binlhs = gimple_assign_rhs1 (stmt);
5173 binrhs = gimple_assign_rhs2 (stmt);
5176 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5177 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5178 rhscode, loop));
5179 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5180 is_associative, set_visited);
5182 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5183 add_to_ops_vec (ops, binrhs);
5186 /* Repropagate the negates back into subtracts, since no other pass
5187 currently does it. */
5189 static void
5190 repropagate_negates (void)
5192 unsigned int i = 0;
5193 tree negate;
5195 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5197 gimple *user = get_single_immediate_use (negate);
5199 if (!user || !is_gimple_assign (user))
5200 continue;
5202 /* The negate operand can be either operand of a PLUS_EXPR
5203 (it can be the LHS if the RHS is a constant for example).
5205 Force the negate operand to the RHS of the PLUS_EXPR, then
5206 transform the PLUS_EXPR into a MINUS_EXPR. */
5207 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5209 /* If the negated operand appears on the LHS of the
5210 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5211 to force the negated operand to the RHS of the PLUS_EXPR. */
5212 if (gimple_assign_rhs1 (user) == negate)
5214 swap_ssa_operands (user,
5215 gimple_assign_rhs1_ptr (user),
5216 gimple_assign_rhs2_ptr (user));
5219 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5220 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5221 if (gimple_assign_rhs2 (user) == negate)
5223 tree rhs1 = gimple_assign_rhs1 (user);
5224 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5225 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5226 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5227 update_stmt (user);
5230 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5232 if (gimple_assign_rhs1 (user) == negate)
5234 /* We have
5235 x = -a
5236 y = x - b
5237 which we transform into
5238 x = a + b
5239 y = -x .
5240 This pushes down the negate which we possibly can merge
5241 into some other operation, hence insert it into the
5242 plus_negates vector. */
5243 gimple *feed = SSA_NAME_DEF_STMT (negate);
5244 tree a = gimple_assign_rhs1 (feed);
5245 tree b = gimple_assign_rhs2 (user);
5246 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5247 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5248 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5249 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5250 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5251 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5252 user = gsi_stmt (gsi2);
5253 update_stmt (user);
5254 reassoc_remove_stmt (&gsi);
5255 release_defs (feed);
5256 plus_negates.safe_push (gimple_assign_lhs (user));
5258 else
5260 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5261 rid of one operation. */
5262 gimple *feed = SSA_NAME_DEF_STMT (negate);
5263 tree a = gimple_assign_rhs1 (feed);
5264 tree rhs1 = gimple_assign_rhs1 (user);
5265 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5266 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5267 update_stmt (gsi_stmt (gsi));
5273 /* Returns true if OP is of a type for which we can do reassociation.
5274 That is for integral or non-saturating fixed-point types, and for
5275 floating point type when associative-math is enabled. */
5277 static bool
5278 can_reassociate_p (tree op)
5280 tree type = TREE_TYPE (op);
5281 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5282 return false;
5283 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5284 || NON_SAT_FIXED_POINT_TYPE_P (type)
5285 || (flag_associative_math && FLOAT_TYPE_P (type)))
5286 return true;
5287 return false;
5290 /* Break up subtract operations in block BB.
5292 We do this top down because we don't know whether the subtract is
5293 part of a possible chain of reassociation except at the top.
5295 IE given
5296 d = f + g
5297 c = a + e
5298 b = c - d
5299 q = b - r
5300 k = t - q
5302 we want to break up k = t - q, but we won't until we've transformed q
5303 = b - r, which won't be broken up until we transform b = c - d.
5305 En passant, clear the GIMPLE visited flag on every statement
5306 and set UIDs within each basic block. */
5308 static void
5309 break_up_subtract_bb (basic_block bb)
5311 gimple_stmt_iterator gsi;
5312 basic_block son;
5313 unsigned int uid = 1;
5315 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5317 gimple *stmt = gsi_stmt (gsi);
5318 gimple_set_visited (stmt, false);
5319 gimple_set_uid (stmt, uid++);
5321 if (!is_gimple_assign (stmt)
5322 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5323 continue;
5325 /* Look for simple gimple subtract operations. */
5326 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5328 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5329 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5330 continue;
5332 /* Check for a subtract used only in an addition. If this
5333 is the case, transform it into add of a negate for better
5334 reassociation. IE transform C = A-B into C = A + -B if C
5335 is only used in an addition. */
5336 if (should_break_up_subtract (stmt))
5337 break_up_subtract (stmt, &gsi);
5339 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5340 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5341 plus_negates.safe_push (gimple_assign_lhs (stmt));
5343 for (son = first_dom_son (CDI_DOMINATORS, bb);
5344 son;
5345 son = next_dom_son (CDI_DOMINATORS, son))
5346 break_up_subtract_bb (son);
5349 /* Used for repeated factor analysis. */
5350 struct repeat_factor
5352 /* An SSA name that occurs in a multiply chain. */
5353 tree factor;
5355 /* Cached rank of the factor. */
5356 unsigned rank;
5358 /* Number of occurrences of the factor in the chain. */
5359 HOST_WIDE_INT count;
5361 /* An SSA name representing the product of this factor and
5362 all factors appearing later in the repeated factor vector. */
5363 tree repr;
5367 static vec<repeat_factor> repeat_factor_vec;
5369 /* Used for sorting the repeat factor vector. Sort primarily by
5370 ascending occurrence count, secondarily by descending rank. */
5372 static int
5373 compare_repeat_factors (const void *x1, const void *x2)
5375 const repeat_factor *rf1 = (const repeat_factor *) x1;
5376 const repeat_factor *rf2 = (const repeat_factor *) x2;
5378 if (rf1->count != rf2->count)
5379 return rf1->count - rf2->count;
5381 return rf2->rank - rf1->rank;
5384 /* Look for repeated operands in OPS in the multiply tree rooted at
5385 STMT. Replace them with an optimal sequence of multiplies and powi
5386 builtin calls, and remove the used operands from OPS. Return an
5387 SSA name representing the value of the replacement sequence. */
5389 static tree
5390 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5392 unsigned i, j, vec_len;
5393 int ii;
5394 operand_entry *oe;
5395 repeat_factor *rf1, *rf2;
5396 repeat_factor rfnew;
5397 tree result = NULL_TREE;
5398 tree target_ssa, iter_result;
5399 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5400 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5401 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5402 gimple *mul_stmt, *pow_stmt;
5404 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5405 target. */
5406 if (!powi_fndecl)
5407 return NULL_TREE;
5409 /* Allocate the repeated factor vector. */
5410 repeat_factor_vec.create (10);
5412 /* Scan the OPS vector for all SSA names in the product and build
5413 up a vector of occurrence counts for each factor. */
5414 FOR_EACH_VEC_ELT (*ops, i, oe)
5416 if (TREE_CODE (oe->op) == SSA_NAME)
5418 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5420 if (rf1->factor == oe->op)
5422 rf1->count += oe->count;
5423 break;
5427 if (j >= repeat_factor_vec.length ())
5429 rfnew.factor = oe->op;
5430 rfnew.rank = oe->rank;
5431 rfnew.count = oe->count;
5432 rfnew.repr = NULL_TREE;
5433 repeat_factor_vec.safe_push (rfnew);
5438 /* Sort the repeated factor vector by (a) increasing occurrence count,
5439 and (b) decreasing rank. */
5440 repeat_factor_vec.qsort (compare_repeat_factors);
5442 /* It is generally best to combine as many base factors as possible
5443 into a product before applying __builtin_powi to the result.
5444 However, the sort order chosen for the repeated factor vector
5445 allows us to cache partial results for the product of the base
5446 factors for subsequent use. When we already have a cached partial
5447 result from a previous iteration, it is best to make use of it
5448 before looking for another __builtin_pow opportunity.
5450 As an example, consider x * x * y * y * y * z * z * z * z.
5451 We want to first compose the product x * y * z, raise it to the
5452 second power, then multiply this by y * z, and finally multiply
5453 by z. This can be done in 5 multiplies provided we cache y * z
5454 for use in both expressions:
5456 t1 = y * z
5457 t2 = t1 * x
5458 t3 = t2 * t2
5459 t4 = t1 * t3
5460 result = t4 * z
5462 If we instead ignored the cached y * z and first multiplied by
5463 the __builtin_pow opportunity z * z, we would get the inferior:
5465 t1 = y * z
5466 t2 = t1 * x
5467 t3 = t2 * t2
5468 t4 = z * z
5469 t5 = t3 * t4
5470 result = t5 * y */
5472 vec_len = repeat_factor_vec.length ();
5474 /* Repeatedly look for opportunities to create a builtin_powi call. */
5475 while (true)
5477 HOST_WIDE_INT power;
5479 /* First look for the largest cached product of factors from
5480 preceding iterations. If found, create a builtin_powi for
5481 it if the minimum occurrence count for its factors is at
5482 least 2, or just use this cached product as our next
5483 multiplicand if the minimum occurrence count is 1. */
5484 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5486 if (rf1->repr && rf1->count > 0)
5487 break;
5490 if (j < vec_len)
5492 power = rf1->count;
5494 if (power == 1)
5496 iter_result = rf1->repr;
5498 if (dump_file && (dump_flags & TDF_DETAILS))
5500 unsigned elt;
5501 repeat_factor *rf;
5502 fputs ("Multiplying by cached product ", dump_file);
5503 for (elt = j; elt < vec_len; elt++)
5505 rf = &repeat_factor_vec[elt];
5506 print_generic_expr (dump_file, rf->factor);
5507 if (elt < vec_len - 1)
5508 fputs (" * ", dump_file);
5510 fputs ("\n", dump_file);
5513 else
5515 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5516 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5517 build_int_cst (integer_type_node,
5518 power));
5519 gimple_call_set_lhs (pow_stmt, iter_result);
5520 gimple_set_location (pow_stmt, gimple_location (stmt));
5521 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5522 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5524 if (dump_file && (dump_flags & TDF_DETAILS))
5526 unsigned elt;
5527 repeat_factor *rf;
5528 fputs ("Building __builtin_pow call for cached product (",
5529 dump_file);
5530 for (elt = j; elt < vec_len; elt++)
5532 rf = &repeat_factor_vec[elt];
5533 print_generic_expr (dump_file, rf->factor);
5534 if (elt < vec_len - 1)
5535 fputs (" * ", dump_file);
5537 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5538 power);
5542 else
5544 /* Otherwise, find the first factor in the repeated factor
5545 vector whose occurrence count is at least 2. If no such
5546 factor exists, there are no builtin_powi opportunities
5547 remaining. */
5548 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5550 if (rf1->count >= 2)
5551 break;
5554 if (j >= vec_len)
5555 break;
5557 power = rf1->count;
5559 if (dump_file && (dump_flags & TDF_DETAILS))
5561 unsigned elt;
5562 repeat_factor *rf;
5563 fputs ("Building __builtin_pow call for (", dump_file);
5564 for (elt = j; elt < vec_len; elt++)
5566 rf = &repeat_factor_vec[elt];
5567 print_generic_expr (dump_file, rf->factor);
5568 if (elt < vec_len - 1)
5569 fputs (" * ", dump_file);
5571 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5574 reassociate_stats.pows_created++;
5576 /* Visit each element of the vector in reverse order (so that
5577 high-occurrence elements are visited first, and within the
5578 same occurrence count, lower-ranked elements are visited
5579 first). Form a linear product of all elements in this order
5580 whose occurrencce count is at least that of element J.
5581 Record the SSA name representing the product of each element
5582 with all subsequent elements in the vector. */
5583 if (j == vec_len - 1)
5584 rf1->repr = rf1->factor;
5585 else
5587 for (ii = vec_len - 2; ii >= (int)j; ii--)
5589 tree op1, op2;
5591 rf1 = &repeat_factor_vec[ii];
5592 rf2 = &repeat_factor_vec[ii + 1];
5594 /* Init the last factor's representative to be itself. */
5595 if (!rf2->repr)
5596 rf2->repr = rf2->factor;
5598 op1 = rf1->factor;
5599 op2 = rf2->repr;
5601 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5602 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5603 op1, op2);
5604 gimple_set_location (mul_stmt, gimple_location (stmt));
5605 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5606 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5607 rf1->repr = target_ssa;
5609 /* Don't reprocess the multiply we just introduced. */
5610 gimple_set_visited (mul_stmt, true);
5614 /* Form a call to __builtin_powi for the maximum product
5615 just formed, raised to the power obtained earlier. */
5616 rf1 = &repeat_factor_vec[j];
5617 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5618 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5619 build_int_cst (integer_type_node,
5620 power));
5621 gimple_call_set_lhs (pow_stmt, iter_result);
5622 gimple_set_location (pow_stmt, gimple_location (stmt));
5623 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5624 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5627 /* If we previously formed at least one other builtin_powi call,
5628 form the product of this one and those others. */
5629 if (result)
5631 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5632 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5633 result, iter_result);
5634 gimple_set_location (mul_stmt, gimple_location (stmt));
5635 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5636 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5637 gimple_set_visited (mul_stmt, true);
5638 result = new_result;
5640 else
5641 result = iter_result;
5643 /* Decrement the occurrence count of each element in the product
5644 by the count found above, and remove this many copies of each
5645 factor from OPS. */
5646 for (i = j; i < vec_len; i++)
5648 unsigned k = power;
5649 unsigned n;
5651 rf1 = &repeat_factor_vec[i];
5652 rf1->count -= power;
5654 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5656 if (oe->op == rf1->factor)
5658 if (oe->count <= k)
5660 ops->ordered_remove (n);
5661 k -= oe->count;
5663 if (k == 0)
5664 break;
5666 else
5668 oe->count -= k;
5669 break;
5676 /* At this point all elements in the repeated factor vector have a
5677 remaining occurrence count of 0 or 1, and those with a count of 1
5678 don't have cached representatives. Re-sort the ops vector and
5679 clean up. */
5680 ops->qsort (sort_by_operand_rank);
5681 repeat_factor_vec.release ();
5683 /* Return the final product computed herein. Note that there may
5684 still be some elements with single occurrence count left in OPS;
5685 those will be handled by the normal reassociation logic. */
5686 return result;
5689 /* Attempt to optimize
5690 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5691 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5693 static void
5694 attempt_builtin_copysign (vec<operand_entry *> *ops)
5696 operand_entry *oe;
5697 unsigned int i;
5698 unsigned int length = ops->length ();
5699 tree cst = ops->last ()->op;
5701 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5702 return;
5704 FOR_EACH_VEC_ELT (*ops, i, oe)
5706 if (TREE_CODE (oe->op) == SSA_NAME
5707 && has_single_use (oe->op))
5709 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5710 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5712 tree arg0, arg1;
5713 switch (gimple_call_combined_fn (old_call))
5715 CASE_CFN_COPYSIGN:
5716 CASE_CFN_COPYSIGN_FN:
5717 arg0 = gimple_call_arg (old_call, 0);
5718 arg1 = gimple_call_arg (old_call, 1);
5719 /* The first argument of copysign must be a constant,
5720 otherwise there's nothing to do. */
5721 if (TREE_CODE (arg0) == REAL_CST)
5723 tree type = TREE_TYPE (arg0);
5724 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5725 /* If we couldn't fold to a single constant, skip it.
5726 That happens e.g. for inexact multiplication when
5727 -frounding-math. */
5728 if (mul == NULL_TREE)
5729 break;
5730 /* Instead of adjusting OLD_CALL, let's build a new
5731 call to not leak the LHS and prevent keeping bogus
5732 debug statements. DCE will clean up the old call. */
5733 gcall *new_call;
5734 if (gimple_call_internal_p (old_call))
5735 new_call = gimple_build_call_internal
5736 (IFN_COPYSIGN, 2, mul, arg1);
5737 else
5738 new_call = gimple_build_call
5739 (gimple_call_fndecl (old_call), 2, mul, arg1);
5740 tree lhs = make_ssa_name (type);
5741 gimple_call_set_lhs (new_call, lhs);
5742 gimple_set_location (new_call,
5743 gimple_location (old_call));
5744 insert_stmt_after (new_call, old_call);
5745 /* We've used the constant, get rid of it. */
5746 ops->pop ();
5747 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5748 /* Handle the CST1 < 0 case by negating the result. */
5749 if (cst1_neg)
5751 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5752 gimple *negate_stmt
5753 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5754 insert_stmt_after (negate_stmt, new_call);
5755 oe->op = negrhs;
5757 else
5758 oe->op = lhs;
5759 if (dump_file && (dump_flags & TDF_DETAILS))
5761 fprintf (dump_file, "Optimizing copysign: ");
5762 print_generic_expr (dump_file, cst);
5763 fprintf (dump_file, " * COPYSIGN (");
5764 print_generic_expr (dump_file, arg0);
5765 fprintf (dump_file, ", ");
5766 print_generic_expr (dump_file, arg1);
5767 fprintf (dump_file, ") into %sCOPYSIGN (",
5768 cst1_neg ? "-" : "");
5769 print_generic_expr (dump_file, mul);
5770 fprintf (dump_file, ", ");
5771 print_generic_expr (dump_file, arg1);
5772 fprintf (dump_file, "\n");
5774 return;
5776 break;
5777 default:
5778 break;
5785 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5787 static void
5788 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5790 tree rhs1;
5792 if (dump_file && (dump_flags & TDF_DETAILS))
5794 fprintf (dump_file, "Transforming ");
5795 print_gimple_stmt (dump_file, stmt, 0);
5798 rhs1 = gimple_assign_rhs1 (stmt);
5799 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5800 update_stmt (stmt);
5801 remove_visited_stmt_chain (rhs1);
5803 if (dump_file && (dump_flags & TDF_DETAILS))
5805 fprintf (dump_file, " into ");
5806 print_gimple_stmt (dump_file, stmt, 0);
5810 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5812 static void
5813 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5814 tree rhs1, tree rhs2)
5816 if (dump_file && (dump_flags & TDF_DETAILS))
5818 fprintf (dump_file, "Transforming ");
5819 print_gimple_stmt (dump_file, stmt, 0);
5822 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5823 update_stmt (gsi_stmt (*gsi));
5824 remove_visited_stmt_chain (rhs1);
5826 if (dump_file && (dump_flags & TDF_DETAILS))
5828 fprintf (dump_file, " into ");
5829 print_gimple_stmt (dump_file, stmt, 0);
5833 /* Reassociate expressions in basic block BB and its post-dominator as
5834 children.
5836 Bubble up return status from maybe_optimize_range_tests. */
5838 static bool
5839 reassociate_bb (basic_block bb)
5841 gimple_stmt_iterator gsi;
5842 basic_block son;
5843 gimple *stmt = last_stmt (bb);
5844 bool cfg_cleanup_needed = false;
5846 if (stmt && !gimple_visited_p (stmt))
5847 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5849 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5851 stmt = gsi_stmt (gsi);
5853 if (is_gimple_assign (stmt)
5854 && !stmt_could_throw_p (stmt))
5856 tree lhs, rhs1, rhs2;
5857 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5859 /* If this is not a gimple binary expression, there is
5860 nothing for us to do with it. */
5861 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5862 continue;
5864 /* If this was part of an already processed statement,
5865 we don't need to touch it again. */
5866 if (gimple_visited_p (stmt))
5868 /* This statement might have become dead because of previous
5869 reassociations. */
5870 if (has_zero_uses (gimple_get_lhs (stmt)))
5872 reassoc_remove_stmt (&gsi);
5873 release_defs (stmt);
5874 /* We might end up removing the last stmt above which
5875 places the iterator to the end of the sequence.
5876 Reset it to the last stmt in this case which might
5877 be the end of the sequence as well if we removed
5878 the last statement of the sequence. In which case
5879 we need to bail out. */
5880 if (gsi_end_p (gsi))
5882 gsi = gsi_last_bb (bb);
5883 if (gsi_end_p (gsi))
5884 break;
5887 continue;
5890 lhs = gimple_assign_lhs (stmt);
5891 rhs1 = gimple_assign_rhs1 (stmt);
5892 rhs2 = gimple_assign_rhs2 (stmt);
5894 /* For non-bit or min/max operations we can't associate
5895 all types. Verify that here. */
5896 if (rhs_code != BIT_IOR_EXPR
5897 && rhs_code != BIT_AND_EXPR
5898 && rhs_code != BIT_XOR_EXPR
5899 && rhs_code != MIN_EXPR
5900 && rhs_code != MAX_EXPR
5901 && (!can_reassociate_p (lhs)
5902 || !can_reassociate_p (rhs1)
5903 || !can_reassociate_p (rhs2)))
5904 continue;
5906 if (associative_tree_code (rhs_code))
5908 auto_vec<operand_entry *> ops;
5909 tree powi_result = NULL_TREE;
5910 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5912 /* There may be no immediate uses left by the time we
5913 get here because we may have eliminated them all. */
5914 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5915 continue;
5917 gimple_set_visited (stmt, true);
5918 linearize_expr_tree (&ops, stmt, true, true);
5919 ops.qsort (sort_by_operand_rank);
5920 int orig_len = ops.length ();
5921 optimize_ops_list (rhs_code, &ops);
5922 if (undistribute_ops_list (rhs_code, &ops,
5923 loop_containing_stmt (stmt)))
5925 ops.qsort (sort_by_operand_rank);
5926 optimize_ops_list (rhs_code, &ops);
5929 if (rhs_code == PLUS_EXPR
5930 && transform_add_to_multiply (&ops))
5931 ops.qsort (sort_by_operand_rank);
5933 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5935 if (is_vector)
5936 optimize_vec_cond_expr (rhs_code, &ops);
5937 else
5938 optimize_range_tests (rhs_code, &ops, NULL);
5941 if (rhs_code == MULT_EXPR && !is_vector)
5943 attempt_builtin_copysign (&ops);
5945 if (reassoc_insert_powi_p
5946 && flag_unsafe_math_optimizations)
5947 powi_result = attempt_builtin_powi (stmt, &ops);
5950 operand_entry *last;
5951 bool negate_result = false;
5952 if (ops.length () > 1
5953 && rhs_code == MULT_EXPR)
5955 last = ops.last ();
5956 if ((integer_minus_onep (last->op)
5957 || real_minus_onep (last->op))
5958 && !HONOR_SNANS (TREE_TYPE (lhs))
5959 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5960 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5962 ops.pop ();
5963 negate_result = true;
5967 tree new_lhs = lhs;
5968 /* If the operand vector is now empty, all operands were
5969 consumed by the __builtin_powi optimization. */
5970 if (ops.length () == 0)
5971 transform_stmt_to_copy (&gsi, stmt, powi_result);
5972 else if (ops.length () == 1)
5974 tree last_op = ops.last ()->op;
5976 /* If the stmt that defines operand has to be inserted, insert it
5977 before the use. */
5978 if (ops.last ()->stmt_to_insert)
5979 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5980 if (powi_result)
5981 transform_stmt_to_multiply (&gsi, stmt, last_op,
5982 powi_result);
5983 else
5984 transform_stmt_to_copy (&gsi, stmt, last_op);
5986 else
5988 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5989 int ops_num = ops.length ();
5990 int width = get_reassociation_width (ops_num, rhs_code, mode);
5992 if (dump_file && (dump_flags & TDF_DETAILS))
5993 fprintf (dump_file,
5994 "Width = %d was chosen for reassociation\n", width);
5997 /* For binary bit operations, if there are at least 3
5998 operands and the last last operand in OPS is a constant,
5999 move it to the front. This helps ensure that we generate
6000 (X & Y) & C rather than (X & C) & Y. The former will
6001 often match a canonical bit test when we get to RTL. */
6002 if (ops.length () > 2
6003 && (rhs_code == BIT_AND_EXPR
6004 || rhs_code == BIT_IOR_EXPR
6005 || rhs_code == BIT_XOR_EXPR)
6006 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6007 std::swap (*ops[0], *ops[ops_num - 1]);
6009 if (width > 1
6010 && ops.length () > 3)
6011 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6012 width, ops);
6013 else
6015 /* When there are three operands left, we want
6016 to make sure the ones that get the double
6017 binary op are chosen wisely. */
6018 int len = ops.length ();
6019 if (len >= 3)
6020 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6022 new_lhs = rewrite_expr_tree (stmt, 0, ops,
6023 powi_result != NULL
6024 || negate_result,
6025 len != orig_len);
6028 /* If we combined some repeated factors into a
6029 __builtin_powi call, multiply that result by the
6030 reassociated operands. */
6031 if (powi_result)
6033 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6034 tree type = TREE_TYPE (lhs);
6035 tree target_ssa = make_temp_ssa_name (type, NULL,
6036 "reassocpow");
6037 gimple_set_lhs (lhs_stmt, target_ssa);
6038 update_stmt (lhs_stmt);
6039 if (lhs != new_lhs)
6041 target_ssa = new_lhs;
6042 new_lhs = lhs;
6044 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6045 powi_result, target_ssa);
6046 gimple_set_location (mul_stmt, gimple_location (stmt));
6047 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6048 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6052 if (negate_result)
6054 stmt = SSA_NAME_DEF_STMT (lhs);
6055 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6056 gimple_set_lhs (stmt, tmp);
6057 if (lhs != new_lhs)
6058 tmp = new_lhs;
6059 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6060 tmp);
6061 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6062 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6063 update_stmt (stmt);
6068 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6069 son;
6070 son = next_dom_son (CDI_POST_DOMINATORS, son))
6071 cfg_cleanup_needed |= reassociate_bb (son);
6073 return cfg_cleanup_needed;
6076 /* Add jumps around shifts for range tests turned into bit tests.
6077 For each SSA_NAME VAR we have code like:
6078 VAR = ...; // final stmt of range comparison
6079 // bit test here...;
6080 OTHERVAR = ...; // final stmt of the bit test sequence
6081 RES = VAR | OTHERVAR;
6082 Turn the above into:
6083 VAR = ...;
6084 if (VAR != 0)
6085 goto <l3>;
6086 else
6087 goto <l2>;
6088 <l2>:
6089 // bit test here...;
6090 OTHERVAR = ...;
6091 <l3>:
6092 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6094 static void
6095 branch_fixup (void)
6097 tree var;
6098 unsigned int i;
6100 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6102 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6103 gimple *use_stmt;
6104 use_operand_p use;
6105 bool ok = single_imm_use (var, &use, &use_stmt);
6106 gcc_assert (ok
6107 && is_gimple_assign (use_stmt)
6108 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6109 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6111 basic_block cond_bb = gimple_bb (def_stmt);
6112 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6113 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6115 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6116 gimple *g = gimple_build_cond (NE_EXPR, var,
6117 build_zero_cst (TREE_TYPE (var)),
6118 NULL_TREE, NULL_TREE);
6119 location_t loc = gimple_location (use_stmt);
6120 gimple_set_location (g, loc);
6121 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6123 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6124 etrue->probability = profile_probability::even ();
6125 edge efalse = find_edge (cond_bb, then_bb);
6126 efalse->flags = EDGE_FALSE_VALUE;
6127 efalse->probability -= etrue->probability;
6128 then_bb->count -= etrue->count ();
6130 tree othervar = NULL_TREE;
6131 if (gimple_assign_rhs1 (use_stmt) == var)
6132 othervar = gimple_assign_rhs2 (use_stmt);
6133 else if (gimple_assign_rhs2 (use_stmt) == var)
6134 othervar = gimple_assign_rhs1 (use_stmt);
6135 else
6136 gcc_unreachable ();
6137 tree lhs = gimple_assign_lhs (use_stmt);
6138 gphi *phi = create_phi_node (lhs, merge_bb);
6139 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6140 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6141 gsi = gsi_for_stmt (use_stmt);
6142 gsi_remove (&gsi, true);
6144 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6145 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6147 reassoc_branch_fixups.release ();
6150 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6151 void debug_ops_vector (vec<operand_entry *> ops);
6153 /* Dump the operand entry vector OPS to FILE. */
6155 void
6156 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6158 operand_entry *oe;
6159 unsigned int i;
6161 FOR_EACH_VEC_ELT (ops, i, oe)
6163 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6164 print_generic_expr (file, oe->op);
6165 fprintf (file, "\n");
6169 /* Dump the operand entry vector OPS to STDERR. */
6171 DEBUG_FUNCTION void
6172 debug_ops_vector (vec<operand_entry *> ops)
6174 dump_ops_vector (stderr, ops);
6177 /* Bubble up return status from reassociate_bb. */
6179 static bool
6180 do_reassoc (void)
6182 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6183 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6186 /* Initialize the reassociation pass. */
6188 static void
6189 init_reassoc (void)
6191 int i;
6192 long rank = 2;
6193 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6195 /* Find the loops, so that we can prevent moving calculations in
6196 them. */
6197 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6199 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6201 next_operand_entry_id = 0;
6203 /* Reverse RPO (Reverse Post Order) will give us something where
6204 deeper loops come later. */
6205 pre_and_rev_post_order_compute (NULL, bbs, false);
6206 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6207 operand_rank = new hash_map<tree, long>;
6209 /* Give each default definition a distinct rank. This includes
6210 parameters and the static chain. Walk backwards over all
6211 SSA names so that we get proper rank ordering according
6212 to tree_swap_operands_p. */
6213 for (i = num_ssa_names - 1; i > 0; --i)
6215 tree name = ssa_name (i);
6216 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6217 insert_operand_rank (name, ++rank);
6220 /* Set up rank for each BB */
6221 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6222 bb_rank[bbs[i]] = ++rank << 16;
6224 free (bbs);
6225 calculate_dominance_info (CDI_POST_DOMINATORS);
6226 plus_negates = vNULL;
6229 /* Cleanup after the reassociation pass, and print stats if
6230 requested. */
6232 static void
6233 fini_reassoc (void)
6235 statistics_counter_event (cfun, "Linearized",
6236 reassociate_stats.linearized);
6237 statistics_counter_event (cfun, "Constants eliminated",
6238 reassociate_stats.constants_eliminated);
6239 statistics_counter_event (cfun, "Ops eliminated",
6240 reassociate_stats.ops_eliminated);
6241 statistics_counter_event (cfun, "Statements rewritten",
6242 reassociate_stats.rewritten);
6243 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6244 reassociate_stats.pows_encountered);
6245 statistics_counter_event (cfun, "Built-in powi calls created",
6246 reassociate_stats.pows_created);
6248 delete operand_rank;
6249 operand_entry_pool.release ();
6250 free (bb_rank);
6251 plus_negates.release ();
6252 free_dominance_info (CDI_POST_DOMINATORS);
6253 loop_optimizer_finalize ();
6256 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6257 insertion of __builtin_powi calls.
6259 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6260 optimization of a gimple conditional. Otherwise returns zero. */
6262 static unsigned int
6263 execute_reassoc (bool insert_powi_p)
6265 reassoc_insert_powi_p = insert_powi_p;
6267 init_reassoc ();
6269 bool cfg_cleanup_needed;
6270 cfg_cleanup_needed = do_reassoc ();
6271 repropagate_negates ();
6272 branch_fixup ();
6274 fini_reassoc ();
6275 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6278 namespace {
6280 const pass_data pass_data_reassoc =
6282 GIMPLE_PASS, /* type */
6283 "reassoc", /* name */
6284 OPTGROUP_NONE, /* optinfo_flags */
6285 TV_TREE_REASSOC, /* tv_id */
6286 ( PROP_cfg | PROP_ssa ), /* properties_required */
6287 0, /* properties_provided */
6288 0, /* properties_destroyed */
6289 0, /* todo_flags_start */
6290 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6293 class pass_reassoc : public gimple_opt_pass
6295 public:
6296 pass_reassoc (gcc::context *ctxt)
6297 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6300 /* opt_pass methods: */
6301 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6302 void set_pass_param (unsigned int n, bool param)
6304 gcc_assert (n == 0);
6305 insert_powi_p = param;
6307 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6308 virtual unsigned int execute (function *)
6309 { return execute_reassoc (insert_powi_p); }
6311 private:
6312 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6313 point 3a in the pass header comment. */
6314 bool insert_powi_p;
6315 }; // class pass_reassoc
6317 } // anon namespace
6319 gimple_opt_pass *
6320 make_pass_reassoc (gcc::context *ctxt)
6322 return new pass_reassoc (ctxt);