gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2018 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (they do some things more or less
58 than we do, and in different orders, etc.).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and will rarely help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
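/* A small end-to-end illustration of steps 1-5, added for exposition only;
   it is not taken from the sources, the two helper functions below are
   hypothetical, and unsigned arithmetic is assumed so the rewrite is
   unconditionally valid.  The pass effectively turns the first function
   into the shape of the second.  */

#if 0
static unsigned
reassoc_demo_before (unsigned a, unsigned b, unsigned c)
{
  /* Step 1 turns the subtracts into additions of negates,
       (a + -b) + (c + -a),
     step 2 linearizes that into the operand list { a, -b, c, -a },
     step 3 cancels the a / -a pair, leaving { -b, c }, and
     steps 4 and 5 rewrite the remaining chain and repropagate the
     negate, giving c - b.  */
  return (a - b) + (c - a);
}

static unsigned
reassoc_demo_after (unsigned a ATTRIBUTE_UNUSED, unsigned b, unsigned c)
{
  return c - b;
}
#endif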
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
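/* For illustration only (this example is not from the sources): when
   reassoc_insert_powi_p is true and unsafe math optimizations permit it,
   step 3a can rewrite a linearized product such as

     t = x * x * x * x * x;

   into

     t = __builtin_powi (x, 5);

   which is later expanded into a near-optimal sequence of multiplies.  */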
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted - the basic block
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (phi_stmt) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result
386 of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal; however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands satisfy is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE (1 << 4)
474 #define FLOAT_ONE_CONST_TYPE (1 << 3)
475 #define FLOAT_CONST_TYPE (1 << 2)
476 #define OTHER_CONST_TYPE (1 << 1)
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
480 static inline int
481 constant_type (tree t)
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484 return INTEGER_CONST_TYPE;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
487 /* Sort -1.0 and 1.0 constants last: while in some cases
488 const_binop can't optimize some inexact operations, multiplication
489 by -1.0 or 1.0 can always be merged with other operands. */
490 if (real_onep (t) || real_minus_onep (t))
491 return FLOAT_ONE_CONST_TYPE;
492 return FLOAT_CONST_TYPE;
494 else
495 return OTHER_CONST_TYPE;
498 /* qsort comparison function to sort operand entries PA and PB by rank
499 so that the sorted array is ordered by rank in decreasing order. */
500 static int
501 sort_by_operand_rank (const void *pa, const void *pb)
503 const operand_entry *oea = *(const operand_entry *const *)pa;
504 const operand_entry *oeb = *(const operand_entry *const *)pb;
506 if (oeb->rank != oea->rank)
507 return oeb->rank > oea->rank ? 1 : -1;
509 /* It's nicer for optimize_expression if constants that are likely
510 to fold when added/multiplied/whatever are put next to each
511 other. Since all constants have rank 0, order them by type. */
512 if (oea->rank == 0)
514 if (constant_type (oeb->op) != constant_type (oea->op))
515 return constant_type (oea->op) - constant_type (oeb->op);
516 else
517 /* To make sorting result stable, we use unique IDs to determine
518 order. */
519 return oeb->id > oea->id ? 1 : -1;
522 if (TREE_CODE (oea->op) != SSA_NAME)
524 if (TREE_CODE (oeb->op) != SSA_NAME)
525 return oeb->id > oea->id ? 1 : -1;
526 else
527 return 1;
529 else if (TREE_CODE (oeb->op) != SSA_NAME)
530 return -1;
532 /* Lastly, make sure the versions that are the same go next to each
533 other. */
534 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
536 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
537 versions of removed SSA_NAMEs, so if possible prefer to sort
538 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539 See PR60418. */
540 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542 basic_block bba = gimple_bb (stmta);
543 basic_block bbb = gimple_bb (stmtb);
544 if (bbb != bba)
546 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 but the other might not. */
548 if (!bba)
549 return 1;
550 if (!bbb)
551 return -1;
552 /* If neither is, compare bb_rank. */
553 if (bb_rank[bbb->index] != bb_rank[bba->index])
554 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
557 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559 if (da != db)
560 return da ? 1 : -1;
562 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
565 return oeb->id > oea->id ? 1 : -1;
568 /* Add an operand entry to *OPS for the tree operand OP. */
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
573 operand_entry *oe = operand_entry_pool.allocate ();
575 oe->op = op;
576 oe->rank = get_rank (op);
577 oe->id = next_operand_entry_id++;
578 oe->count = 1;
579 oe->stmt_to_insert = stmt_to_insert;
580 ops->safe_push (oe);
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584 count REPEAT. */
586 static void
587 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
588 HOST_WIDE_INT repeat)
590 operand_entry *oe = operand_entry_pool.allocate ();
592 oe->op = op;
593 oe->rank = get_rank (op);
594 oe->id = next_operand_entry_id++;
595 oe->count = repeat;
596 oe->stmt_to_insert = NULL;
597 ops->safe_push (oe);
599 reassociate_stats.pows_encountered++;
602 /* Return true if STMT is a reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */
605 static bool
606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
608 basic_block bb = gimple_bb (stmt);
610 if (gimple_bb (stmt) == NULL)
611 return false;
613 if (!flow_bb_inside_loop_p (loop, bb))
614 return false;
616 if (is_gimple_assign (stmt)
617 && gimple_assign_rhs_code (stmt) == code
618 && has_single_use (gimple_assign_lhs (stmt)))
620 tree rhs1 = gimple_assign_rhs1 (stmt);
621 tree rhs2 = gimple_assign_rhs2 (stmt);
622 if (TREE_CODE (rhs1) == SSA_NAME
623 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 return false;
625 if (rhs2
626 && TREE_CODE (rhs2) == SSA_NAME
627 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 return false;
629 return true;
632 return false;
636 /* Return true if STMT is a nop-conversion. */
638 static bool
639 gimple_nop_conversion_p (gimple *stmt)
641 if (gassign *ass = dyn_cast <gassign *> (stmt))
643 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 TREE_TYPE (gimple_assign_rhs1 (ass))))
646 return true;
648 return false;
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652 operand of that operation. Otherwise, return NULL. */
654 static tree
655 get_unary_op (tree name, enum tree_code opcode)
657 gimple *stmt = SSA_NAME_DEF_STMT (name);
659 /* Look through nop conversions (sign changes). */
660 if (gimple_nop_conversion_p (stmt)
661 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
664 if (!is_gimple_assign (stmt))
665 return NULL_TREE;
667 if (gimple_assign_rhs_code (stmt) == opcode)
668 return gimple_assign_rhs1 (stmt);
669 return NULL_TREE;
672 /* Return true if OP1 and OP2 have the same value if cast to either type. */
674 static bool
675 ops_equal_values_p (tree op1, tree op2)
677 if (op1 == op2)
678 return true;
680 tree orig_op1 = op1;
681 if (TREE_CODE (op1) == SSA_NAME)
683 gimple *stmt = SSA_NAME_DEF_STMT (op1);
684 if (gimple_nop_conversion_p (stmt))
686 op1 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2)
688 return true;
692 if (TREE_CODE (op2) == SSA_NAME)
694 gimple *stmt = SSA_NAME_DEF_STMT (op2);
695 if (gimple_nop_conversion_p (stmt))
697 op2 = gimple_assign_rhs1 (stmt);
698 if (op1 == op2
699 || orig_op1 == op2)
700 return true;
704 return false;
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709 eliminate through equivalences, do so, remove them from OPS, and
710 return true. Otherwise, return false. */
712 static bool
713 eliminate_duplicate_pair (enum tree_code opcode,
714 vec<operand_entry *> *ops,
715 bool *all_done,
716 unsigned int i,
717 operand_entry *curr,
718 operand_entry *last)
721 /* If we have two of the same op, and the opcode is &, |, min, or max,
722 we can eliminate one of them.
723 If we have two of the same op, and the opcode is ^, we can
724 eliminate both of them. */
726 if (last && last->op == curr->op)
728 switch (opcode)
730 case MAX_EXPR:
731 case MIN_EXPR:
732 case BIT_IOR_EXPR:
733 case BIT_AND_EXPR:
734 if (dump_file && (dump_flags & TDF_DETAILS))
736 fprintf (dump_file, "Equivalence: ");
737 print_generic_expr (dump_file, curr->op);
738 fprintf (dump_file, " [&|minmax] ");
739 print_generic_expr (dump_file, last->op);
740 fprintf (dump_file, " -> ");
741 print_generic_stmt (dump_file, last->op);
744 ops->ordered_remove (i);
745 reassociate_stats.ops_eliminated ++;
747 return true;
749 case BIT_XOR_EXPR:
750 if (dump_file && (dump_flags & TDF_DETAILS))
752 fprintf (dump_file, "Equivalence: ");
753 print_generic_expr (dump_file, curr->op);
754 fprintf (dump_file, " ^ ");
755 print_generic_expr (dump_file, last->op);
756 fprintf (dump_file, " -> nothing\n");
759 reassociate_stats.ops_eliminated += 2;
761 if (ops->length () == 2)
763 ops->truncate (0);
764 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765 *all_done = true;
767 else
769 ops->ordered_remove (i-1);
770 ops->ordered_remove (i-1);
773 return true;
775 default:
776 break;
779 return false;
782 static vec<tree> plus_negates;
784 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
785 not expression, look in OPS for a corresponding positive operation to
786 cancel it out. If we find one, remove the other from OPS, replace
787 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
788 return false. */
790 static bool
791 eliminate_plus_minus_pair (enum tree_code opcode,
792 vec<operand_entry *> *ops,
793 unsigned int currindex,
794 operand_entry *curr)
796 tree negateop;
797 tree notop;
798 unsigned int i;
799 operand_entry *oe;
801 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802 return false;
804 negateop = get_unary_op (curr->op, NEGATE_EXPR);
805 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806 if (negateop == NULL_TREE && notop == NULL_TREE)
807 return false;
809 /* Any non-negated version will have a rank that is one less than
810 the current rank. So once we hit those ranks, if we don't find
811 one, we can stop. */
813 for (i = currindex + 1;
814 ops->iterate (i, &oe)
815 && oe->rank >= curr->rank - 1 ;
816 i++)
818 if (negateop
819 && ops_equal_values_p (oe->op, negateop))
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, negateop);
825 fprintf (dump_file, " + -");
826 print_generic_expr (dump_file, oe->op);
827 fprintf (dump_file, " -> 0\n");
830 ops->ordered_remove (i);
831 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832 ops->ordered_remove (currindex);
833 reassociate_stats.ops_eliminated ++;
835 return true;
837 else if (notop
838 && ops_equal_values_p (oe->op, notop))
840 tree op_type = TREE_TYPE (oe->op);
842 if (dump_file && (dump_flags & TDF_DETAILS))
844 fprintf (dump_file, "Equivalence: ");
845 print_generic_expr (dump_file, notop);
846 fprintf (dump_file, " + ~");
847 print_generic_expr (dump_file, oe->op);
848 fprintf (dump_file, " -> -1\n");
851 ops->ordered_remove (i);
852 add_to_ops_vec (ops, build_all_ones_cst (op_type));
853 ops->ordered_remove (currindex);
854 reassociate_stats.ops_eliminated ++;
856 return true;
860 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861 save it for later inspection in repropagate_negates(). */
862 if (negateop != NULL_TREE
863 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864 plus_negates.safe_push (curr->op);
866 return false;
869 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
870 bitwise not expression, look in OPS for a corresponding operand to
871 cancel it out. If we find one, collapse OPS to a single -1 or 0,
872 respectively, and return true. Otherwise, return
873 false. */
875 static bool
876 eliminate_not_pairs (enum tree_code opcode,
877 vec<operand_entry *> *ops,
878 unsigned int currindex,
879 operand_entry *curr)
881 tree notop;
882 unsigned int i;
883 operand_entry *oe;
885 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886 || TREE_CODE (curr->op) != SSA_NAME)
887 return false;
889 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890 if (notop == NULL_TREE)
891 return false;
893 /* Any non-not version will have a rank that is one less than
894 the current rank. So once we hit those ranks, if we don't find
895 one, we can stop. */
897 for (i = currindex + 1;
898 ops->iterate (i, &oe)
899 && oe->rank >= curr->rank - 1;
900 i++)
902 if (oe->op == notop)
904 if (dump_file && (dump_flags & TDF_DETAILS))
906 fprintf (dump_file, "Equivalence: ");
907 print_generic_expr (dump_file, notop);
908 if (opcode == BIT_AND_EXPR)
909 fprintf (dump_file, " & ~");
910 else if (opcode == BIT_IOR_EXPR)
911 fprintf (dump_file, " | ~");
912 print_generic_expr (dump_file, oe->op);
913 if (opcode == BIT_AND_EXPR)
914 fprintf (dump_file, " -> 0\n");
915 else if (opcode == BIT_IOR_EXPR)
916 fprintf (dump_file, " -> -1\n");
919 if (opcode == BIT_AND_EXPR)
920 oe->op = build_zero_cst (TREE_TYPE (oe->op));
921 else if (opcode == BIT_IOR_EXPR)
922 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
924 reassociate_stats.ops_eliminated += ops->length () - 1;
925 ops->truncate (0);
926 ops->quick_push (oe);
927 return true;
931 return false;
934 /* Use constant value that may be present in OPS to try to eliminate
935 operands. Note that this function is only really used when we've
936 eliminated ops for other reasons, or merged constants. Across
937 single statements, fold already does all of this, plus more. There
938 is little point in duplicating logic, so I've only included the
939 identities that I could ever construct testcases to trigger. */
941 static void
942 eliminate_using_constants (enum tree_code opcode,
943 vec<operand_entry *> *ops)
945 operand_entry *oelast = ops->last ();
946 tree type = TREE_TYPE (oelast->op);
948 if (oelast->rank == 0
949 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
951 switch (opcode)
953 case BIT_AND_EXPR:
954 if (integer_zerop (oelast->op))
956 if (ops->length () != 1)
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found & 0, removing all other ops\n");
961 reassociate_stats.ops_eliminated += ops->length () - 1;
963 ops->truncate (0);
964 ops->quick_push (oelast);
965 return;
968 else if (integer_all_onesp (oelast->op))
970 if (ops->length () != 1)
972 if (dump_file && (dump_flags & TDF_DETAILS))
973 fprintf (dump_file, "Found & -1, removing\n");
974 ops->pop ();
975 reassociate_stats.ops_eliminated++;
978 break;
979 case BIT_IOR_EXPR:
980 if (integer_all_onesp (oelast->op))
982 if (ops->length () != 1)
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 fprintf (dump_file, "Found | -1, removing all other ops\n");
987 reassociate_stats.ops_eliminated += ops->length () - 1;
989 ops->truncate (0);
990 ops->quick_push (oelast);
991 return;
994 else if (integer_zerop (oelast->op))
996 if (ops->length () != 1)
998 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "Found | 0, removing\n");
1000 ops->pop ();
1001 reassociate_stats.ops_eliminated++;
1004 break;
1005 case MULT_EXPR:
1006 if (integer_zerop (oelast->op)
1007 || (FLOAT_TYPE_P (type)
1008 && !HONOR_NANS (type)
1009 && !HONOR_SIGNED_ZEROS (type)
1010 && real_zerop (oelast->op)))
1012 if (ops->length () != 1)
1014 if (dump_file && (dump_flags & TDF_DETAILS))
1015 fprintf (dump_file, "Found * 0, removing all other ops\n");
1017 reassociate_stats.ops_eliminated += ops->length () - 1;
1018 ops->truncate (0);
1019 ops->quick_push (oelast);
1020 return;
1023 else if (integer_onep (oelast->op)
1024 || (FLOAT_TYPE_P (type)
1025 && !HONOR_SNANS (type)
1026 && real_onep (oelast->op)))
1028 if (ops->length () != 1)
1030 if (dump_file && (dump_flags & TDF_DETAILS))
1031 fprintf (dump_file, "Found * 1, removing\n");
1032 ops->pop ();
1033 reassociate_stats.ops_eliminated++;
1034 return;
1037 break;
1038 case BIT_XOR_EXPR:
1039 case PLUS_EXPR:
1040 case MINUS_EXPR:
1041 if (integer_zerop (oelast->op)
1042 || (FLOAT_TYPE_P (type)
1043 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044 && fold_real_zero_addition_p (type, oelast->op,
1045 opcode == MINUS_EXPR)))
1047 if (ops->length () != 1)
1049 if (dump_file && (dump_flags & TDF_DETAILS))
1050 fprintf (dump_file, "Found [|^+] 0, removing\n");
1051 ops->pop ();
1052 reassociate_stats.ops_eliminated++;
1053 return;
1056 break;
1057 default:
1058 break;
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065 bool, bool);
1067 /* Structure for tracking and counting operands. */
1068 struct oecount {
1069 unsigned int cnt;
1070 unsigned int id;
1071 enum tree_code oecode;
1072 tree op;
1076 /* The heap for the oecount hashtable and the sorted list of operands. */
1077 static vec<oecount> cvec;
1080 /* Oecount hashtable helpers. */
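/* Note (added commentary): the hashtable stores not pointers but indices
   into CVEC biased by 42, so that a real entry can never collide with the
   values 0 and 1 that int_hash reserves for empty and deleted slots; the
   hash and equal functions undo the bias before indexing CVEC.  */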
1082 struct oecount_hasher : int_hash <int, 0, 1>
1084 static inline hashval_t hash (int);
1085 static inline bool equal (int, int);
1088 /* Hash function for oecount. */
1090 inline hashval_t
1091 oecount_hasher::hash (int p)
1093 const oecount *c = &cvec[p - 42];
1094 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1097 /* Comparison function for oecount. */
1099 inline bool
1100 oecount_hasher::equal (int p1, int p2)
1102 const oecount *c1 = &cvec[p1 - 42];
1103 const oecount *c2 = &cvec[p2 - 42];
1104 return c1->oecode == c2->oecode && c1->op == c2->op;
1107 /* Comparison function for qsort sorting oecount elements by count. */
1109 static int
1110 oecount_cmp (const void *p1, const void *p2)
1112 const oecount *c1 = (const oecount *)p1;
1113 const oecount *c2 = (const oecount *)p2;
1114 if (c1->cnt != c2->cnt)
1115 return c1->cnt > c2->cnt ? 1 : -1;
1116 else
1117 /* If counts are identical, use unique IDs to stabilize qsort. */
1118 return c1->id > c2->id ? 1 : -1;
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122 to some exponent. */
1124 static bool
1125 stmt_is_power_of_op (gimple *stmt, tree op)
1127 if (!is_gimple_call (stmt))
1128 return false;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 CASE_CFN_POWI:
1134 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1136 default:
1137 return false;
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142 in place and return the result. Assumes that stmt_is_power_of_op
1143 was previously called for STMT and returned TRUE. */
1145 static HOST_WIDE_INT
1146 decrement_power (gimple *stmt)
1148 REAL_VALUE_TYPE c, cint;
1149 HOST_WIDE_INT power;
1150 tree arg1;
1152 switch (gimple_call_combined_fn (stmt))
1154 CASE_CFN_POW:
1155 arg1 = gimple_call_arg (stmt, 1);
1156 c = TREE_REAL_CST (arg1);
1157 power = real_to_integer (&c) - 1;
1158 real_from_integer (&cint, VOIDmode, power, SIGNED);
1159 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160 return power;
1162 CASE_CFN_POWI:
1163 arg1 = gimple_call_arg (stmt, 1);
1164 power = TREE_INT_CST_LOW (arg1) - 1;
1165 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166 return power;
1168 default:
1169 gcc_unreachable ();
1173 /* Replace the SSA name defined by STMT with a new SSA name, replacing
1174 all uses of the old name (adjusting debug stmts as needed), and
1175 return the new SSA name. */
1176 static tree
1177 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1179 gimple *use_stmt;
1180 use_operand_p use;
1181 imm_use_iterator iter;
1182 tree new_lhs, new_debug_lhs = NULL_TREE;
1183 tree lhs = gimple_get_lhs (stmt);
1185 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186 gimple_set_lhs (stmt, new_lhs);
1188 /* Also need to update GIMPLE_DEBUGs. */
1189 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1191 tree repl = new_lhs;
1192 if (is_gimple_debug (use_stmt))
1194 if (new_debug_lhs == NULL_TREE)
1196 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197 gdebug *def_temp
1198 = gimple_build_debug_bind (new_debug_lhs,
1199 build2 (opcode, TREE_TYPE (lhs),
1200 new_lhs, op),
1201 stmt);
1202 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205 gimple_set_uid (def_temp, gimple_uid (stmt));
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1209 repl = new_debug_lhs;
1211 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212 SET_USE (use, repl);
1213 update_stmt (use_stmt);
1215 return new_lhs;
1218 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1219 uses with new SSA names. Also do this for the stmt that defines DEF
1220 if *DEF is not OP. */
1222 static void
1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224 vec<gimple *> &stmts_to_fix)
1226 unsigned i;
1227 gimple *stmt;
1229 if (*def != op
1230 && TREE_CODE (*def) == SSA_NAME
1231 && (stmt = SSA_NAME_DEF_STMT (*def))
1232 && gimple_code (stmt) != GIMPLE_NOP)
1233 *def = make_new_ssa_for_def (stmt, opcode, op);
1235 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236 make_new_ssa_for_def (stmt, opcode, op);
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1241 replace *DEF with OP as well. */
1243 static void
1244 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1246 tree lhs;
1247 gimple *use_stmt;
1248 use_operand_p use;
1249 gimple_stmt_iterator gsi;
1251 if (is_gimple_call (stmt))
1252 lhs = gimple_call_lhs (stmt);
1253 else
1254 lhs = gimple_assign_lhs (stmt);
1256 gcc_assert (has_single_use (lhs));
1257 single_imm_use (lhs, &use, &use_stmt);
1258 if (lhs == *def)
1259 *def = op;
1260 SET_USE (use, op);
1261 if (TREE_CODE (op) != SSA_NAME)
1262 update_stmt (use_stmt);
1263 gsi = gsi_for_stmt (stmt);
1264 unlink_stmt_vdef (stmt);
1265 reassoc_remove_stmt (&gsi);
1266 release_defs (stmt);
1269 /* Walks the linear chain with result *DEF searching for an operation
1270 with operand OP and code OPCODE removing that from the chain. *DEF
1271 is updated if there is only one operand but no operation left. */
1273 static void
1274 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1276 tree orig_def = *def;
1277 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1278 /* PR72835 - Record the stmt chain that has to be updated such that
1279 we don't use the same LHS when the values computed are different. */
1280 auto_vec<gimple *, 64> stmts_to_fix;
1284 tree name;
1286 if (opcode == MULT_EXPR)
1288 if (stmt_is_power_of_op (stmt, op))
1290 if (decrement_power (stmt) == 1)
1292 if (stmts_to_fix.length () > 0)
1293 stmts_to_fix.pop ();
1294 propagate_op_to_single_use (op, stmt, def);
1296 break;
1298 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1300 if (gimple_assign_rhs1 (stmt) == op)
1302 tree cst = build_minus_one_cst (TREE_TYPE (op));
1303 if (stmts_to_fix.length () > 0)
1304 stmts_to_fix.pop ();
1305 propagate_op_to_single_use (cst, stmt, def);
1306 break;
1308 else if (integer_minus_onep (op)
1309 || real_minus_onep (op))
1311 gimple_assign_set_rhs_code
1312 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313 break;
1318 name = gimple_assign_rhs1 (stmt);
1320 /* If this is the operation we look for and one of the operands
1321 is ours simply propagate the other operand into the stmts
1322 single use. */
1323 if (gimple_assign_rhs_code (stmt) == opcode
1324 && (name == op
1325 || gimple_assign_rhs2 (stmt) == op))
1327 if (name == op)
1328 name = gimple_assign_rhs2 (stmt);
1329 if (stmts_to_fix.length () > 0)
1330 stmts_to_fix.pop ();
1331 propagate_op_to_single_use (name, stmt, def);
1332 break;
1335 /* We might have a multiply of two __builtin_pow* calls, and
1336 the operand might be hiding in the rightmost one. Likewise
1337 this can happen for a negate. */
1338 if (opcode == MULT_EXPR
1339 && gimple_assign_rhs_code (stmt) == opcode
1340 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341 && has_single_use (gimple_assign_rhs2 (stmt)))
1343 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344 if (stmt_is_power_of_op (stmt2, op))
1346 if (decrement_power (stmt2) == 1)
1347 propagate_op_to_single_use (op, stmt2, def);
1348 else
1349 stmts_to_fix.safe_push (stmt2);
1350 break;
1352 else if (is_gimple_assign (stmt2)
1353 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1355 if (gimple_assign_rhs1 (stmt2) == op)
1357 tree cst = build_minus_one_cst (TREE_TYPE (op));
1358 propagate_op_to_single_use (cst, stmt2, def);
1359 break;
1361 else if (integer_minus_onep (op)
1362 || real_minus_onep (op))
1364 stmts_to_fix.safe_push (stmt2);
1365 gimple_assign_set_rhs_code
1366 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367 break;
1372 /* Continue walking the chain. */
1373 gcc_assert (name != op
1374 && TREE_CODE (name) == SSA_NAME);
1375 stmt = SSA_NAME_DEF_STMT (name);
1376 stmts_to_fix.safe_push (stmt);
1378 while (1);
1380 if (stmts_to_fix.length () > 0 || *def == orig_def)
1381 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1384 /* Returns true if statement S1 dominates statement S2. Like
1385 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1387 static bool
1388 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1390 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1392 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1393 SSA_NAME. Assume it lives at the beginning of function and
1394 thus dominates everything. */
1395 if (!bb1 || s1 == s2)
1396 return true;
1398 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1399 if (!bb2)
1400 return false;
1402 if (bb1 == bb2)
1404 /* PHIs in the same basic block are assumed to be
1405 executed all in parallel; if only one stmt is a PHI,
1406 it dominates the other stmt in the same basic block. */
1407 if (gimple_code (s1) == GIMPLE_PHI)
1408 return true;
1410 if (gimple_code (s2) == GIMPLE_PHI)
1411 return false;
1413 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1415 if (gimple_uid (s1) < gimple_uid (s2))
1416 return true;
1418 if (gimple_uid (s1) > gimple_uid (s2))
1419 return false;
1421 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422 unsigned int uid = gimple_uid (s1);
1423 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1425 gimple *s = gsi_stmt (gsi);
1426 if (gimple_uid (s) != uid)
1427 break;
1428 if (s == s2)
1429 return true;
1432 return false;
1435 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1438 /* Insert STMT after INSERT_POINT. */
1440 static void
1441 insert_stmt_after (gimple *stmt, gimple *insert_point)
1443 gimple_stmt_iterator gsi;
1444 basic_block bb;
1446 if (gimple_code (insert_point) == GIMPLE_PHI)
1447 bb = gimple_bb (insert_point);
1448 else if (!stmt_ends_bb_p (insert_point))
1450 gsi = gsi_for_stmt (insert_point);
1451 gimple_set_uid (stmt, gimple_uid (insert_point));
1452 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453 return;
1455 else
1456 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457 thus if it must end a basic block, it should be a call that can
1458 throw, or some assignment that can throw. If it throws, the LHS
1459 of it will not be initialized though, so only valid places using
1460 the SSA_NAME should be dominated by the fallthru edge. */
1461 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462 gsi = gsi_after_labels (bb);
1463 if (gsi_end_p (gsi))
1465 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466 gimple_set_uid (stmt,
1467 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1469 else
1470 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1474 /* Builds one statement performing OP1 OPCODE OP2, creating a new
1475 SSA name of type TYPE for the result. Places the statement after
1476 the definition of either OP1 or OP2. Returns the new statement. */
1478 static gimple *
1479 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1481 gimple *op1def = NULL, *op2def = NULL;
1482 gimple_stmt_iterator gsi;
1483 tree op;
1484 gassign *sum;
1486 /* Create the addition statement. */
1487 op = make_ssa_name (type);
1488 sum = gimple_build_assign (op, opcode, op1, op2);
1490 /* Find an insertion place and insert. */
1491 if (TREE_CODE (op1) == SSA_NAME)
1492 op1def = SSA_NAME_DEF_STMT (op1);
1493 if (TREE_CODE (op2) == SSA_NAME)
1494 op2def = SSA_NAME_DEF_STMT (op2);
1495 if ((!op1def || gimple_nop_p (op1def))
1496 && (!op2def || gimple_nop_p (op2def)))
1498 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499 if (gsi_end_p (gsi))
1501 gimple_stmt_iterator gsi2
1502 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503 gimple_set_uid (sum,
1504 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1506 else
1507 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1510 else
1512 gimple *insert_point;
1513 if ((!op1def || gimple_nop_p (op1def))
1514 || (op2def && !gimple_nop_p (op2def)
1515 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 insert_point = op2def;
1517 else
1518 insert_point = op1def;
1519 insert_stmt_after (sum, insert_point);
1521 update_stmt (sum);
1523 return sum;
1526 /* Perform un-distribution of divisions and multiplications.
1527 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528 to (A + B) / X for real X.
1530 The algorithm is organized as follows.
1532 - First we walk the addition chain *OPS looking for summands that
1533 are defined by a multiplication or a real division. This results
1534 in the candidates bitmap with relevant indices into *OPS.
1536 - Second we build the chains of multiplications or divisions for
1537 these candidates, counting the number of occurrences of (operand, code)
1538 pairs in all of the candidates chains.
1540 - Third we sort the (operand, code) pairs by number of occurrence and
1541 process them starting with the pair with the most uses.
1543 * For each such pair we walk the candidates again to build a
1544 second candidate bitmap noting all multiplication/division chains
1545 that have at least one occurrence of (operand, code).
1547 * We build an alternate addition chain only covering these
1548 candidates with one (operand, code) operation removed from their
1549 multiplication/division chain.
1551 * The first candidate gets replaced by the alternate addition chain
1552 multiplied/divided by the operand.
1554 * All candidate chains get disabled for further processing and
1555 processing of (operand, code) pairs continues.
1557 The alternate addition chains built are re-processed by the main
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x
1559 to (a + b) * x * y in one invocation of the reassociation pass. */
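/* Illustrative walk-through (added for exposition, not from the sources):
   for the addition chain

     a*x + b*x + c*y + d*y

   all four summands are candidates; the counting table records the pairs
   (x, *) and (y, *) twice each and (a, *), (b, *), (c, *), (d, *) once.
   Processing (x, *) first rewrites the chain to

     (a + b)*x + 0 + c*y + d*y

   and processing (y, *) afterwards yields

     (a + b)*x + 0 + (c + d)*y + 0

   where the zero entries are cleaned up by the later optimization of the
   operand list.  */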
1561 static bool
1562 undistribute_ops_list (enum tree_code opcode,
1563 vec<operand_entry *> *ops, struct loop *loop)
1565 unsigned int length = ops->length ();
1566 operand_entry *oe1;
1567 unsigned i, j;
1568 unsigned nr_candidates, nr_candidates2;
1569 sbitmap_iterator sbi0;
1570 vec<operand_entry *> *subops;
1571 bool changed = false;
1572 unsigned int next_oecount_id = 0;
1574 if (length <= 1
1575 || opcode != PLUS_EXPR)
1576 return false;
1578 /* Build a list of candidates to process. */
1579 auto_sbitmap candidates (length);
1580 bitmap_clear (candidates);
1581 nr_candidates = 0;
1582 FOR_EACH_VEC_ELT (*ops, i, oe1)
1584 enum tree_code dcode;
1585 gimple *oe1def;
1587 if (TREE_CODE (oe1->op) != SSA_NAME)
1588 continue;
1589 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590 if (!is_gimple_assign (oe1def))
1591 continue;
1592 dcode = gimple_assign_rhs_code (oe1def);
1593 if ((dcode != MULT_EXPR
1594 && dcode != RDIV_EXPR)
1595 || !is_reassociable_op (oe1def, dcode, loop))
1596 continue;
1598 bitmap_set_bit (candidates, i);
1599 nr_candidates++;
1602 if (nr_candidates < 2)
1603 return false;
1605 if (dump_file && (dump_flags & TDF_DETAILS))
1607 fprintf (dump_file, "searching for un-distribute opportunities ");
1608 print_generic_expr (dump_file,
1609 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1610 fprintf (dump_file, " %d\n", nr_candidates);
1613 /* Build linearized sub-operand lists and the counting table. */
1614 cvec.create (0);
1616 hash_table<oecount_hasher> ctable (15);
1618 /* ??? Macro arguments cannot have multi-argument template types in
1619 them. This typedef is needed to work around that limitation. */
1620 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1624 gimple *oedef;
1625 enum tree_code oecode;
1626 unsigned j;
1628 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629 oecode = gimple_assign_rhs_code (oedef);
1630 linearize_expr_tree (&subops[i], oedef,
1631 associative_tree_code (oecode), false);
1633 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1635 oecount c;
1636 int *slot;
1637 int idx;
1638 c.oecode = oecode;
1639 c.cnt = 1;
1640 c.id = next_oecount_id++;
1641 c.op = oe1->op;
1642 cvec.safe_push (c);
1643 idx = cvec.length () + 41;
1644 slot = ctable.find_slot (idx, INSERT);
1645 if (!*slot)
1647 *slot = idx;
1649 else
1651 cvec.pop ();
1652 cvec[*slot - 42].cnt++;
1657 /* Sort the counting table. */
1658 cvec.qsort (oecount_cmp);
1660 if (dump_file && (dump_flags & TDF_DETAILS))
1662 oecount *c;
1663 fprintf (dump_file, "Candidates:\n");
1664 FOR_EACH_VEC_ELT (cvec, j, c)
1666 fprintf (dump_file, " %u %s: ", c->cnt,
1667 c->oecode == MULT_EXPR
1668 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669 print_generic_expr (dump_file, c->op);
1670 fprintf (dump_file, "\n");
1674 /* Process the (operand, code) pairs in order of most occurrence. */
1675 auto_sbitmap candidates2 (length);
1676 while (!cvec.is_empty ())
1678 oecount *c = &cvec.last ();
1679 if (c->cnt < 2)
1680 break;
1682 /* Now collect the operands in the outer chain that contain
1683 the common operand in their inner chain. */
1684 bitmap_clear (candidates2);
1685 nr_candidates2 = 0;
1686 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1688 gimple *oedef;
1689 enum tree_code oecode;
1690 unsigned j;
1691 tree op = (*ops)[i]->op;
1693 /* If we undistributed in this chain already this may be
1694 a constant. */
1695 if (TREE_CODE (op) != SSA_NAME)
1696 continue;
1698 oedef = SSA_NAME_DEF_STMT (op);
1699 oecode = gimple_assign_rhs_code (oedef);
1700 if (oecode != c->oecode)
1701 continue;
1703 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1705 if (oe1->op == c->op)
1707 bitmap_set_bit (candidates2, i);
1708 ++nr_candidates2;
1709 break;
1714 if (nr_candidates2 >= 2)
1716 operand_entry *oe1, *oe2;
1717 gimple *prod;
1718 int first = bitmap_first_set_bit (candidates2);
1720 /* Build the new addition chain. */
1721 oe1 = (*ops)[first];
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1724 fprintf (dump_file, "Building (");
1725 print_generic_expr (dump_file, oe1->op);
1727 zero_one_operation (&oe1->op, c->oecode, c->op);
1728 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1730 gimple *sum;
1731 oe2 = (*ops)[i];
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1734 fprintf (dump_file, " + ");
1735 print_generic_expr (dump_file, oe2->op);
1737 zero_one_operation (&oe2->op, c->oecode, c->op);
1738 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739 oe1->op, oe2->op, opcode);
1740 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741 oe2->rank = 0;
1742 oe1->op = gimple_get_lhs (sum);
1745 /* Apply the multiplication/division. */
1746 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747 oe1->op, c->op, c->oecode);
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1750 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751 print_generic_expr (dump_file, c->op);
1752 fprintf (dump_file, "\n");
1755 /* Record it in the addition chain and disable further
1756 undistribution with this op. */
1757 oe1->op = gimple_assign_lhs (prod);
1758 oe1->rank = get_rank (oe1->op);
1759 subops[first].release ();
1761 changed = true;
1764 cvec.pop ();
1767 for (i = 0; i < ops->length (); ++i)
1768 subops[i].release ();
1769 free (subops);
1770 cvec.release ();
1772 return changed;
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776 expression, examine the other OPS to see if any of them are comparisons
1777 of the same values, which we may be able to combine or eliminate.
1778 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1780 static bool
1781 eliminate_redundant_comparison (enum tree_code opcode,
1782 vec<operand_entry *> *ops,
1783 unsigned int currindex,
1784 operand_entry *curr)
1786 tree op1, op2;
1787 enum tree_code lcode, rcode;
1788 gimple *def1, *def2;
1789 int i;
1790 operand_entry *oe;
1792 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1793 return false;
1795 /* Check that CURR is a comparison. */
1796 if (TREE_CODE (curr->op) != SSA_NAME)
1797 return false;
1798 def1 = SSA_NAME_DEF_STMT (curr->op);
1799 if (!is_gimple_assign (def1))
1800 return false;
1801 lcode = gimple_assign_rhs_code (def1);
1802 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1803 return false;
1804 op1 = gimple_assign_rhs1 (def1);
1805 op2 = gimple_assign_rhs2 (def1);
1807 /* Now look for a similar comparison in the remaining OPS. */
1808 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1810 tree t;
1812 if (TREE_CODE (oe->op) != SSA_NAME)
1813 continue;
1814 def2 = SSA_NAME_DEF_STMT (oe->op);
1815 if (!is_gimple_assign (def2))
1816 continue;
1817 rcode = gimple_assign_rhs_code (def2);
1818 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1819 continue;
1821 /* If we got here, we have a match. See if we can combine the
1822 two comparisons. */
1823 if (opcode == BIT_IOR_EXPR)
1824 t = maybe_fold_or_comparisons (lcode, op1, op2,
1825 rcode, gimple_assign_rhs1 (def2),
1826 gimple_assign_rhs2 (def2));
1827 else
1828 t = maybe_fold_and_comparisons (lcode, op1, op2,
1829 rcode, gimple_assign_rhs1 (def2),
1830 gimple_assign_rhs2 (def2));
1831 if (!t)
1832 continue;
1834 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1835 always give us a boolean_type_node value back. If the original
1836 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1837 we need to convert. */
1838 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1839 t = fold_convert (TREE_TYPE (curr->op), t);
1841 if (TREE_CODE (t) != INTEGER_CST
1842 && !operand_equal_p (t, curr->op, 0))
1844 enum tree_code subcode;
1845 tree newop1, newop2;
1846 if (!COMPARISON_CLASS_P (t))
1847 continue;
1848 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1849 STRIP_USELESS_TYPE_CONVERSION (newop1);
1850 STRIP_USELESS_TYPE_CONVERSION (newop2);
1851 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1852 continue;
1855 if (dump_file && (dump_flags & TDF_DETAILS))
1857 fprintf (dump_file, "Equivalence: ");
1858 print_generic_expr (dump_file, curr->op);
1859 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1860 print_generic_expr (dump_file, oe->op);
1861 fprintf (dump_file, " -> ");
1862 print_generic_expr (dump_file, t);
1863 fprintf (dump_file, "\n");
1866 /* Now we can delete oe, as it has been subsumed by the new combined
1867 expression t. */
1868 ops->ordered_remove (i);
1869 reassociate_stats.ops_eliminated ++;
1871 /* If t is the same as curr->op, we're done. Otherwise we must
1872 replace curr->op with t. Special case is if we got a constant
1873 back, in which case we add it to the end instead of in place of
1874 the current entry. */
1875 if (TREE_CODE (t) == INTEGER_CST)
1877 ops->ordered_remove (currindex);
1878 add_to_ops_vec (ops, t);
1880 else if (!operand_equal_p (t, curr->op, 0))
1882 gimple *sum;
1883 enum tree_code subcode;
1884 tree newop1;
1885 tree newop2;
1886 gcc_assert (COMPARISON_CLASS_P (t));
1887 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1888 STRIP_USELESS_TYPE_CONVERSION (newop1);
1889 STRIP_USELESS_TYPE_CONVERSION (newop2);
1890 gcc_checking_assert (is_gimple_val (newop1)
1891 && is_gimple_val (newop2));
1892 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1893 curr->op = gimple_get_lhs (sum);
1895 return true;
1898 return false;
1902 /* Transform repeated addition of the same value into a multiplication
1903 by a constant, e.g. x + x + x into x * 3. */
1904 static bool
1905 transform_add_to_multiply (vec<operand_entry *> *ops)
1907 operand_entry *oe;
1908 tree op = NULL_TREE;
1909 int j;
1910 int i, start = -1, end = 0, count = 0;
1911 auto_vec<std::pair <int, int> > indxs;
1912 bool changed = false;
1914 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1915 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1916 || !flag_unsafe_math_optimizations))
1917 return false;
1919 /* Look for repeated operands. */
1920 FOR_EACH_VEC_ELT (*ops, i, oe)
1922 if (start == -1)
1924 count = 1;
1925 op = oe->op;
1926 start = i;
1928 else if (operand_equal_p (oe->op, op, 0))
1930 count++;
1931 end = i;
1933 else
1935 if (count > 1)
1936 indxs.safe_push (std::make_pair (start, end));
1937 count = 1;
1938 op = oe->op;
1939 start = i;
1943 if (count > 1)
1944 indxs.safe_push (std::make_pair (start, end));
1946 for (j = indxs.length () - 1; j >= 0; --j)
1948 /* Convert repeated operand addition to multiplication. */
1949 start = indxs[j].first;
1950 end = indxs[j].second;
1951 op = (*ops)[start]->op;
1952 count = end - start + 1;
1953 for (i = end; i >= start; --i)
1954 ops->unordered_remove (i);
1955 tree tmp = make_ssa_name (TREE_TYPE (op));
1956 tree cst = build_int_cst (integer_type_node, count);
1957 gassign *mul_stmt
1958 = gimple_build_assign (tmp, MULT_EXPR,
1959 op, fold_convert (TREE_TYPE (op), cst));
1960 gimple_set_visited (mul_stmt, true);
1961 add_to_ops_vec (ops, tmp, mul_stmt);
1962 changed = true;
1965 return changed;
1969 /* Perform various identities and other optimizations on the list of
1970 operand entries, stored in OPS. The tree code for the binary
1971 operation between all the operands is OPCODE. */
1973 static void
1974 optimize_ops_list (enum tree_code opcode,
1975 vec<operand_entry *> *ops)
1977 unsigned int length = ops->length ();
1978 unsigned int i;
1979 operand_entry *oe;
1980 operand_entry *oelast = NULL;
1981 bool iterate = false;
1983 if (length == 1)
1984 return;
1986 oelast = ops->last ();
1988 /* If the last two are constants, pop the constants off, merge them
1989 and try the next two. */
1990 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1992 operand_entry *oelm1 = (*ops)[length - 2];
1994 if (oelm1->rank == 0
1995 && is_gimple_min_invariant (oelm1->op)
1996 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1997 TREE_TYPE (oelast->op)))
1999 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2000 oelm1->op, oelast->op);
2002 if (folded && is_gimple_min_invariant (folded))
2004 if (dump_file && (dump_flags & TDF_DETAILS))
2005 fprintf (dump_file, "Merging constants\n");
2007 ops->pop ();
2008 ops->pop ();
2010 add_to_ops_vec (ops, folded);
2011 reassociate_stats.constants_eliminated++;
2013 optimize_ops_list (opcode, ops);
2014 return;
2019 eliminate_using_constants (opcode, ops);
2020 oelast = NULL;
2022 for (i = 0; ops->iterate (i, &oe);)
2024 bool done = false;
2026 if (eliminate_not_pairs (opcode, ops, i, oe))
2027 return;
2028 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2029 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2030 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2032 if (done)
2033 return;
2034 iterate = true;
2035 oelast = NULL;
2036 continue;
2038 oelast = oe;
2039 i++;
2042 length = ops->length ();
2043 oelast = ops->last ();
2045 if (iterate)
2046 optimize_ops_list (opcode, ops);
2049 /* The following functions are subroutines to optimize_range_tests and allow
2050 it to try to change a logical combination of comparisons into a range
2051 test.
2053 For example, both
2054 X == 2 || X == 5 || X == 3 || X == 4
2055 and
2056 X >= 2 && X <= 5
2057 are converted to
2058 (unsigned) (X - 2) <= 3
2060 For more information see comments above fold_range_test in fold-const.c,
2061 this implementation is for GIMPLE. */
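/* Informally, each comparison is first described by a range_entry below;
   e.g. X == 2 becomes roughly { exp = X, low = 2, high = 2, in_p = true }
   and the four tests of the example merge into
   { exp = X, low = 2, high = 5, in_p = true }, which build_range_check
   later materializes as (unsigned) (X - 2) <= 3.  This is only a sketch of
   the data flow through the helpers that follow.  */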
2063 struct range_entry
2065 tree exp;
2066 tree low;
2067 tree high;
2068 bool in_p;
2069 bool strict_overflow_p;
2070 unsigned int idx, next;
2073 /* This is similar to make_range in fold-const.c, but on top of
2074 GIMPLE instead of trees. If EXP is non-NULL, it should be
2075 an SSA_NAME and STMT argument is ignored, otherwise STMT
2076 argument should be a GIMPLE_COND. */
2078 static void
2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2081 int in_p;
2082 tree low, high;
2083 bool is_bool, strict_overflow_p;
2085 r->exp = NULL_TREE;
2086 r->in_p = false;
2087 r->strict_overflow_p = false;
2088 r->low = NULL_TREE;
2089 r->high = NULL_TREE;
2090 if (exp != NULL_TREE
2091 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2092 return;
2094 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2095 and see if we can refine the range. Some of the cases below may not
2096 happen, but it doesn't seem worth worrying about this. We "continue"
2097 the outer loop when we've changed something; otherwise we "break"
2098 the switch, which will "break" the while. */
2099 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2100 high = low;
2101 in_p = 0;
2102 strict_overflow_p = false;
2103 is_bool = false;
2104 if (exp == NULL_TREE)
2105 is_bool = true;
2106 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2108 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2109 is_bool = true;
2110 else
2111 return;
2113 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2114 is_bool = true;
2116 while (1)
2118 enum tree_code code;
2119 tree arg0, arg1, exp_type;
2120 tree nexp;
2121 location_t loc;
2123 if (exp != NULL_TREE)
2125 if (TREE_CODE (exp) != SSA_NAME
2126 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2127 break;
2129 stmt = SSA_NAME_DEF_STMT (exp);
2130 if (!is_gimple_assign (stmt))
2131 break;
2133 code = gimple_assign_rhs_code (stmt);
2134 arg0 = gimple_assign_rhs1 (stmt);
2135 arg1 = gimple_assign_rhs2 (stmt);
2136 exp_type = TREE_TYPE (exp);
2138 else
2140 code = gimple_cond_code (stmt);
2141 arg0 = gimple_cond_lhs (stmt);
2142 arg1 = gimple_cond_rhs (stmt);
2143 exp_type = boolean_type_node;
2146 if (TREE_CODE (arg0) != SSA_NAME)
2147 break;
2148 loc = gimple_location (stmt);
2149 switch (code)
2151 case BIT_NOT_EXPR:
2152 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2153 /* Ensure the range is either +[-,0], +[0,0],
2154 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2155 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2156 or similar expression of unconditional true or
2157 false, it should not be negated. */
2158 && ((high && integer_zerop (high))
2159 || (low && integer_onep (low))))
2161 in_p = !in_p;
2162 exp = arg0;
2163 continue;
2165 break;
2166 case SSA_NAME:
2167 exp = arg0;
2168 continue;
2169 CASE_CONVERT:
2170 if (is_bool)
2171 goto do_default;
2172 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2174 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2175 is_bool = true;
2176 else
2177 return;
2179 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2180 is_bool = true;
2181 goto do_default;
2182 case EQ_EXPR:
2183 case NE_EXPR:
2184 case LT_EXPR:
2185 case LE_EXPR:
2186 case GE_EXPR:
2187 case GT_EXPR:
2188 is_bool = true;
2189 /* FALLTHRU */
2190 default:
2191 if (!is_bool)
2192 return;
2193 do_default:
2194 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2195 &low, &high, &in_p,
2196 &strict_overflow_p);
2197 if (nexp != NULL_TREE)
2199 exp = nexp;
2200 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2201 continue;
2203 break;
2205 break;
2207 if (is_bool)
2209 r->exp = exp;
2210 r->in_p = in_p;
2211 r->low = low;
2212 r->high = high;
2213 r->strict_overflow_p = strict_overflow_p;
2217 /* Comparison function for qsort. Sort entries
2218 without SSA_NAME exp first, then with SSA_NAMEs sorted
2219 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2220 by increasing ->low and if ->low is the same, by increasing
2221 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2222 maximum. */
2224 static int
2225 range_entry_cmp (const void *a, const void *b)
2227 const struct range_entry *p = (const struct range_entry *) a;
2228 const struct range_entry *q = (const struct range_entry *) b;
2230 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2232 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2234 /* Group range_entries for the same SSA_NAME together. */
2235 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2236 return -1;
2237 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2238 return 1;
2239 /* If ->low is different, NULL low goes first, then by
2240 ascending low. */
2241 if (p->low != NULL_TREE)
2243 if (q->low != NULL_TREE)
2245 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2246 p->low, q->low);
2247 if (tem && integer_onep (tem))
2248 return -1;
2249 tem = fold_binary (GT_EXPR, boolean_type_node,
2250 p->low, q->low);
2251 if (tem && integer_onep (tem))
2252 return 1;
2254 else
2255 return 1;
2257 else if (q->low != NULL_TREE)
2258 return -1;
2259 /* If ->high is different, NULL high goes last, before that by
2260 ascending high. */
2261 if (p->high != NULL_TREE)
2263 if (q->high != NULL_TREE)
2265 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2266 p->high, q->high);
2267 if (tem && integer_onep (tem))
2268 return -1;
2269 tem = fold_binary (GT_EXPR, boolean_type_node,
2270 p->high, q->high);
2271 if (tem && integer_onep (tem))
2272 return 1;
2274 else
2275 return -1;
2277 else if (q->high != NULL_TREE)
2278 return 1;
2279 /* If both ranges are the same, sort below by ascending idx. */
2281 else
2282 return 1;
2284 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2285 return -1;
2287 if (p->idx < q->idx)
2288 return -1;
2289 else
2291 gcc_checking_assert (p->idx > q->idx);
2292 return 1;
2296 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2297 insert needed statements before or after GSI, as selected by BEFORE. */
2299 static tree
2300 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2302 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2303 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2304 if (TREE_CODE (ret) != SSA_NAME)
2306 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2307 if (before)
2308 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2309 else
2310 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2311 ret = gimple_assign_lhs (g);
2313 return ret;
2316 /* Helper routine of optimize_range_tests.
2317 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2318 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2319 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2320 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2321 an array of COUNT pointers to other ranges. Return
2322 true if the range merge has been successful.
2323 If OPCODE is ERROR_MARK, this is called from within
2324 maybe_optimize_range_tests and is performing inter-bb range optimization.
2325 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2326 oe->rank. */
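/* Informally: for the merged ranges this emits one new comparison, e.g.
   something like

     _t = (unsigned) (X - 2) <= 3;

   for the example near the top of this group of functions, makes it the
   operand of RANGE's entry, and neutralizes the operands of the merged
   OTHERRANGE(P) entries so the now-redundant tests are folded away or
   dropped later.  This describes the intent only; the details follow.  */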
2328 static bool
2329 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2330 struct range_entry **otherrangep,
2331 unsigned int count, enum tree_code opcode,
2332 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2333 bool in_p, tree low, tree high, bool strict_overflow_p)
2335 operand_entry *oe = (*ops)[range->idx];
2336 tree op = oe->op;
2337 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2338 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2339 location_t loc = gimple_location (stmt);
2340 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2341 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2342 in_p, low, high);
2343 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2344 gimple_stmt_iterator gsi;
2345 unsigned int i, uid;
2347 if (tem == NULL_TREE)
2348 return false;
2350 /* If op is default def SSA_NAME, there is no place to insert the
2351 new comparison. Give up, unless we can use OP itself as the
2352 range test. */
2353 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2355 if (op == range->exp
2356 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2357 || TREE_CODE (optype) == BOOLEAN_TYPE)
2358 && (op == tem
2359 || (TREE_CODE (tem) == EQ_EXPR
2360 && TREE_OPERAND (tem, 0) == op
2361 && integer_onep (TREE_OPERAND (tem, 1))))
2362 && opcode != BIT_IOR_EXPR
2363 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2365 stmt = NULL;
2366 tem = op;
2368 else
2369 return false;
2372 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2373 warning_at (loc, OPT_Wstrict_overflow,
2374 "assuming signed overflow does not occur "
2375 "when simplifying range test");
2377 if (dump_file && (dump_flags & TDF_DETAILS))
2379 struct range_entry *r;
2380 fprintf (dump_file, "Optimizing range tests ");
2381 print_generic_expr (dump_file, range->exp);
2382 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2383 print_generic_expr (dump_file, range->low);
2384 fprintf (dump_file, ", ");
2385 print_generic_expr (dump_file, range->high);
2386 fprintf (dump_file, "]");
2387 for (i = 0; i < count; i++)
2389 if (otherrange)
2390 r = otherrange + i;
2391 else
2392 r = otherrangep[i];
2393 if (r->exp
2394 && r->exp != range->exp
2395 && TREE_CODE (r->exp) == SSA_NAME)
2397 fprintf (dump_file, " and ");
2398 print_generic_expr (dump_file, r->exp);
2400 else
2401 fprintf (dump_file, " and");
2402 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2403 print_generic_expr (dump_file, r->low);
2404 fprintf (dump_file, ", ");
2405 print_generic_expr (dump_file, r->high);
2406 fprintf (dump_file, "]");
2408 fprintf (dump_file, "\n into ");
2409 print_generic_expr (dump_file, tem);
2410 fprintf (dump_file, "\n");
2413 if (opcode == BIT_IOR_EXPR
2414 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2415 tem = invert_truthvalue_loc (loc, tem);
2417 tem = fold_convert_loc (loc, optype, tem);
2418 if (stmt)
2420 gsi = gsi_for_stmt (stmt);
2421 uid = gimple_uid (stmt);
2423 else
2425 gsi = gsi_none ();
2426 uid = 0;
2428 if (stmt == NULL)
2429 gcc_checking_assert (tem == op);
2430 /* In rare cases range->exp can be equal to lhs of stmt.
2431 In that case we have to insert after the stmt rather than before
2432 it. If stmt is a PHI, insert it at the start of the basic block. */
2433 else if (op != range->exp)
2435 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2436 tem = force_into_ssa_name (&gsi, tem, true);
2437 gsi_prev (&gsi);
2439 else if (gimple_code (stmt) != GIMPLE_PHI)
2441 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2442 tem = force_into_ssa_name (&gsi, tem, false);
2444 else
2446 gsi = gsi_after_labels (gimple_bb (stmt));
2447 if (!gsi_end_p (gsi))
2448 uid = gimple_uid (gsi_stmt (gsi));
2449 else
2451 gsi = gsi_start_bb (gimple_bb (stmt));
2452 uid = 1;
2453 while (!gsi_end_p (gsi))
2455 uid = gimple_uid (gsi_stmt (gsi));
2456 gsi_next (&gsi);
2459 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2460 tem = force_into_ssa_name (&gsi, tem, true);
2461 if (gsi_end_p (gsi))
2462 gsi = gsi_last_bb (gimple_bb (stmt));
2463 else
2464 gsi_prev (&gsi);
2466 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2467 if (gimple_uid (gsi_stmt (gsi)))
2468 break;
2469 else
2470 gimple_set_uid (gsi_stmt (gsi), uid);
2472 oe->op = tem;
2473 range->exp = exp;
2474 range->low = low;
2475 range->high = high;
2476 range->in_p = in_p;
2477 range->strict_overflow_p = false;
2479 for (i = 0; i < count; i++)
2481 if (otherrange)
2482 range = otherrange + i;
2483 else
2484 range = otherrangep[i];
2485 oe = (*ops)[range->idx];
2486 /* Now change all the other range test immediate uses, so that
2487 those tests will be optimized away. */
2488 if (opcode == ERROR_MARK)
2490 if (oe->op)
2491 oe->op = build_int_cst (TREE_TYPE (oe->op),
2492 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2493 else
2494 oe->op = (oe->rank == BIT_IOR_EXPR
2495 ? boolean_false_node : boolean_true_node);
2497 else
2498 oe->op = error_mark_node;
2499 range->exp = NULL_TREE;
2500 range->low = NULL_TREE;
2501 range->high = NULL_TREE;
2503 return true;
2506 /* Optimize X == CST1 || X == CST2
2507 if popcount (CST1 ^ CST2) == 1 into
2508 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2509 Similarly for ranges. E.g.
2510 X != 2 && X != 3 && X != 10 && X != 11
2511 will be transformed by the previous optimization into
2512 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2513 and this loop can transform that into
2514 !(((X & ~8) - 2U) <= 1U). */
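/* A minimal concrete instance, assuming a 32-bit unsigned X:

     X == 4 || X == 6

   has CST1 ^ CST2 == 2, a power of two, so it becomes

     (X & ~2U) == 4U

   The same masking applies to whole subranges, as in the comment above.
   Illustrative only.  */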
2516 static bool
2517 optimize_range_tests_xor (enum tree_code opcode, tree type,
2518 tree lowi, tree lowj, tree highi, tree highj,
2519 vec<operand_entry *> *ops,
2520 struct range_entry *rangei,
2521 struct range_entry *rangej)
2523 tree lowxor, highxor, tem, exp;
2524 /* Check lowi ^ lowj == highi ^ highj and
2525 popcount (lowi ^ lowj) == 1. */
2526 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2527 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2528 return false;
2529 if (!integer_pow2p (lowxor))
2530 return false;
2531 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2532 if (!tree_int_cst_equal (lowxor, highxor))
2533 return false;
2535 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2536 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2537 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2538 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2539 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2540 NULL, rangei->in_p, lowj, highj,
2541 rangei->strict_overflow_p
2542 || rangej->strict_overflow_p))
2543 return true;
2544 return false;
2547 /* Optimize X == CST1 || X == CST2
2548 if popcount (CST2 - CST1) == 1 into
2549 ((X - CST1) & ~(CST2 - CST1)) == 0.
2550 Similarly for ranges. E.g.
2551 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2552 || X == 75 || X == 45
2553 will be transformed by the previous optimization into
2554 (X - 43U) <= 3U || (X - 75U) <= 3U
2555 and this loop can transform that into
2556 ((X - 43U) & ~(75U - 43U)) <= 3U. */
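/* A smaller instance, assuming unsigned arithmetic: for

     X == 5 || X == 13

   the difference 13 - 5 == 8 is a power of two, so the pair becomes

     ((X - 5U) & ~8U) == 0U

   i.e. bit 3 is masked off and the remaining bits must match 5.
   Illustrative only.  */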
2557 static bool
2558 optimize_range_tests_diff (enum tree_code opcode, tree type,
2559 tree lowi, tree lowj, tree highi, tree highj,
2560 vec<operand_entry *> *ops,
2561 struct range_entry *rangei,
2562 struct range_entry *rangej)
2564 tree tem1, tem2, mask;
2565 /* Check highi - lowi == highj - lowj. */
2566 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2567 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2568 return false;
2569 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2570 if (!tree_int_cst_equal (tem1, tem2))
2571 return false;
2572 /* Check popcount (lowj - lowi) == 1. */
2573 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2574 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2575 return false;
2576 if (!integer_pow2p (tem1))
2577 return false;
2579 type = unsigned_type_for (type);
2580 tem1 = fold_convert (type, tem1);
2581 tem2 = fold_convert (type, tem2);
2582 lowi = fold_convert (type, lowi);
2583 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2584 tem1 = fold_build2 (MINUS_EXPR, type,
2585 fold_convert (type, rangei->exp), lowi);
2586 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2587 lowj = build_int_cst (type, 0);
2588 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2589 NULL, rangei->in_p, lowj, tem2,
2590 rangei->strict_overflow_p
2591 || rangej->strict_overflow_p))
2592 return true;
2593 return false;
2596 /* Perform the checks common to optimize_range_tests_xor and
2597 optimize_range_tests_diff on each candidate pair of ranges.
2598 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2599 otherwise call optimize_range_tests_diff. */
2601 static bool
2602 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2603 bool optimize_xor, vec<operand_entry *> *ops,
2604 struct range_entry *ranges)
2606 int i, j;
2607 bool any_changes = false;
2608 for (i = first; i < length; i++)
2610 tree lowi, highi, lowj, highj, type, tem;
2612 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2613 continue;
2614 type = TREE_TYPE (ranges[i].exp);
2615 if (!INTEGRAL_TYPE_P (type))
2616 continue;
2617 lowi = ranges[i].low;
2618 if (lowi == NULL_TREE)
2619 lowi = TYPE_MIN_VALUE (type);
2620 highi = ranges[i].high;
2621 if (highi == NULL_TREE)
2622 continue;
2623 for (j = i + 1; j < length && j < i + 64; j++)
2625 bool changes;
2626 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2627 continue;
2628 lowj = ranges[j].low;
2629 if (lowj == NULL_TREE)
2630 continue;
2631 highj = ranges[j].high;
2632 if (highj == NULL_TREE)
2633 highj = TYPE_MAX_VALUE (type);
2634 /* Check lowj > highi. */
2635 tem = fold_binary (GT_EXPR, boolean_type_node,
2636 lowj, highi);
2637 if (tem == NULL_TREE || !integer_onep (tem))
2638 continue;
2639 if (optimize_xor)
2640 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2641 highi, highj, ops,
2642 ranges + i, ranges + j);
2643 else
2644 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2645 highi, highj, ops,
2646 ranges + i, ranges + j);
2647 if (changes)
2649 any_changes = true;
2650 break;
2654 return any_changes;
2657 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2658 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2659 EXP on success, NULL otherwise. */
2661 static tree
2662 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2663 wide_int *mask, tree *totallowp)
2665 tree tem = int_const_binop (MINUS_EXPR, high, low);
2666 if (tem == NULL_TREE
2667 || TREE_CODE (tem) != INTEGER_CST
2668 || TREE_OVERFLOW (tem)
2669 || tree_int_cst_sgn (tem) == -1
2670 || compare_tree_int (tem, prec) != -1)
2671 return NULL_TREE;
2673 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2674 *mask = wi::shifted_mask (0, max, false, prec);
2675 if (TREE_CODE (exp) == BIT_AND_EXPR
2676 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2678 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2679 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2680 if (wi::popcount (msk) == 1
2681 && wi::ltu_p (msk, prec - max))
2683 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2684 max += msk.to_uhwi ();
2685 exp = TREE_OPERAND (exp, 0);
2686 if (integer_zerop (low)
2687 && TREE_CODE (exp) == PLUS_EXPR
2688 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2690 tree ret = TREE_OPERAND (exp, 0);
2691 STRIP_NOPS (ret);
2692 widest_int bias
2693 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2694 TYPE_PRECISION (TREE_TYPE (low))));
2695 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2696 if (totallowp)
2698 *totallowp = tbias;
2699 return ret;
2701 else if (!tree_int_cst_lt (totallow, tbias))
2702 return NULL_TREE;
2703 bias = wi::to_widest (tbias);
2704 bias -= wi::to_widest (totallow);
2705 if (bias >= 0 && bias < prec - max)
2707 *mask = wi::lshift (*mask, bias);
2708 return ret;
2713 if (totallowp)
2714 return exp;
2715 if (!tree_int_cst_lt (totallow, low))
2716 return exp;
2717 tem = int_const_binop (MINUS_EXPR, low, totallow);
2718 if (tem == NULL_TREE
2719 || TREE_CODE (tem) != INTEGER_CST
2720 || TREE_OVERFLOW (tem)
2721 || compare_tree_int (tem, prec - max) == 1)
2722 return NULL_TREE;
2724 *mask = wi::lshift (*mask, wi::to_widest (tem));
2725 return exp;
2728 /* Attempt to optimize small range tests using bit test.
2729 E.g.
2730 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2731 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2732 has been by earlier optimizations optimized into:
2733 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2734 As the whole 43 through 82 range spans fewer than 64 values,
2735 for 64-bit word targets optimize that into:
2736 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
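/* In source terms the emitted check behaves like

     offset = X - 43U;
     ok = offset > 40U || ((1ULL << offset) & 0x8F0000004FULL) == 0;

   where each set bit of the mask marks one excluded value (43 plus the bit
   position).  The || is first emitted as a BIT_IOR_EXPR and only turned
   into a short-circuit branch by branch_fixup, see the comment further
   below.  Rough sketch, assuming a 64-bit word_mode.  */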
2738 static bool
2739 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2740 vec<operand_entry *> *ops,
2741 struct range_entry *ranges)
2743 int i, j;
2744 bool any_changes = false;
2745 int prec = GET_MODE_BITSIZE (word_mode);
2746 auto_vec<struct range_entry *, 64> candidates;
2748 for (i = first; i < length - 2; i++)
2750 tree lowi, highi, lowj, highj, type;
2752 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2753 continue;
2754 type = TREE_TYPE (ranges[i].exp);
2755 if (!INTEGRAL_TYPE_P (type))
2756 continue;
2757 lowi = ranges[i].low;
2758 if (lowi == NULL_TREE)
2759 lowi = TYPE_MIN_VALUE (type);
2760 highi = ranges[i].high;
2761 if (highi == NULL_TREE)
2762 continue;
2763 wide_int mask;
2764 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2765 highi, &mask, &lowi);
2766 if (exp == NULL_TREE)
2767 continue;
2768 bool strict_overflow_p = ranges[i].strict_overflow_p;
2769 candidates.truncate (0);
2770 int end = MIN (i + 64, length);
2771 for (j = i + 1; j < end; j++)
2773 tree exp2;
2774 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2775 continue;
2776 if (ranges[j].exp == exp)
2778 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2780 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2781 if (exp2 == exp)
2783 else if (TREE_CODE (exp2) == PLUS_EXPR)
2785 exp2 = TREE_OPERAND (exp2, 0);
2786 STRIP_NOPS (exp2);
2787 if (exp2 != exp)
2788 continue;
2790 else
2791 continue;
2793 else
2794 continue;
2795 lowj = ranges[j].low;
2796 if (lowj == NULL_TREE)
2797 continue;
2798 highj = ranges[j].high;
2799 if (highj == NULL_TREE)
2800 highj = TYPE_MAX_VALUE (type);
2801 wide_int mask2;
2802 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2803 highj, &mask2, NULL);
2804 if (exp2 != exp)
2805 continue;
2806 mask |= mask2;
2807 strict_overflow_p |= ranges[j].strict_overflow_p;
2808 candidates.safe_push (&ranges[j]);
2811 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2812 if (candidates.length () >= 2)
2814 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2815 wi::to_widest (lowi)
2816 + prec - 1 - wi::clz (mask));
2817 operand_entry *oe = (*ops)[ranges[i].idx];
2818 tree op = oe->op;
2819 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2820 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2821 location_t loc = gimple_location (stmt);
2822 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2824 /* See if it isn't cheaper to pretend the minimum value of the
2825 range is 0, if the maximum value is small enough.
2826 We can then avoid the subtraction of the minimum value, but the
2827 mask constant could perhaps be more expensive. */
2828 if (compare_tree_int (lowi, 0) > 0
2829 && compare_tree_int (high, prec) < 0)
2831 int cost_diff;
2832 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2833 rtx reg = gen_raw_REG (word_mode, 10000);
2834 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2835 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2836 GEN_INT (-m)), speed_p);
2837 rtx r = immed_wide_int_const (mask, word_mode);
2838 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2839 word_mode, speed_p);
2840 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2841 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2842 word_mode, speed_p);
2843 if (cost_diff > 0)
2845 mask = wi::lshift (mask, m);
2846 lowi = build_zero_cst (TREE_TYPE (lowi));
2850 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2851 false, lowi, high);
2852 if (tem == NULL_TREE || is_gimple_val (tem))
2853 continue;
2854 tree etype = unsigned_type_for (TREE_TYPE (exp));
2855 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2856 fold_convert_loc (loc, etype, exp),
2857 fold_convert_loc (loc, etype, lowi));
2858 exp = fold_convert_loc (loc, integer_type_node, exp);
2859 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2860 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2861 build_int_cst (word_type, 1), exp);
2862 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2863 wide_int_to_tree (word_type, mask));
2864 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2865 build_zero_cst (word_type));
2866 if (is_gimple_val (exp))
2867 continue;
2869 /* The shift might have undefined behavior if TEM is true,
2870 but reassociate_bb isn't prepared to have basic blocks
2871 split when it is running. So, temporarily emit the code
2872 with BIT_IOR_EXPR instead of &&, and fix it up in
2873 branch_fixup. */
2874 gimple_seq seq;
2875 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2876 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2877 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2878 gimple_seq seq2;
2879 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2880 gimple_seq_add_seq_without_update (&seq, seq2);
2881 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2882 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2883 gimple *g = gimple_build_assign (make_ssa_name (optype),
2884 BIT_IOR_EXPR, tem, exp);
2885 gimple_set_location (g, loc);
2886 gimple_seq_add_stmt_without_update (&seq, g);
2887 exp = gimple_assign_lhs (g);
2888 tree val = build_zero_cst (optype);
2889 if (update_range_test (&ranges[i], NULL, candidates.address (),
2890 candidates.length (), opcode, ops, exp,
2891 seq, false, val, val, strict_overflow_p))
2893 any_changes = true;
2894 reassoc_branch_fixups.safe_push (tem);
2896 else
2897 gimple_seq_discard (seq);
2900 return any_changes;
2903 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
2904 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1. */
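/* Candidates are bucketed by TYPE_PRECISION and by whether they compare
   against 0 or -1 (the buckets/chains vectors below); within one bucket the
   operands are combined with | resp. & in new statements, e.g.

     _t1 = x | y;
     _t2 = _t1 | z;

   and a single test on the combined value (here _t2 != 0) replaces the
   original tests.  Rough sketch of the intent only.  */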
2906 static bool
2907 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2908 vec<operand_entry *> *ops,
2909 struct range_entry *ranges)
2911 int i;
2912 unsigned int b;
2913 bool any_changes = false;
2914 auto_vec<int, 128> buckets;
2915 auto_vec<int, 32> chains;
2916 auto_vec<struct range_entry *, 32> candidates;
2918 for (i = first; i < length; i++)
2920 if (ranges[i].exp == NULL_TREE
2921 || TREE_CODE (ranges[i].exp) != SSA_NAME
2922 || !ranges[i].in_p
2923 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2924 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2925 || ranges[i].low == NULL_TREE
2926 || ranges[i].low != ranges[i].high)
2927 continue;
2929 bool zero_p = integer_zerop (ranges[i].low);
2930 if (!zero_p && !integer_all_onesp (ranges[i].low))
2931 continue;
2933 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2934 if (buckets.length () <= b)
2935 buckets.safe_grow_cleared (b + 1);
2936 if (chains.length () <= (unsigned) i)
2937 chains.safe_grow (i + 1);
2938 chains[i] = buckets[b];
2939 buckets[b] = i + 1;
2942 FOR_EACH_VEC_ELT (buckets, b, i)
2943 if (i && chains[i - 1])
2945 int j, k = i;
2946 for (j = chains[i - 1]; j; j = chains[j - 1])
2948 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2949 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2950 if (reassoc_stmt_dominates_stmt_p (gk, gj))
2951 k = j;
2953 tree type1 = TREE_TYPE (ranges[k - 1].exp);
2954 tree type2 = NULL_TREE;
2955 bool strict_overflow_p = false;
2956 candidates.truncate (0);
2957 for (j = i; j; j = chains[j - 1])
2959 tree type = TREE_TYPE (ranges[j - 1].exp);
2960 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2961 if (j == k
2962 || useless_type_conversion_p (type1, type))
2964 else if (type2 == NULL_TREE
2965 || useless_type_conversion_p (type2, type))
2967 if (type2 == NULL_TREE)
2968 type2 = type;
2969 candidates.safe_push (&ranges[j - 1]);
2972 unsigned l = candidates.length ();
2973 for (j = i; j; j = chains[j - 1])
2975 tree type = TREE_TYPE (ranges[j - 1].exp);
2976 if (j == k)
2977 continue;
2978 if (useless_type_conversion_p (type1, type))
2980 else if (type2 == NULL_TREE
2981 || useless_type_conversion_p (type2, type))
2982 continue;
2983 candidates.safe_push (&ranges[j - 1]);
2985 gimple_seq seq = NULL;
2986 tree op = NULL_TREE;
2987 unsigned int id;
2988 struct range_entry *r;
2989 candidates.safe_push (&ranges[k - 1]);
2990 FOR_EACH_VEC_ELT (candidates, id, r)
2992 gimple *g;
2993 if (id == 0)
2995 op = r->exp;
2996 continue;
2998 if (id == l)
3000 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3001 gimple_seq_add_stmt_without_update (&seq, g);
3002 op = gimple_assign_lhs (g);
3004 tree type = TREE_TYPE (r->exp);
3005 tree exp = r->exp;
3006 if (id >= l && !useless_type_conversion_p (type1, type))
3008 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3009 gimple_seq_add_stmt_without_update (&seq, g);
3010 exp = gimple_assign_lhs (g);
3012 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3013 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3014 op, exp);
3015 gimple_seq_add_stmt_without_update (&seq, g);
3016 op = gimple_assign_lhs (g);
3018 candidates.pop ();
3019 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3020 candidates.length (), opcode, ops, op,
3021 seq, true, ranges[k - 1].low,
3022 ranges[k - 1].low, strict_overflow_p))
3023 any_changes = true;
3024 else
3025 gimple_seq_discard (seq);
3028 return any_changes;
3031 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3032 a >= 0 && a < b into (unsigned) a < (unsigned) b
3033 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
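/* E.g. with int a, b and b known non-negative (via get_nonzero_bits),

     if (a >= 0 && a < b)

   can be tested with a single unsigned comparison,

     if ((unsigned int) a < (unsigned int) b)

   because a negative A wraps around to a large unsigned value.  Sketch
   only; the code below also handles the inverted and GIMPLE_COND forms.  */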
3035 static bool
3036 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3037 vec<operand_entry *> *ops,
3038 struct range_entry *ranges,
3039 basic_block first_bb)
3041 int i;
3042 bool any_changes = false;
3043 hash_map<tree, int> *map = NULL;
3045 for (i = first; i < length; i++)
3047 if (ranges[i].exp == NULL_TREE
3048 || TREE_CODE (ranges[i].exp) != SSA_NAME
3049 || !ranges[i].in_p)
3050 continue;
3052 tree type = TREE_TYPE (ranges[i].exp);
3053 if (!INTEGRAL_TYPE_P (type)
3054 || TYPE_UNSIGNED (type)
3055 || ranges[i].low == NULL_TREE
3056 || !integer_zerop (ranges[i].low)
3057 || ranges[i].high != NULL_TREE)
3058 continue;
3059 /* EXP >= 0 here. */
3060 if (map == NULL)
3061 map = new hash_map <tree, int>;
3062 map->put (ranges[i].exp, i);
3065 if (map == NULL)
3066 return false;
3068 for (i = 0; i < length; i++)
3070 bool in_p = ranges[i].in_p;
3071 if (ranges[i].low == NULL_TREE
3072 || ranges[i].high == NULL_TREE)
3073 continue;
3074 if (!integer_zerop (ranges[i].low)
3075 || !integer_zerop (ranges[i].high))
3077 if (ranges[i].exp
3078 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3079 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3080 && integer_onep (ranges[i].low)
3081 && integer_onep (ranges[i].high))
3082 in_p = !in_p;
3083 else
3084 continue;
3087 gimple *stmt;
3088 tree_code ccode;
3089 tree rhs1, rhs2;
3090 if (ranges[i].exp)
3092 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3093 continue;
3094 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3095 if (!is_gimple_assign (stmt))
3096 continue;
3097 ccode = gimple_assign_rhs_code (stmt);
3098 rhs1 = gimple_assign_rhs1 (stmt);
3099 rhs2 = gimple_assign_rhs2 (stmt);
3101 else
3103 operand_entry *oe = (*ops)[ranges[i].idx];
3104 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3105 if (gimple_code (stmt) != GIMPLE_COND)
3106 continue;
3107 ccode = gimple_cond_code (stmt);
3108 rhs1 = gimple_cond_lhs (stmt);
3109 rhs2 = gimple_cond_rhs (stmt);
3112 if (TREE_CODE (rhs1) != SSA_NAME
3113 || rhs2 == NULL_TREE
3114 || TREE_CODE (rhs2) != SSA_NAME)
3115 continue;
3117 switch (ccode)
3119 case GT_EXPR:
3120 case GE_EXPR:
3121 case LT_EXPR:
3122 case LE_EXPR:
3123 break;
3124 default:
3125 continue;
3127 if (in_p)
3128 ccode = invert_tree_comparison (ccode, false);
3129 switch (ccode)
3131 case GT_EXPR:
3132 case GE_EXPR:
3133 std::swap (rhs1, rhs2);
3134 ccode = swap_tree_comparison (ccode);
3135 break;
3136 case LT_EXPR:
3137 case LE_EXPR:
3138 break;
3139 default:
3140 gcc_unreachable ();
3143 int *idx = map->get (rhs1);
3144 if (idx == NULL)
3145 continue;
3147 /* maybe_optimize_range_tests allows statements without side-effects
3148 in the basic blocks as long as they are consumed in the same bb.
3149 Make sure rhs2's def stmt is not among them, otherwise we can't
3150 safely use get_nonzero_bits on it. E.g. in:
3151 # RANGE [-83, 1] NONZERO 173
3152 # k_32 = PHI <k_47(13), k_12(9)>
3154 if (k_32 >= 0)
3155 goto <bb 5>; [26.46%]
3156 else
3157 goto <bb 9>; [73.54%]
3159 <bb 5> [local count: 140323371]:
3160 # RANGE [0, 1] NONZERO 1
3161 _5 = (int) k_32;
3162 # RANGE [0, 4] NONZERO 4
3163 _21 = _5 << 2;
3164 # RANGE [0, 4] NONZERO 4
3165 iftmp.0_44 = (char) _21;
3166 if (k_32 < iftmp.0_44)
3167 goto <bb 6>; [84.48%]
3168 else
3169 goto <bb 9>; [15.52%]
3170 the ranges on _5/_21/iftmp.0_44 are flow sensitive; they assume that
3171 k_32 >= 0. If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3172 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3173 those stmts even for negative k_32 and the value ranges would no
3174 longer be guaranteed, so the optimization would be invalid. */
3175 while (opcode == ERROR_MARK)
3177 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3178 basic_block bb2 = gimple_bb (g);
3179 if (bb2
3180 && bb2 != first_bb
3181 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3183 /* As an exception, handle a few common cases. */
3184 if (gimple_assign_cast_p (g)
3185 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3187 tree op0 = gimple_assign_rhs1 (g);
3188 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3189 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3190 > TYPE_PRECISION (TREE_TYPE (op0))))
3191 /* Zero-extension is always ok. */
3192 break;
3193 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3194 == TYPE_PRECISION (TREE_TYPE (op0))
3195 && TREE_CODE (op0) == SSA_NAME)
3197 /* Cast from signed to unsigned or vice versa. Retry
3198 with the op0 as new rhs2. */
3199 rhs2 = op0;
3200 continue;
3203 else if (is_gimple_assign (g)
3204 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3205 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3206 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3207 /* Masking with INTEGER_CST with MSB clear is always ok
3208 too. */
3209 break;
3210 rhs2 = NULL_TREE;
3212 break;
3214 if (rhs2 == NULL_TREE)
3215 continue;
3217 wide_int nz = get_nonzero_bits (rhs2);
3218 if (wi::neg_p (nz))
3219 continue;
3221 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3222 and RHS2 is known to be >= 0. */
3223 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3225 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3226 if ((ranges[*idx].strict_overflow_p
3227 || ranges[i].strict_overflow_p)
3228 && issue_strict_overflow_warning (wc))
3229 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3230 "assuming signed overflow does not occur "
3231 "when simplifying range test");
3233 if (dump_file && (dump_flags & TDF_DETAILS))
3235 struct range_entry *r = &ranges[*idx];
3236 fprintf (dump_file, "Optimizing range test ");
3237 print_generic_expr (dump_file, r->exp);
3238 fprintf (dump_file, " +[");
3239 print_generic_expr (dump_file, r->low);
3240 fprintf (dump_file, ", ");
3241 print_generic_expr (dump_file, r->high);
3242 fprintf (dump_file, "] and comparison ");
3243 print_generic_expr (dump_file, rhs1);
3244 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3245 print_generic_expr (dump_file, rhs2);
3246 fprintf (dump_file, "\n into (");
3247 print_generic_expr (dump_file, utype);
3248 fprintf (dump_file, ") ");
3249 print_generic_expr (dump_file, rhs1);
3250 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3251 print_generic_expr (dump_file, utype);
3252 fprintf (dump_file, ") ");
3253 print_generic_expr (dump_file, rhs2);
3254 fprintf (dump_file, "\n");
3257 operand_entry *oe = (*ops)[ranges[i].idx];
3258 ranges[i].in_p = 0;
3259 if (opcode == BIT_IOR_EXPR
3260 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3262 ranges[i].in_p = 1;
3263 ccode = invert_tree_comparison (ccode, false);
3266 unsigned int uid = gimple_uid (stmt);
3267 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3268 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3269 gimple_set_uid (g, uid);
3270 rhs1 = gimple_assign_lhs (g);
3271 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3272 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3274 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3275 gimple_set_uid (g, uid);
3276 rhs2 = gimple_assign_lhs (g);
3277 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3279 if (tree_swap_operands_p (rhs1, rhs2))
3281 std::swap (rhs1, rhs2);
3282 ccode = swap_tree_comparison (ccode);
3284 if (gimple_code (stmt) == GIMPLE_COND)
3286 gcond *c = as_a <gcond *> (stmt);
3287 gimple_cond_set_code (c, ccode);
3288 gimple_cond_set_lhs (c, rhs1);
3289 gimple_cond_set_rhs (c, rhs2);
3290 update_stmt (stmt);
3292 else
3294 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3295 if (!INTEGRAL_TYPE_P (ctype)
3296 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3297 && TYPE_PRECISION (ctype) != 1))
3298 ctype = boolean_type_node;
3299 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3300 gimple_set_uid (g, uid);
3301 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3302 if (oe->op && ctype != TREE_TYPE (oe->op))
3304 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3305 NOP_EXPR, gimple_assign_lhs (g));
3306 gimple_set_uid (g, uid);
3307 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3309 ranges[i].exp = gimple_assign_lhs (g);
3310 oe->op = ranges[i].exp;
3311 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3312 ranges[i].high = ranges[i].low;
3314 ranges[i].strict_overflow_p = false;
3315 oe = (*ops)[ranges[*idx].idx];
3316 /* Now change all the other range test immediate uses, so that
3317 those tests will be optimized away. */
3318 if (opcode == ERROR_MARK)
3320 if (oe->op)
3321 oe->op = build_int_cst (TREE_TYPE (oe->op),
3322 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3323 else
3324 oe->op = (oe->rank == BIT_IOR_EXPR
3325 ? boolean_false_node : boolean_true_node);
3327 else
3328 oe->op = error_mark_node;
3329 ranges[*idx].exp = NULL_TREE;
3330 ranges[*idx].low = NULL_TREE;
3331 ranges[*idx].high = NULL_TREE;
3332 any_changes = true;
3335 delete map;
3336 return any_changes;
3339 /* Optimize range tests, similarly to how fold_range_test optimizes
3340 them on trees. The tree code for the binary
3341 operation between all the operands is OPCODE.
3342 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3343 maybe_optimize_range_tests for inter-bb range optimization.
3344 In that case if oe->op is NULL, oe->id is bb->index whose
3345 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3346 the actual opcode.
3347 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
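/* Rough outline of the driver: every operand is turned into a range_entry,
   the entries are sorted with range_entry_cmp so tests on the same SSA_NAME
   become adjacent, neighbouring ranges are combined via merge_ranges, and
   the specialised optimize_range_tests_* helpers above are then tried.
   E.g. the three tests X == 2, X == 3 and X == 4 on the same X merge into
   the single range [2, 4].  Informal sketch only.  */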
3349 static bool
3350 optimize_range_tests (enum tree_code opcode,
3351 vec<operand_entry *> *ops, basic_block first_bb)
3353 unsigned int length = ops->length (), i, j, first;
3354 operand_entry *oe;
3355 struct range_entry *ranges;
3356 bool any_changes = false;
3358 if (length == 1)
3359 return false;
3361 ranges = XNEWVEC (struct range_entry, length);
3362 for (i = 0; i < length; i++)
3364 oe = (*ops)[i];
3365 ranges[i].idx = i;
3366 init_range_entry (ranges + i, oe->op,
3367 oe->op
3368 ? NULL
3369 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3370 /* For | invert it now, we will invert it again before emitting
3371 the optimized expression. */
3372 if (opcode == BIT_IOR_EXPR
3373 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3374 ranges[i].in_p = !ranges[i].in_p;
3377 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3378 for (i = 0; i < length; i++)
3379 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3380 break;
3382 /* Try to merge ranges. */
3383 for (first = i; i < length; i++)
3385 tree low = ranges[i].low;
3386 tree high = ranges[i].high;
3387 int in_p = ranges[i].in_p;
3388 bool strict_overflow_p = ranges[i].strict_overflow_p;
3389 int update_fail_count = 0;
3391 for (j = i + 1; j < length; j++)
3393 if (ranges[i].exp != ranges[j].exp)
3394 break;
3395 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3396 ranges[j].in_p, ranges[j].low, ranges[j].high))
3397 break;
3398 strict_overflow_p |= ranges[j].strict_overflow_p;
3401 if (j == i + 1)
3402 continue;
3404 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3405 opcode, ops, ranges[i].exp, NULL, in_p,
3406 low, high, strict_overflow_p))
3408 i = j - 1;
3409 any_changes = true;
3411 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3412 while update_range_test would fail. */
3413 else if (update_fail_count == 64)
3414 i = j - 1;
3415 else
3416 ++update_fail_count;
3419 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3420 ops, ranges);
3422 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3423 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3424 ops, ranges);
3425 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3426 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3427 ops, ranges);
3428 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3429 ops, ranges);
3430 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3431 ranges, first_bb);
3433 if (any_changes && opcode != ERROR_MARK)
3435 j = 0;
3436 FOR_EACH_VEC_ELT (*ops, i, oe)
3438 if (oe->op == error_mark_node)
3439 continue;
3440 else if (i != j)
3441 (*ops)[j] = oe;
3442 j++;
3444 ops->truncate (j);
3447 XDELETEVEC (ranges);
3448 return any_changes;
3451 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3452 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3453 otherwise the comparison code. */
3455 static tree_code
3456 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3458 if (TREE_CODE (var) != SSA_NAME)
3459 return ERROR_MARK;
3461 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3462 if (stmt == NULL)
3463 return ERROR_MARK;
3465 /* ??? If we start creating more COND_EXPR, we could perform
3466 this same optimization with them. For now, simplify. */
3467 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3468 return ERROR_MARK;
3470 tree cond = gimple_assign_rhs1 (stmt);
3471 tree_code cmp = TREE_CODE (cond);
3472 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3473 return ERROR_MARK;
3475 /* ??? For now, allow only canonical true and false result vectors.
3476 We could expand this to other constants should the need arise,
3477 but at the moment we don't create them. */
3478 tree t = gimple_assign_rhs2 (stmt);
3479 tree f = gimple_assign_rhs3 (stmt);
3480 bool inv;
3481 if (integer_all_onesp (t))
3482 inv = false;
3483 else if (integer_all_onesp (f))
3485 cmp = invert_tree_comparison (cmp, false);
3486 inv = true;
3488 else
3489 return ERROR_MARK;
3490 if (!integer_zerop (f))
3491 return ERROR_MARK;
3493 /* Success! */
3494 if (rets)
3495 *rets = stmt;
3496 if (reti)
3497 *reti = inv;
3498 return cmp;
3501 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3502 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
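/* For instance, two selects on the same operands,

     c1 = VEC_COND_EXPR <a < b, { -1, ... }, { 0, ... }>;
     c2 = VEC_COND_EXPR <a <= b, { -1, ... }, { 0, ... }>;
     r = c1 & c2;

   can have their comparisons folded (here presumably to a < b) by
   maybe_fold_and_comparisons, leaving a single VEC_COND_EXPR.  Sketch only;
   ovce_extract_ops below insists on the canonical -1/0 arms.  */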
3504 static bool
3505 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3507 unsigned int length = ops->length (), i, j;
3508 bool any_changes = false;
3510 if (length == 1)
3511 return false;
3513 for (i = 0; i < length; ++i)
3515 tree elt0 = (*ops)[i]->op;
3517 gassign *stmt0;
3518 bool invert;
3519 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3520 if (cmp0 == ERROR_MARK)
3521 continue;
3523 for (j = i + 1; j < length; ++j)
3525 tree &elt1 = (*ops)[j]->op;
3527 gassign *stmt1;
3528 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3529 if (cmp1 == ERROR_MARK)
3530 continue;
3532 tree cond0 = gimple_assign_rhs1 (stmt0);
3533 tree x0 = TREE_OPERAND (cond0, 0);
3534 tree y0 = TREE_OPERAND (cond0, 1);
3536 tree cond1 = gimple_assign_rhs1 (stmt1);
3537 tree x1 = TREE_OPERAND (cond1, 0);
3538 tree y1 = TREE_OPERAND (cond1, 1);
3540 tree comb;
3541 if (opcode == BIT_AND_EXPR)
3542 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3543 else if (opcode == BIT_IOR_EXPR)
3544 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3545 else
3546 gcc_unreachable ();
3547 if (comb == NULL)
3548 continue;
3550 /* Success! */
3551 if (dump_file && (dump_flags & TDF_DETAILS))
3553 fprintf (dump_file, "Transforming ");
3554 print_generic_expr (dump_file, cond0);
3555 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3556 print_generic_expr (dump_file, cond1);
3557 fprintf (dump_file, " into ");
3558 print_generic_expr (dump_file, comb);
3559 fputc ('\n', dump_file);
3562 gimple_assign_set_rhs1 (stmt0, comb);
3563 if (invert)
3564 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3565 *gimple_assign_rhs3_ptr (stmt0));
3566 update_stmt (stmt0);
3568 elt1 = error_mark_node;
3569 any_changes = true;
3573 if (any_changes)
3575 operand_entry *oe;
3576 j = 0;
3577 FOR_EACH_VEC_ELT (*ops, i, oe)
3579 if (oe->op == error_mark_node)
3580 continue;
3581 else if (i != j)
3582 (*ops)[j] = oe;
3583 j++;
3585 ops->truncate (j);
3588 return any_changes;
3591 /* Return true if STMT is a cast like:
3592 <bb N>:
3594 _123 = (int) _234;
3596 <bb M>:
3597 # _345 = PHI <_123(N), 1(...), 1(...)>
3598 where _234 has bool type, _123 has a single use and
3599 bb N has a single successor M. This is commonly used in
3600 the last block of a range test.
3602 Also return true if STMT is a comparison (tcc_comparison) like:
3603 <bb N>:
3605 _234 = a_2(D) == 2;
3607 <bb M>:
3608 # _345 = PHI <_234(N), 1(...), 1(...)>
3609 _346 = (int) _345;
3610 where _234 has bool type, a single use and
3611 bb N has a single successor M. This is commonly used in
3612 the last block of a range test. */
3614 static bool
3615 final_range_test_p (gimple *stmt)
3617 basic_block bb, rhs_bb, lhs_bb;
3618 edge e;
3619 tree lhs, rhs;
3620 use_operand_p use_p;
3621 gimple *use_stmt;
3623 if (!gimple_assign_cast_p (stmt)
3624 && (!is_gimple_assign (stmt)
3625 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3626 != tcc_comparison)))
3627 return false;
3628 bb = gimple_bb (stmt);
3629 if (!single_succ_p (bb))
3630 return false;
3631 e = single_succ_edge (bb);
3632 if (e->flags & EDGE_COMPLEX)
3633 return false;
3635 lhs = gimple_assign_lhs (stmt);
3636 rhs = gimple_assign_rhs1 (stmt);
3637 if (gimple_assign_cast_p (stmt)
3638 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3639 || TREE_CODE (rhs) != SSA_NAME
3640 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3641 return false;
3643 if (!gimple_assign_cast_p (stmt)
3644 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3645 return false;
3647 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3648 if (!single_imm_use (lhs, &use_p, &use_stmt))
3649 return false;
3651 if (gimple_code (use_stmt) != GIMPLE_PHI
3652 || gimple_bb (use_stmt) != e->dest)
3653 return false;
3655 /* And that the rhs is defined in the same loop. */
3656 if (gimple_assign_cast_p (stmt))
3658 if (TREE_CODE (rhs) != SSA_NAME
3659 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3660 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3661 return false;
3663 else
3665 if (TREE_CODE (lhs) != SSA_NAME
3666 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3667 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3668 return false;
3671 return true;
3674 /* Return true if BB is suitable basic block for inter-bb range test
3675 optimization. If BACKWARD is true, BB should be the only predecessor
3676 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3677 or compared with to find a common basic block to which all conditions
3678 branch if true resp. false. If BACKWARD is false, TEST_BB should
3679 be the only predecessor of BB. */
3681 static bool
3682 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3683 bool backward)
3685 edge_iterator ei, ei2;
3686 edge e, e2;
3687 gimple *stmt;
3688 gphi_iterator gsi;
3689 bool other_edge_seen = false;
3690 bool is_cond;
3692 if (test_bb == bb)
3693 return false;
3694 /* Check last stmt first. */
3695 stmt = last_stmt (bb);
3696 if (stmt == NULL
3697 || (gimple_code (stmt) != GIMPLE_COND
3698 && (backward || !final_range_test_p (stmt)))
3699 || gimple_visited_p (stmt)
3700 || stmt_could_throw_p (stmt)
3701 || *other_bb == bb)
3702 return false;
3703 is_cond = gimple_code (stmt) == GIMPLE_COND;
3704 if (is_cond)
3706 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3707 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3708 to *OTHER_BB (if not set yet, try to find it out). */
3709 if (EDGE_COUNT (bb->succs) != 2)
3710 return false;
3711 FOR_EACH_EDGE (e, ei, bb->succs)
3713 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3714 return false;
3715 if (e->dest == test_bb)
3717 if (backward)
3718 continue;
3719 else
3720 return false;
3722 if (e->dest == bb)
3723 return false;
3724 if (*other_bb == NULL)
3726 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3727 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3728 return false;
3729 else if (e->dest == e2->dest)
3730 *other_bb = e->dest;
3731 if (*other_bb == NULL)
3732 return false;
3734 if (e->dest == *other_bb)
3735 other_edge_seen = true;
3736 else if (backward)
3737 return false;
3739 if (*other_bb == NULL || !other_edge_seen)
3740 return false;
3742 else if (single_succ (bb) != *other_bb)
3743 return false;
3745 /* Now check all PHIs of *OTHER_BB. */
3746 e = find_edge (bb, *other_bb);
3747 e2 = find_edge (test_bb, *other_bb);
3748 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3750 gphi *phi = gsi.phi ();
3751 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3752 corresponding to the BB and TEST_BB predecessors must be the same. */
3753 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3754 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3756 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3757 one of the PHIs should have the lhs of the last stmt in
3758 that block as PHI arg and that PHI should have 0 or 1
3759 corresponding to it in all other range test basic blocks
3760 considered. */
3761 if (!is_cond)
3763 if (gimple_phi_arg_def (phi, e->dest_idx)
3764 == gimple_assign_lhs (stmt)
3765 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3766 || integer_onep (gimple_phi_arg_def (phi,
3767 e2->dest_idx))))
3768 continue;
3770 else
3772 gimple *test_last = last_stmt (test_bb);
3773 if (gimple_code (test_last) != GIMPLE_COND
3774 && gimple_phi_arg_def (phi, e2->dest_idx)
3775 == gimple_assign_lhs (test_last)
3776 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3777 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3778 continue;
3781 return false;
3784 return true;
3787 /* Return true if BB doesn't have side-effects that would disallow
3788 range test optimization, all SSA_NAMEs set in the bb are consumed
3789 in the bb and there are no PHIs. */
3791 static bool
3792 no_side_effect_bb (basic_block bb)
3794 gimple_stmt_iterator gsi;
3795 gimple *last;
3797 if (!gimple_seq_empty_p (phi_nodes (bb)))
3798 return false;
3799 last = last_stmt (bb);
3800 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3802 gimple *stmt = gsi_stmt (gsi);
3803 tree lhs;
3804 imm_use_iterator imm_iter;
3805 use_operand_p use_p;
3807 if (is_gimple_debug (stmt))
3808 continue;
3809 if (gimple_has_side_effects (stmt))
3810 return false;
3811 if (stmt == last)
3812 return true;
3813 if (!is_gimple_assign (stmt))
3814 return false;
3815 lhs = gimple_assign_lhs (stmt);
3816 if (TREE_CODE (lhs) != SSA_NAME)
3817 return false;
3818 if (gimple_assign_rhs_could_trap_p (stmt))
3819 return false;
3820 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3822 gimple *use_stmt = USE_STMT (use_p);
3823 if (is_gimple_debug (use_stmt))
3824 continue;
3825 if (gimple_bb (use_stmt) != bb)
3826 return false;
3829 return false;
3832 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3833 return true and fill in *OPS recursively. */
3835 static bool
3836 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3837 struct loop *loop)
3839 gimple *stmt = SSA_NAME_DEF_STMT (var);
3840 tree rhs[2];
3841 int i;
3843 if (!is_reassociable_op (stmt, code, loop))
3844 return false;
3846 rhs[0] = gimple_assign_rhs1 (stmt);
3847 rhs[1] = gimple_assign_rhs2 (stmt);
3848 gimple_set_visited (stmt, true);
3849 for (i = 0; i < 2; i++)
3850 if (TREE_CODE (rhs[i]) == SSA_NAME
3851 && !get_ops (rhs[i], code, ops, loop)
3852 && has_single_use (rhs[i]))
3854 operand_entry *oe = operand_entry_pool.allocate ();
3856 oe->op = rhs[i];
3857 oe->rank = code;
3858 oe->id = 0;
3859 oe->count = 1;
3860 oe->stmt_to_insert = NULL;
3861 ops->safe_push (oe);
3863 return true;
3866 /* Find the ops that were added by get_ops starting from VAR, see if
3867 they were changed during update_range_test and if yes, create new
3868 stmts. */
3870 static tree
3871 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3872 unsigned int *pidx, struct loop *loop)
3874 gimple *stmt = SSA_NAME_DEF_STMT (var);
3875 tree rhs[4];
3876 int i;
3878 if (!is_reassociable_op (stmt, code, loop))
3879 return NULL;
3881 rhs[0] = gimple_assign_rhs1 (stmt);
3882 rhs[1] = gimple_assign_rhs2 (stmt);
3883 rhs[2] = rhs[0];
3884 rhs[3] = rhs[1];
3885 for (i = 0; i < 2; i++)
3886 if (TREE_CODE (rhs[i]) == SSA_NAME)
3888 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3889 if (rhs[2 + i] == NULL_TREE)
3891 if (has_single_use (rhs[i]))
3892 rhs[2 + i] = ops[(*pidx)++]->op;
3893 else
3894 rhs[2 + i] = rhs[i];
3897 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3898 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3900 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3901 var = make_ssa_name (TREE_TYPE (var));
3902 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3903 rhs[2], rhs[3]);
3904 gimple_set_uid (g, gimple_uid (stmt));
3905 gimple_set_visited (g, true);
3906 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3908 return var;
3911 /* Structure to track the initial value passed to get_ops and
3912 the range in the ops vector for each basic block. */
3914 struct inter_bb_range_test_entry
3916 tree op;
3917 unsigned int first_idx, last_idx;
3920 /* Inter-bb range test optimization.
3922 Returns TRUE if a gimple conditional is optimized into a constant
3923 true/false, otherwise returns FALSE.
3925 This indicates to the caller that it should run a CFG cleanup pass
3926 once reassociation is completed. */
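/* Typical shape handled here, sketched as source:

     if (a == 3) goto merge;       // first_bb, GIMPLE_COND
     if (a == 5) goto merge;       // middle bb, GIMPLE_COND
     t = a == 7;                   // last_bb, final_range_test_p
   merge:
     phi = PHI <1, 1, t>

   The per-bb conditions are collected into one operand vector and handed to
   optimize_range_tests.  This is only an informal picture of the CFG
   pattern matched below.  */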
3928 static bool
3929 maybe_optimize_range_tests (gimple *stmt)
3931 basic_block first_bb = gimple_bb (stmt);
3932 basic_block last_bb = first_bb;
3933 basic_block other_bb = NULL;
3934 basic_block bb;
3935 edge_iterator ei;
3936 edge e;
3937 auto_vec<operand_entry *> ops;
3938 auto_vec<inter_bb_range_test_entry> bbinfo;
3939 bool any_changes = false;
3940 bool cfg_cleanup_needed = false;
3942 /* Consider only basic blocks that end with GIMPLE_COND or
3943 a cast statement satisfying final_range_test_p. All
3944 but the last bb in the first_bb .. last_bb range
3945 should end with GIMPLE_COND. */
3946 if (gimple_code (stmt) == GIMPLE_COND)
3948 if (EDGE_COUNT (first_bb->succs) != 2)
3949 return cfg_cleanup_needed;
3951 else if (final_range_test_p (stmt))
3952 other_bb = single_succ (first_bb);
3953 else
3954 return cfg_cleanup_needed;
3956 if (stmt_could_throw_p (stmt))
3957 return cfg_cleanup_needed;
3959 /* As relative ordering of post-dominator sons isn't fixed,
3960 maybe_optimize_range_tests can be called first on any
3961 bb in the range we want to optimize. So, start searching
3962 backwards, if first_bb can be set to a predecessor. */
3963 while (single_pred_p (first_bb))
3965 basic_block pred_bb = single_pred (first_bb);
3966 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3967 break;
3968 if (!no_side_effect_bb (first_bb))
3969 break;
3970 first_bb = pred_bb;
3972 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3973 Before starting forward search in last_bb successors, find
3974 out the other_bb. */
3975 if (first_bb == last_bb)
3977 other_bb = NULL;
3978 /* As non-GIMPLE_COND last stmt always terminates the range,
3979 if forward search didn't discover anything, just give up. */
3980 if (gimple_code (stmt) != GIMPLE_COND)
3981 return cfg_cleanup_needed;
3982 /* Look at both successors. Either the successor ends with a
3983 GIMPLE_COND and satisfies suitable_cond_bb, or it ends with a
3984 cast and other_bb is that cast's successor. */
3985 FOR_EACH_EDGE (e, ei, first_bb->succs)
3986 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3987 || e->dest == first_bb)
3988 return cfg_cleanup_needed;
3989 else if (single_pred_p (e->dest))
3991 stmt = last_stmt (e->dest);
3992 if (stmt
3993 && gimple_code (stmt) == GIMPLE_COND
3994 && EDGE_COUNT (e->dest->succs) == 2)
3996 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3997 break;
3998 else
3999 other_bb = NULL;
4001 else if (stmt
4002 && final_range_test_p (stmt)
4003 && find_edge (first_bb, single_succ (e->dest)))
4005 other_bb = single_succ (e->dest);
4006 if (other_bb == first_bb)
4007 other_bb = NULL;
4010 if (other_bb == NULL)
4011 return cfg_cleanup_needed;
4013 /* Now do the forward search, moving last_bb to successor bbs
4014 that aren't other_bb. */
4015 while (EDGE_COUNT (last_bb->succs) == 2)
4017 FOR_EACH_EDGE (e, ei, last_bb->succs)
4018 if (e->dest != other_bb)
4019 break;
4020 if (e == NULL)
4021 break;
4022 if (!single_pred_p (e->dest))
4023 break;
4024 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4025 break;
4026 if (!no_side_effect_bb (e->dest))
4027 break;
4028 last_bb = e->dest;
4030 if (first_bb == last_bb)
4031 return cfg_cleanup_needed;
4032 /* Here, basic blocks first_bb through last_bb's predecessor
4033 each end with a GIMPLE_COND, each has one edge to other_bb
4034 and another edge to another block in the range,
4035 no block except first_bb has side-effects, and
4036 last_bb ends with either a GIMPLE_COND or a cast satisfying
4037 final_range_test_p. */
4038 for (bb = last_bb; ; bb = single_pred (bb))
4040 enum tree_code code;
4041 tree lhs, rhs;
4042 inter_bb_range_test_entry bb_ent;
4044 bb_ent.op = NULL_TREE;
4045 bb_ent.first_idx = ops.length ();
4046 bb_ent.last_idx = bb_ent.first_idx;
4047 e = find_edge (bb, other_bb);
4048 stmt = last_stmt (bb);
4049 gimple_set_visited (stmt, true);
4050 if (gimple_code (stmt) != GIMPLE_COND)
4052 use_operand_p use_p;
4053 gimple *phi;
4054 edge e2;
4055 unsigned int d;
4057 lhs = gimple_assign_lhs (stmt);
4058 rhs = gimple_assign_rhs1 (stmt);
4059 gcc_assert (bb == last_bb);
4061 /* stmt is
4062 _123 = (int) _234;
4063 OR
4064 _234 = a_2(D) == 2;
4066 followed by:
4067 <bb M>:
4068 # _345 = PHI <_123(N), 1(...), 1(...)>
4070 or 0 instead of 1. If it is 0, the _234
4071 range test is anded together with all the
4072 other range tests; if it is 1, it is ored with
4073 them. */
4074 single_imm_use (lhs, &use_p, &phi);
4075 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4076 e2 = find_edge (first_bb, other_bb);
4077 d = e2->dest_idx;
4078 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4079 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4080 code = BIT_AND_EXPR;
4081 else
4083 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4084 code = BIT_IOR_EXPR;
4087 /* If _234's SSA_NAME_DEF_STMT is
4088 _234 = _567 | _789;
4089 (or &, corresponding to 1/0 in the phi arguments),
4090 push into ops the individual range test arguments
4091 of the bitwise or resp. and, recursively. */
4092 if (TREE_CODE (rhs) == SSA_NAME
4093 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4094 != tcc_comparison)
4095 && !get_ops (rhs, code, &ops,
4096 loop_containing_stmt (stmt))
4097 && has_single_use (rhs))
4099 /* Otherwise, push the _234 range test itself. */
4100 operand_entry *oe = operand_entry_pool.allocate ();
4102 oe->op = rhs;
4103 oe->rank = code;
4104 oe->id = 0;
4105 oe->count = 1;
4106 oe->stmt_to_insert = NULL;
4107 ops.safe_push (oe);
4108 bb_ent.last_idx++;
4109 bb_ent.op = rhs;
4111 else if (is_gimple_assign (stmt)
4112 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4113 == tcc_comparison)
4114 && !get_ops (lhs, code, &ops,
4115 loop_containing_stmt (stmt))
4116 && has_single_use (lhs))
4118 operand_entry *oe = operand_entry_pool.allocate ();
4119 oe->op = lhs;
4120 oe->rank = code;
4121 oe->id = 0;
4122 oe->count = 1;
4123 ops.safe_push (oe);
4124 bb_ent.last_idx++;
4125 bb_ent.op = lhs;
4127 else
4129 bb_ent.last_idx = ops.length ();
4130 bb_ent.op = rhs;
4132 bbinfo.safe_push (bb_ent);
4133 continue;
4135 /* Otherwise stmt is GIMPLE_COND. */
4136 code = gimple_cond_code (stmt);
4137 lhs = gimple_cond_lhs (stmt);
4138 rhs = gimple_cond_rhs (stmt);
4139 if (TREE_CODE (lhs) == SSA_NAME
4140 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4141 && ((code != EQ_EXPR && code != NE_EXPR)
4142 || rhs != boolean_false_node
4143 /* Either push into ops the individual bitwise
4144 or resp. and operands, depending on which
4145 edge is other_bb. */
4146 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4147 ^ (code == EQ_EXPR))
4148 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4149 loop_containing_stmt (stmt))))
4151 /* Or push the GIMPLE_COND stmt itself. */
4152 operand_entry *oe = operand_entry_pool.allocate ();
4154 oe->op = NULL;
4155 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4156 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4157 /* oe->op = NULL signifies that there is no SSA_NAME
4158 for the range test, and oe->id instead is the
4159 number of the basic block at whose end the
4160 GIMPLE_COND is. */
4161 oe->id = bb->index;
4162 oe->count = 1;
4163 oe->stmt_to_insert = NULL;
4164 ops.safe_push (oe);
4165 bb_ent.op = NULL;
4166 bb_ent.last_idx++;
4168 else if (ops.length () > bb_ent.first_idx)
4170 bb_ent.op = lhs;
4171 bb_ent.last_idx = ops.length ();
4173 bbinfo.safe_push (bb_ent);
4174 if (bb == first_bb)
4175 break;
4177 if (ops.length () > 1)
4178 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4179 if (any_changes)
4181 unsigned int idx, max_idx = 0;
4182 /* update_ops relies on the has_single_use predicates returning the
4183 same values as they did during get_ops earlier. Additionally it
4184 never removes statements, only adds new ones, and it should walk
4185 from the single immediate use and check the predicate before
4186 making those changes.
4187 On the other hand, handling GIMPLE_COND directly can turn
4188 previously multiply-used SSA_NAMEs into single-use SSA_NAMEs, so
4189 that needs to be done in a separate loop afterwards. */
4190 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4192 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4193 && bbinfo[idx].op != NULL_TREE)
4195 tree new_op;
4197 max_idx = idx;
4198 stmt = last_stmt (bb);
4199 new_op = update_ops (bbinfo[idx].op,
4200 (enum tree_code)
4201 ops[bbinfo[idx].first_idx]->rank,
4202 ops, &bbinfo[idx].first_idx,
4203 loop_containing_stmt (stmt));
4204 if (new_op == NULL_TREE)
4206 gcc_assert (bb == last_bb);
4207 new_op = ops[bbinfo[idx].first_idx++]->op;
4209 if (bbinfo[idx].op != new_op)
4211 imm_use_iterator iter;
4212 use_operand_p use_p;
4213 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4215 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4216 if (is_gimple_debug (use_stmt))
4217 continue;
4218 else if (gimple_code (use_stmt) == GIMPLE_COND
4219 || gimple_code (use_stmt) == GIMPLE_PHI)
4220 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4221 SET_USE (use_p, new_op);
4222 else if ((is_gimple_assign (use_stmt)
4223 && (TREE_CODE_CLASS
4224 (gimple_assign_rhs_code (use_stmt))
4225 == tcc_comparison)))
4226 cast_or_tcc_cmp_stmt = use_stmt;
4227 else if (gimple_assign_cast_p (use_stmt))
4228 cast_or_tcc_cmp_stmt = use_stmt;
4229 else
4230 gcc_unreachable ();
4232 if (cast_or_tcc_cmp_stmt)
4234 gcc_assert (bb == last_bb);
4235 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4236 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4237 enum tree_code rhs_code
4238 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4239 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4240 : CONVERT_EXPR;
4241 gassign *g;
4242 if (is_gimple_min_invariant (new_op))
4244 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4245 g = gimple_build_assign (new_lhs, new_op);
4247 else
4248 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4249 gimple_stmt_iterator gsi
4250 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4251 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4252 gimple_set_visited (g, true);
4253 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4254 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4255 if (is_gimple_debug (use_stmt))
4256 continue;
4257 else if (gimple_code (use_stmt) == GIMPLE_COND
4258 || gimple_code (use_stmt) == GIMPLE_PHI)
4259 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4260 SET_USE (use_p, new_lhs);
4261 else
4262 gcc_unreachable ();
4266 if (bb == first_bb)
4267 break;
4269 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4271 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4272 && bbinfo[idx].op == NULL_TREE
4273 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4275 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4277 if (idx > max_idx)
4278 max_idx = idx;
4280 /* If we collapse the conditional to a true/false
4281 condition, then bubble that knowledge up to our caller. */
4282 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4284 gimple_cond_make_false (cond_stmt);
4285 cfg_cleanup_needed = true;
4287 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4289 gimple_cond_make_true (cond_stmt);
4290 cfg_cleanup_needed = true;
4292 else
4294 gimple_cond_set_code (cond_stmt, NE_EXPR);
4295 gimple_cond_set_lhs (cond_stmt,
4296 ops[bbinfo[idx].first_idx]->op);
4297 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4299 update_stmt (cond_stmt);
4301 if (bb == first_bb)
4302 break;
4305 /* The above changes could result in basic blocks after the first
4306 modified one, up to and including last_bb, being executed even if
4307 they would not have been in the original program. If the value ranges of
4308 assignment lhs' in those bbs were dependent on the conditions
4309 guarding those basic blocks, which now can change, the VRs might
4310 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4311 are only used within the same bb, it should not be a big deal if
4312 we just reset all the VRs in those bbs. See PR68671. */
4313 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4314 reset_flow_sensitive_info_in_bb (bb);
4316 return cfg_cleanup_needed;
4319 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4320 of STMT in its operands. This is also known as a "destructive
4321 update" operation. */
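/* A hedged example (the names are hypothetical): for the loop-carried sum

     # b_3 = PHI <a_5(latch), 0(entry)>
     a_5 = c_2 + b_3;

   is_phi_for_stmt called on the a_5 statement with operand b_3 returns
   true, because b_3 is defined by a PHI that uses a_5, i.e. the addition
   destructively updates the accumulator.  */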
4323 static bool
4324 is_phi_for_stmt (gimple *stmt, tree operand)
4326 gimple *def_stmt;
4327 gphi *def_phi;
4328 tree lhs;
4329 use_operand_p arg_p;
4330 ssa_op_iter i;
4332 if (TREE_CODE (operand) != SSA_NAME)
4333 return false;
4335 lhs = gimple_assign_lhs (stmt);
4337 def_stmt = SSA_NAME_DEF_STMT (operand);
4338 def_phi = dyn_cast <gphi *> (def_stmt);
4339 if (!def_phi)
4340 return false;
4342 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4343 if (lhs == USE_FROM_PTR (arg_p))
4344 return true;
4345 return false;
4348 /* Remove def stmt of VAR if VAR has zero uses and recurse
4349 on rhs1 operand if so. */
4351 static void
4352 remove_visited_stmt_chain (tree var)
4354 gimple *stmt;
4355 gimple_stmt_iterator gsi;
4357 while (1)
4359 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4360 return;
4361 stmt = SSA_NAME_DEF_STMT (var);
4362 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4364 var = gimple_assign_rhs1 (stmt);
4365 gsi = gsi_for_stmt (stmt);
4366 reassoc_remove_stmt (&gsi);
4367 release_defs (stmt);
4369 else
4370 return;
4374 /* This function checks three consecutive operands in
4375 the passed operand vector OPS starting from OPINDEX and
4376 swaps two operands if it is profitable for the binary operation
4377 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4379 We pair ops with the same rank if possible.
4381 The alternative we try is to see if STMT is a destructive
4382 update style statement, which is like:
4383 b = phi (a, ...)
4384 a = c + b;
4385 In that case, we want to use the destructive update form to
4386 expose the possible vectorizer sum reduction opportunity.
4387 In that case, the third operand will be the phi node. This
4388 check is not performed if STMT is null.
4390 We could, of course, try to be better as noted above, and do a
4391 lot of work to try to find these opportunities in >3 operand
4392 cases, but it is unlikely to be worth it. */
4394 static void
4395 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4396 unsigned int opindex, gimple *stmt)
4398 operand_entry *oe1, *oe2, *oe3;
4400 oe1 = ops[opindex];
4401 oe2 = ops[opindex + 1];
4402 oe3 = ops[opindex + 2];
4404 if ((oe1->rank == oe2->rank
4405 && oe2->rank != oe3->rank)
4406 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4407 && !is_phi_for_stmt (stmt, oe1->op)
4408 && !is_phi_for_stmt (stmt, oe2->op)))
4409 std::swap (*oe1, *oe3);
4410 else if ((oe1->rank == oe3->rank
4411 && oe2->rank != oe3->rank)
4412 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4413 && !is_phi_for_stmt (stmt, oe1->op)
4414 && !is_phi_for_stmt (stmt, oe3->op)))
4415 std::swap (*oe1, *oe2);
4418 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4419 two definitions, otherwise return STMT. */
4421 static inline gimple *
4422 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4424 if (TREE_CODE (rhs1) == SSA_NAME
4425 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4426 stmt = SSA_NAME_DEF_STMT (rhs1);
4427 if (TREE_CODE (rhs2) == SSA_NAME
4428 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4429 stmt = SSA_NAME_DEF_STMT (rhs2);
4430 return stmt;
4433 /* If the stmt that defines operand has to be inserted, insert it
4434 before the use. */
4435 static void
4436 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4438 gcc_assert (is_gimple_assign (stmt_to_insert));
4439 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4440 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4441 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4442 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4443 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4445 /* If the insert point is not stmt, then insert_point would be
4446 the point where operand rhs1 or rhs2 is defined. In this case,
4447 stmt_to_insert has to be inserted afterwards. This would
4448 only happen when the stmt insertion point is flexible. */
4449 if (stmt == insert_point)
4450 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4451 else
4452 insert_stmt_after (stmt_to_insert, insert_point);
4456 /* Recursively rewrite our linearized statements so that the operators
4457 match those in OPS[OPINDEX], putting the computation in rank
4458 order. Return new lhs.
4459 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4460 the current stmt and during recursive invocations.
4461 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4462 recursive invocations. */
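/* A sketch of the recursion (statement and operand names are invented):
   with four entries OPS[0..3] and the linearized chain

     _5 = x_1 + y_2;   _6 = _5 + z_3;   lhs_7 = _6 + w_4;

   the call on lhs_7 with OPINDEX 0 recurses along rhs1 into _6 and then
   _5; the innermost call rewrites _5 to use OPS[2] and OPS[3], the next
   level plugs the returned name in as its rhs1 and OPS[1] as its rhs2,
   and the root finally uses OPS[0] as its rhs2.  */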
4464 static tree
4465 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4466 vec<operand_entry *> ops, bool changed, bool next_changed)
4468 tree rhs1 = gimple_assign_rhs1 (stmt);
4469 tree rhs2 = gimple_assign_rhs2 (stmt);
4470 tree lhs = gimple_assign_lhs (stmt);
4471 operand_entry *oe;
4473 /* The final recursion case for this function is that you have
4474 exactly two operations left.
4475 If we had exactly one op in the entire list to start with, we
4476 would have never called this function, and the tail recursion
4477 rewrites them one at a time. */
4478 if (opindex + 2 == ops.length ())
4480 operand_entry *oe1, *oe2;
4482 oe1 = ops[opindex];
4483 oe2 = ops[opindex + 1];
4485 if (rhs1 != oe1->op || rhs2 != oe2->op)
4487 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4488 unsigned int uid = gimple_uid (stmt);
4490 if (dump_file && (dump_flags & TDF_DETAILS))
4492 fprintf (dump_file, "Transforming ");
4493 print_gimple_stmt (dump_file, stmt, 0);
4496 /* If the stmt that defines operand has to be inserted, insert it
4497 before the use. */
4498 if (oe1->stmt_to_insert)
4499 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4500 if (oe2->stmt_to_insert)
4501 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4502 /* Even when changed is false, reassociation could have e.g. removed
4503 some redundant operations, so unless we are just swapping the
4504 arguments or unless there is no change at all (then we just
4505 return lhs), force creation of a new SSA_NAME. */
4506 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4508 gimple *insert_point
4509 = find_insert_point (stmt, oe1->op, oe2->op);
4510 lhs = make_ssa_name (TREE_TYPE (lhs));
4511 stmt
4512 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4513 oe1->op, oe2->op);
4514 gimple_set_uid (stmt, uid);
4515 gimple_set_visited (stmt, true);
4516 if (insert_point == gsi_stmt (gsi))
4517 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4518 else
4519 insert_stmt_after (stmt, insert_point);
4521 else
4523 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4524 == stmt);
4525 gimple_assign_set_rhs1 (stmt, oe1->op);
4526 gimple_assign_set_rhs2 (stmt, oe2->op);
4527 update_stmt (stmt);
4530 if (rhs1 != oe1->op && rhs1 != oe2->op)
4531 remove_visited_stmt_chain (rhs1);
4533 if (dump_file && (dump_flags & TDF_DETAILS))
4535 fprintf (dump_file, " into ");
4536 print_gimple_stmt (dump_file, stmt, 0);
4539 return lhs;
4542 /* If we hit here, we should have 3 or more ops left. */
4543 gcc_assert (opindex + 2 < ops.length ());
4545 /* Rewrite the next operator. */
4546 oe = ops[opindex];
4548 /* If the stmt that defines operand has to be inserted, insert it
4549 before the use. */
4550 if (oe->stmt_to_insert)
4551 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4553 /* Recurse on the LHS of the binary operator, which is guaranteed to
4554 be the non-leaf side. */
4555 tree new_rhs1
4556 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4557 changed || oe->op != rhs2 || next_changed,
4558 false);
4560 if (oe->op != rhs2 || new_rhs1 != rhs1)
4562 if (dump_file && (dump_flags & TDF_DETAILS))
4564 fprintf (dump_file, "Transforming ");
4565 print_gimple_stmt (dump_file, stmt, 0);
4568 /* If changed is false, this is either opindex == 0
4569 or all outer rhs2's were equal to corresponding oe->op,
4570 and powi_result is NULL.
4571 That means lhs is equivalent before and after reassociation.
4572 Otherwise ensure the old lhs SSA_NAME is not reused and
4573 create a new stmt as well, so that any debug stmts will be
4574 properly adjusted. */
4575 if (changed)
4577 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4578 unsigned int uid = gimple_uid (stmt);
4579 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4581 lhs = make_ssa_name (TREE_TYPE (lhs));
4582 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4583 new_rhs1, oe->op);
4584 gimple_set_uid (stmt, uid);
4585 gimple_set_visited (stmt, true);
4586 if (insert_point == gsi_stmt (gsi))
4587 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4588 else
4589 insert_stmt_after (stmt, insert_point);
4591 else
4593 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4594 == stmt);
4595 gimple_assign_set_rhs1 (stmt, new_rhs1);
4596 gimple_assign_set_rhs2 (stmt, oe->op);
4597 update_stmt (stmt);
4600 if (dump_file && (dump_flags & TDF_DETAILS))
4602 fprintf (dump_file, " into ");
4603 print_gimple_stmt (dump_file, stmt, 0);
4606 return lhs;
4609 /* Find out how many cycles we need to compute a statement chain.
4610 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
4611 maximum number of independent statements we may execute per cycle. */
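/* A worked example (numbers chosen purely for illustration): with
   OPS_NUM == 12 and CPU_WIDTH == 2 we spend 12 / (2 * 2) == 3 cycles
   retiring CPU_WIDTH operands each, leaving 12 - 3 * 2 == 6 operands;
   halving those takes floor_log2 (6) + 1 == 3 more cycles, so the
   function returns 6.  */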
4613 static int
4614 get_required_cycles (int ops_num, int cpu_width)
4616 int res;
4617 int elog;
4618 unsigned int rest;
4620 /* While we have more than 2 * cpu_width operands
4621 we may reduce the number of operands by cpu_width
4622 per cycle. */
4623 res = ops_num / (2 * cpu_width);
4625 /* The remaining operand count may be halved each cycle
4626 until we have only one operand. */
4627 rest = (unsigned)(ops_num - res * cpu_width);
4628 elog = exact_log2 (rest);
4629 if (elog >= 0)
4630 res += elog;
4631 else
4632 res += floor_log2 (rest) + 1;
4634 return res;
4637 /* Returns an optimal number of registers to use for computation of
4638 given statements. */
4640 static int
4641 get_reassociation_width (int ops_num, enum tree_code opc,
4642 machine_mode mode)
4644 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4645 int width;
4646 int width_min;
4647 int cycles_best;
4649 if (param_width > 0)
4650 width = param_width;
4651 else
4652 width = targetm.sched.reassociation_width (opc, mode);
4654 if (width == 1)
4655 return width;
4657 /* Get the minimal time required for sequence computation. */
4658 cycles_best = get_required_cycles (ops_num, width);
4660 /* Check if we may use less width and still compute the sequence in
4661 the same time. This will allow us to reduce register usage.
4662 get_required_cycles does not decrease as the width shrinks,
4663 so we can perform a binary search for the minimal width that still
4664 results in the optimal cycle count. */
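/* For instance (an illustrative case, not derived from any particular
   target): with ops_num == 6 and width == 4 the best count is 3 cycles;
   width 2 also needs only 3 cycles (one cycle of reduction by two
   leaves 4 operands, then two halving steps), so the search settles on
   width 2 and saves registers, while width 1 would need 5 cycles and is
   rejected.  */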
4665 width_min = 1;
4666 while (width > width_min)
4668 int width_mid = (width + width_min) / 2;
4670 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4671 width = width_mid;
4672 else if (width_min < width_mid)
4673 width_min = width_mid;
4674 else
4675 break;
4678 return width;
4681 /* Recursively rewrite our linearized statements so that the operators
4682 match those in OPS[OPINDEX], putting the computation in rank
4683 order and trying to allow operations to be executed in
4684 parallel. */
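/* A hedged sketch of the intended shape (operand names are invented):
   for four operands a, b, c, d and WIDTH == 2 the loop below builds
   something like

     _t1 = c + d;
     _t2 = a + b;
     lhs = _t1 + _t2;

   so the two leaf additions are independent and can issue in the same
   cycle, instead of the serial chain ((a + b) + c) + d.  */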
4686 static void
4687 rewrite_expr_tree_parallel (gassign *stmt, int width,
4688 vec<operand_entry *> ops)
4690 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4691 int op_num = ops.length ();
4692 gcc_assert (op_num > 0);
4693 int stmt_num = op_num - 1;
4694 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4695 int op_index = op_num - 1;
4696 int stmt_index = 0;
4697 int ready_stmts_end = 0;
4698 int i = 0;
4699 gimple *stmt1 = NULL, *stmt2 = NULL;
4700 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4702 /* We start expression rewriting from the top statements.
4703 So, in this loop we create a full list of statements
4704 we will work with. */
4705 stmts[stmt_num - 1] = stmt;
4706 for (i = stmt_num - 2; i >= 0; i--)
4707 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4709 for (i = 0; i < stmt_num; i++)
4711 tree op1, op2;
4713 /* Determine whether we should use results of
4714 already handled statements or not. */
4715 if (ready_stmts_end == 0
4716 && (i - stmt_index >= width || op_index < 1))
4717 ready_stmts_end = i;
4719 /* Now we choose operands for the next statement. A nonzero
4720 value in ready_stmts_end here means that we should use
4721 the results of already generated statements as new operands. */
4722 if (ready_stmts_end > 0)
4724 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4725 if (ready_stmts_end > stmt_index)
4726 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4727 else if (op_index >= 0)
4729 operand_entry *oe = ops[op_index--];
4730 stmt2 = oe->stmt_to_insert;
4731 op2 = oe->op;
4733 else
4735 gcc_assert (stmt_index < i);
4736 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4739 if (stmt_index >= ready_stmts_end)
4740 ready_stmts_end = 0;
4742 else
4744 if (op_index > 1)
4745 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4746 operand_entry *oe2 = ops[op_index--];
4747 operand_entry *oe1 = ops[op_index--];
4748 op2 = oe2->op;
4749 stmt2 = oe2->stmt_to_insert;
4750 op1 = oe1->op;
4751 stmt1 = oe1->stmt_to_insert;
4754 /* If we emit the last statement then we should put
4755 operands into the last statement. It will also
4756 break the loop. */
4757 if (op_index < 0 && stmt_index == i)
4758 i = stmt_num - 1;
4760 if (dump_file && (dump_flags & TDF_DETAILS))
4762 fprintf (dump_file, "Transforming ");
4763 print_gimple_stmt (dump_file, stmts[i], 0);
4766 /* If the stmt that defines operand has to be inserted, insert it
4767 before the use. */
4768 if (stmt1)
4769 insert_stmt_before_use (stmts[i], stmt1);
4770 if (stmt2)
4771 insert_stmt_before_use (stmts[i], stmt2);
4772 stmt1 = stmt2 = NULL;
4774 /* We keep the original statement only for the last one. All
4775 others are recreated. */
4776 if (i == stmt_num - 1)
4778 gimple_assign_set_rhs1 (stmts[i], op1);
4779 gimple_assign_set_rhs2 (stmts[i], op2);
4780 update_stmt (stmts[i]);
4782 else
4784 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4786 if (dump_file && (dump_flags & TDF_DETAILS))
4788 fprintf (dump_file, " into ");
4789 print_gimple_stmt (dump_file, stmts[i], 0);
4793 remove_visited_stmt_chain (last_rhs1);
4796 /* Transform STMT, which is really (A + B) + (C + D), into the left
4797 linear form, ((A + B) + C) + D.
4798 Recurse on D if necessary. */
4800 static void
4801 linearize_expr (gimple *stmt)
4803 gimple_stmt_iterator gsi;
4804 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4805 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4806 gimple *oldbinrhs = binrhs;
4807 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4808 gimple *newbinrhs = NULL;
4809 struct loop *loop = loop_containing_stmt (stmt);
4810 tree lhs = gimple_assign_lhs (stmt);
4812 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4813 && is_reassociable_op (binrhs, rhscode, loop));
4815 gsi = gsi_for_stmt (stmt);
4817 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4818 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4819 gimple_assign_rhs_code (binrhs),
4820 gimple_assign_lhs (binlhs),
4821 gimple_assign_rhs2 (binrhs));
4822 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4823 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4824 gimple_set_uid (binrhs, gimple_uid (stmt));
4826 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4827 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4829 if (dump_file && (dump_flags & TDF_DETAILS))
4831 fprintf (dump_file, "Linearized: ");
4832 print_gimple_stmt (dump_file, stmt, 0);
4835 reassociate_stats.linearized++;
4836 update_stmt (stmt);
4838 gsi = gsi_for_stmt (oldbinrhs);
4839 reassoc_remove_stmt (&gsi);
4840 release_defs (oldbinrhs);
4842 gimple_set_visited (stmt, true);
4843 gimple_set_visited (binlhs, true);
4844 gimple_set_visited (binrhs, true);
4846 /* Tail recurse on the new rhs if it still needs reassociation. */
4847 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4848 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4849 want to change the algorithm while converting to tuples. */
4850 linearize_expr (stmt);
4853 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4854 it. Otherwise, return NULL. */
4856 static gimple *
4857 get_single_immediate_use (tree lhs)
4859 use_operand_p immuse;
4860 gimple *immusestmt;
4862 if (TREE_CODE (lhs) == SSA_NAME
4863 && single_imm_use (lhs, &immuse, &immusestmt)
4864 && is_gimple_assign (immusestmt))
4865 return immusestmt;
4867 return NULL;
4870 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4871 representing the negated value. Insertions of any necessary
4872 instructions go before GSI.
4873 This function is recursive in that, if you hand it "a_5" as the
4874 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4875 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4877 static tree
4878 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4880 gimple *negatedefstmt = NULL;
4881 tree resultofnegate;
4882 gimple_stmt_iterator gsi;
4883 unsigned int uid;
4885 /* If we are trying to negate a name, defined by an add, negate the
4886 add operands instead. */
4887 if (TREE_CODE (tonegate) == SSA_NAME)
4888 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4889 if (TREE_CODE (tonegate) == SSA_NAME
4890 && is_gimple_assign (negatedefstmt)
4891 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4892 && has_single_use (gimple_assign_lhs (negatedefstmt))
4893 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4895 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4896 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4897 tree lhs = gimple_assign_lhs (negatedefstmt);
4898 gimple *g;
4900 gsi = gsi_for_stmt (negatedefstmt);
4901 rhs1 = negate_value (rhs1, &gsi);
4903 gsi = gsi_for_stmt (negatedefstmt);
4904 rhs2 = negate_value (rhs2, &gsi);
4906 gsi = gsi_for_stmt (negatedefstmt);
4907 lhs = make_ssa_name (TREE_TYPE (lhs));
4908 gimple_set_visited (negatedefstmt, true);
4909 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4910 gimple_set_uid (g, gimple_uid (negatedefstmt));
4911 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4912 return lhs;
4915 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4916 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4917 NULL_TREE, true, GSI_SAME_STMT);
4918 gsi = *gsip;
4919 uid = gimple_uid (gsi_stmt (gsi));
4920 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4922 gimple *stmt = gsi_stmt (gsi);
4923 if (gimple_uid (stmt) != 0)
4924 break;
4925 gimple_set_uid (stmt, uid);
4927 return resultofnegate;
4930 /* Return true if we should break up the subtract in STMT into an add
4931 with negate. This is true when the subtract operands are really
4932 adds, or the subtract itself is used in an add expression. In
4933 either case, breaking up the subtract into an add with negate
4934 exposes the adds to reassociation. */
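/* A hedged example (hypothetical names): for

     c_4 = a_1 - b_2;
     d_6 = c_4 + e_5;

   this returns true; c_4 is then rewritten as a_1 + -b_2 so the whole
   sum a_1 + -b_2 + e_5 becomes visible to the reassociation of adds.  */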
4936 static bool
4937 should_break_up_subtract (gimple *stmt)
4939 tree lhs = gimple_assign_lhs (stmt);
4940 tree binlhs = gimple_assign_rhs1 (stmt);
4941 tree binrhs = gimple_assign_rhs2 (stmt);
4942 gimple *immusestmt;
4943 struct loop *loop = loop_containing_stmt (stmt);
4945 if (TREE_CODE (binlhs) == SSA_NAME
4946 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4947 return true;
4949 if (TREE_CODE (binrhs) == SSA_NAME
4950 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4951 return true;
4953 if (TREE_CODE (lhs) == SSA_NAME
4954 && (immusestmt = get_single_immediate_use (lhs))
4955 && is_gimple_assign (immusestmt)
4956 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4957 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4958 && gimple_assign_rhs1 (immusestmt) == lhs)
4959 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4960 return true;
4961 return false;
4964 /* Transform STMT from A - B into A + -B. */
4966 static void
4967 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4969 tree rhs1 = gimple_assign_rhs1 (stmt);
4970 tree rhs2 = gimple_assign_rhs2 (stmt);
4972 if (dump_file && (dump_flags & TDF_DETAILS))
4974 fprintf (dump_file, "Breaking up subtract ");
4975 print_gimple_stmt (dump_file, stmt, 0);
4978 rhs2 = negate_value (rhs2, gsip);
4979 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4980 update_stmt (stmt);
4983 /* Determine whether STMT is a builtin call that raises an SSA name
4984 to an integer power and has only one use. If so, and this is early
4985 reassociation and unsafe math optimizations are permitted, place
4986 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4987 If any of these conditions does not hold, return FALSE. */
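/* A hedged illustration (the calls shown are hypothetical): for
   _7 = __builtin_pow (x_2, 4.0) with a single use and -fno-math-errno,
   *BASE is set to x_2 and *EXPONENT to 4.  A call such as
   __builtin_pow (x_2, 0.5) is rejected because 0.5 is not an exact
   integer, and __builtin_powi (x_2, n_3) is rejected because its
   exponent is not a constant.  */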
4989 static bool
4990 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4992 tree arg1;
4993 REAL_VALUE_TYPE c, cint;
4995 switch (gimple_call_combined_fn (stmt))
4997 CASE_CFN_POW:
4998 if (flag_errno_math)
4999 return false;
5001 *base = gimple_call_arg (stmt, 0);
5002 arg1 = gimple_call_arg (stmt, 1);
5004 if (TREE_CODE (arg1) != REAL_CST)
5005 return false;
5007 c = TREE_REAL_CST (arg1);
5009 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5010 return false;
5012 *exponent = real_to_integer (&c);
5013 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5014 if (!real_identical (&c, &cint))
5015 return false;
5017 break;
5019 CASE_CFN_POWI:
5020 *base = gimple_call_arg (stmt, 0);
5021 arg1 = gimple_call_arg (stmt, 1);
5023 if (!tree_fits_shwi_p (arg1))
5024 return false;
5026 *exponent = tree_to_shwi (arg1);
5027 break;
5029 default:
5030 return false;
5033 /* Expanding negative exponents is generally unproductive, so we don't
5034 complicate matters with those. Exponents of zero and one should
5035 have been handled by expression folding. */
5036 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5037 return false;
5039 return true;
5042 /* Try to derive and add operand entry for OP to *OPS. Return false if
5043 unsuccessful. */
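/* Two hedged examples of the special cases handled below (names are
   invented): in a MULT_EXPR chain, an operand defined as
   t_5 = __builtin_powi (x_1, 3) is recorded as the repeated factor
   (x_1, 3) via add_repeat_to_ops_vec when powi insertion is allowed;
   an operand defined as t_6 = -y_2 is replaced by the two operands
   y_2 and -1, so the negation can later be folded with other
   constants.  */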
5045 static bool
5046 try_special_add_to_ops (vec<operand_entry *> *ops,
5047 enum tree_code code,
5048 tree op, gimple* def_stmt)
5050 tree base = NULL_TREE;
5051 HOST_WIDE_INT exponent = 0;
5053 if (TREE_CODE (op) != SSA_NAME
5054 || ! has_single_use (op))
5055 return false;
5057 if (code == MULT_EXPR
5058 && reassoc_insert_powi_p
5059 && flag_unsafe_math_optimizations
5060 && is_gimple_call (def_stmt)
5061 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5063 add_repeat_to_ops_vec (ops, base, exponent);
5064 gimple_set_visited (def_stmt, true);
5065 return true;
5067 else if (code == MULT_EXPR
5068 && is_gimple_assign (def_stmt)
5069 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5070 && !HONOR_SNANS (TREE_TYPE (op))
5071 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5072 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5074 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5075 tree cst = build_minus_one_cst (TREE_TYPE (op));
5076 add_to_ops_vec (ops, rhs1);
5077 add_to_ops_vec (ops, cst);
5078 gimple_set_visited (def_stmt, true);
5079 return true;
5082 return false;
5085 /* Recursively linearize a binary expression that is the RHS of STMT.
5086 Place the operands of the expression tree in the vector named OPS. */
5088 static void
5089 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5090 bool is_associative, bool set_visited)
5092 tree binlhs = gimple_assign_rhs1 (stmt);
5093 tree binrhs = gimple_assign_rhs2 (stmt);
5094 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5095 bool binlhsisreassoc = false;
5096 bool binrhsisreassoc = false;
5097 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5098 struct loop *loop = loop_containing_stmt (stmt);
5100 if (set_visited)
5101 gimple_set_visited (stmt, true);
5103 if (TREE_CODE (binlhs) == SSA_NAME)
5105 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5106 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5107 && !stmt_could_throw_p (binlhsdef));
5110 if (TREE_CODE (binrhs) == SSA_NAME)
5112 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5113 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5114 && !stmt_could_throw_p (binrhsdef));
5117 /* If the LHS is not reassociable, but the RHS is, we need to swap
5118 them. If neither is reassociable, there is nothing we can do, so
5119 just put them in the ops vector. If the LHS is reassociable,
5120 linearize it. If both are reassociable, then linearize the RHS
5121 and the LHS. */
5123 if (!binlhsisreassoc)
5125 /* If this is not an associative operation like division, give up. */
5126 if (!is_associative)
5128 add_to_ops_vec (ops, binrhs);
5129 return;
5132 if (!binrhsisreassoc)
5134 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5135 add_to_ops_vec (ops, binrhs);
5137 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5138 add_to_ops_vec (ops, binlhs);
5140 return;
5143 if (dump_file && (dump_flags & TDF_DETAILS))
5145 fprintf (dump_file, "swapping operands of ");
5146 print_gimple_stmt (dump_file, stmt, 0);
5149 swap_ssa_operands (stmt,
5150 gimple_assign_rhs1_ptr (stmt),
5151 gimple_assign_rhs2_ptr (stmt));
5152 update_stmt (stmt);
5154 if (dump_file && (dump_flags & TDF_DETAILS))
5156 fprintf (dump_file, " is now ");
5157 print_gimple_stmt (dump_file, stmt, 0);
5160 /* We want to make it so the lhs is always the reassociative op,
5161 so swap. */
5162 std::swap (binlhs, binrhs);
5164 else if (binrhsisreassoc)
5166 linearize_expr (stmt);
5167 binlhs = gimple_assign_rhs1 (stmt);
5168 binrhs = gimple_assign_rhs2 (stmt);
5171 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5172 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5173 rhscode, loop));
5174 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5175 is_associative, set_visited);
5177 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5178 add_to_ops_vec (ops, binrhs);
5181 /* Repropagate the negates back into subtracts, since no other pass
5182 currently does it. */
5184 static void
5185 repropagate_negates (void)
5187 unsigned int i = 0;
5188 tree negate;
5190 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5192 gimple *user = get_single_immediate_use (negate);
5194 if (!user || !is_gimple_assign (user))
5195 continue;
5197 /* The negate operand can be either operand of a PLUS_EXPR
5198 (it can be the LHS if the RHS is a constant for example).
5200 Force the negate operand to the RHS of the PLUS_EXPR, then
5201 transform the PLUS_EXPR into a MINUS_EXPR. */
5202 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5204 /* If the negated operand appears on the LHS of the
5205 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5206 to force the negated operand to the RHS of the PLUS_EXPR. */
5207 if (gimple_assign_rhs1 (user) == negate)
5209 swap_ssa_operands (user,
5210 gimple_assign_rhs1_ptr (user),
5211 gimple_assign_rhs2_ptr (user));
5214 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5215 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5216 if (gimple_assign_rhs2 (user) == negate)
5218 tree rhs1 = gimple_assign_rhs1 (user);
5219 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5220 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5221 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5222 update_stmt (user);
5225 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5227 if (gimple_assign_rhs1 (user) == negate)
5229 /* We have
5230 x = -a
5231 y = x - b
5232 which we transform into
5233 x = a + b
5234 y = -x .
5235 This pushes down the negate which we possibly can merge
5236 into some other operation, hence insert it into the
5237 plus_negates vector. */
5238 gimple *feed = SSA_NAME_DEF_STMT (negate);
5239 tree a = gimple_assign_rhs1 (feed);
5240 tree b = gimple_assign_rhs2 (user);
5241 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5242 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5243 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5244 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5245 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5246 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5247 user = gsi_stmt (gsi2);
5248 update_stmt (user);
5249 reassoc_remove_stmt (&gsi);
5250 release_defs (feed);
5251 plus_negates.safe_push (gimple_assign_lhs (user));
5253 else
5255 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5256 rid of one operation. */
5257 gimple *feed = SSA_NAME_DEF_STMT (negate);
5258 tree a = gimple_assign_rhs1 (feed);
5259 tree rhs1 = gimple_assign_rhs1 (user);
5260 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5261 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5262 update_stmt (gsi_stmt (gsi));
5268 /* Returns true if OP is of a type for which we can do reassociation.
5269 That is, for integral or non-saturating fixed-point types, and for
5270 floating-point types when associative-math is enabled. */
5272 static bool
5273 can_reassociate_p (tree op)
5275 tree type = TREE_TYPE (op);
5276 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5277 return false;
5278 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5279 || NON_SAT_FIXED_POINT_TYPE_P (type)
5280 || (flag_associative_math && FLOAT_TYPE_P (type)))
5281 return true;
5282 return false;
5285 /* Break up subtract operations in block BB.
5287 We do this top down because we don't know whether the subtract is
5288 part of a possible chain of reassociation except at the top.
5290 IE given
5291 d = f + g
5292 c = a + e
5293 b = c - d
5294 q = b - r
5295 k = t - q
5297 we want to break up k = t - q, but we won't until we've transformed q
5298 = b - r, which won't be broken up until we transform b = c - d.
5300 En passant, clear the GIMPLE visited flag on every statement
5301 and set UIDs within each basic block. */
5303 static void
5304 break_up_subtract_bb (basic_block bb)
5306 gimple_stmt_iterator gsi;
5307 basic_block son;
5308 unsigned int uid = 1;
5310 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5312 gimple *stmt = gsi_stmt (gsi);
5313 gimple_set_visited (stmt, false);
5314 gimple_set_uid (stmt, uid++);
5316 if (!is_gimple_assign (stmt)
5317 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5318 continue;
5320 /* Look for simple gimple subtract operations. */
5321 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5323 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5324 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5325 continue;
5327 /* Check for a subtract used only in an addition. If this
5328 is the case, transform it into an add of a negate for better
5329 reassociation. IE transform C = A-B into C = A + -B if C
5330 is only used in an addition. */
5331 if (should_break_up_subtract (stmt))
5332 break_up_subtract (stmt, &gsi);
5334 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5335 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5336 plus_negates.safe_push (gimple_assign_lhs (stmt));
5338 for (son = first_dom_son (CDI_DOMINATORS, bb);
5339 son;
5340 son = next_dom_son (CDI_DOMINATORS, son))
5341 break_up_subtract_bb (son);
5344 /* Used for repeated factor analysis. */
5345 struct repeat_factor
5347 /* An SSA name that occurs in a multiply chain. */
5348 tree factor;
5350 /* Cached rank of the factor. */
5351 unsigned rank;
5353 /* Number of occurrences of the factor in the chain. */
5354 HOST_WIDE_INT count;
5356 /* An SSA name representing the product of this factor and
5357 all factors appearing later in the repeated factor vector. */
5358 tree repr;
5362 static vec<repeat_factor> repeat_factor_vec;
5364 /* Used for sorting the repeat factor vector. Sort primarily by
5365 ascending occurrence count, secondarily by descending rank. */
5367 static int
5368 compare_repeat_factors (const void *x1, const void *x2)
5370 const repeat_factor *rf1 = (const repeat_factor *) x1;
5371 const repeat_factor *rf2 = (const repeat_factor *) x2;
5373 if (rf1->count != rf2->count)
5374 return rf1->count - rf2->count;
5376 return rf2->rank - rf1->rank;
5379 /* Look for repeated operands in OPS in the multiply tree rooted at
5380 STMT. Replace them with an optimal sequence of multiplies and powi
5381 builtin calls, and remove the used operands from OPS. Return an
5382 SSA name representing the value of the replacement sequence. */
5384 static tree
5385 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5387 unsigned i, j, vec_len;
5388 int ii;
5389 operand_entry *oe;
5390 repeat_factor *rf1, *rf2;
5391 repeat_factor rfnew;
5392 tree result = NULL_TREE;
5393 tree target_ssa, iter_result;
5394 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5395 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5396 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5397 gimple *mul_stmt, *pow_stmt;
5399 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5400 target. */
5401 if (!powi_fndecl)
5402 return NULL_TREE;
5404 /* Allocate the repeated factor vector. */
5405 repeat_factor_vec.create (10);
5407 /* Scan the OPS vector for all SSA names in the product and build
5408 up a vector of occurrence counts for each factor. */
5409 FOR_EACH_VEC_ELT (*ops, i, oe)
5411 if (TREE_CODE (oe->op) == SSA_NAME)
5413 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5415 if (rf1->factor == oe->op)
5417 rf1->count += oe->count;
5418 break;
5422 if (j >= repeat_factor_vec.length ())
5424 rfnew.factor = oe->op;
5425 rfnew.rank = oe->rank;
5426 rfnew.count = oe->count;
5427 rfnew.repr = NULL_TREE;
5428 repeat_factor_vec.safe_push (rfnew);
5433 /* Sort the repeated factor vector by (a) increasing occurrence count,
5434 and (b) decreasing rank. */
5435 repeat_factor_vec.qsort (compare_repeat_factors);
5437 /* It is generally best to combine as many base factors as possible
5438 into a product before applying __builtin_powi to the result.
5439 However, the sort order chosen for the repeated factor vector
5440 allows us to cache partial results for the product of the base
5441 factors for subsequent use. When we already have a cached partial
5442 result from a previous iteration, it is best to make use of it
5443 before looking for another __builtin_pow opportunity.
5445 As an example, consider x * x * y * y * y * z * z * z * z.
5446 We want to first compose the product x * y * z, raise it to the
5447 second power, then multiply this by y * z, and finally multiply
5448 by z. This can be done in 5 multiplies provided we cache y * z
5449 for use in both expressions:
5451 t1 = y * z
5452 t2 = t1 * x
5453 t3 = t2 * t2
5454 t4 = t1 * t3
5455 result = t4 * z
5457 If we instead ignored the cached y * z and first multiplied by
5458 the __builtin_pow opportunity z * z, we would get the inferior:
5460 t1 = y * z
5461 t2 = t1 * x
5462 t3 = t2 * t2
5463 t4 = z * z
5464 t5 = t3 * t4
5465 result = t5 * y */
5467 vec_len = repeat_factor_vec.length ();
5469 /* Repeatedly look for opportunities to create a builtin_powi call. */
5470 while (true)
5472 HOST_WIDE_INT power;
5474 /* First look for the largest cached product of factors from
5475 preceding iterations. If found, create a builtin_powi for
5476 it if the minimum occurrence count for its factors is at
5477 least 2, or just use this cached product as our next
5478 multiplicand if the minimum occurrence count is 1. */
5479 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5481 if (rf1->repr && rf1->count > 0)
5482 break;
5485 if (j < vec_len)
5487 power = rf1->count;
5489 if (power == 1)
5491 iter_result = rf1->repr;
5493 if (dump_file && (dump_flags & TDF_DETAILS))
5495 unsigned elt;
5496 repeat_factor *rf;
5497 fputs ("Multiplying by cached product ", dump_file);
5498 for (elt = j; elt < vec_len; elt++)
5500 rf = &repeat_factor_vec[elt];
5501 print_generic_expr (dump_file, rf->factor);
5502 if (elt < vec_len - 1)
5503 fputs (" * ", dump_file);
5505 fputs ("\n", dump_file);
5508 else
5510 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5511 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5512 build_int_cst (integer_type_node,
5513 power));
5514 gimple_call_set_lhs (pow_stmt, iter_result);
5515 gimple_set_location (pow_stmt, gimple_location (stmt));
5516 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5517 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5519 if (dump_file && (dump_flags & TDF_DETAILS))
5521 unsigned elt;
5522 repeat_factor *rf;
5523 fputs ("Building __builtin_pow call for cached product (",
5524 dump_file);
5525 for (elt = j; elt < vec_len; elt++)
5527 rf = &repeat_factor_vec[elt];
5528 print_generic_expr (dump_file, rf->factor);
5529 if (elt < vec_len - 1)
5530 fputs (" * ", dump_file);
5532 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5533 power);
5537 else
5539 /* Otherwise, find the first factor in the repeated factor
5540 vector whose occurrence count is at least 2. If no such
5541 factor exists, there are no builtin_powi opportunities
5542 remaining. */
5543 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5545 if (rf1->count >= 2)
5546 break;
5549 if (j >= vec_len)
5550 break;
5552 power = rf1->count;
5554 if (dump_file && (dump_flags & TDF_DETAILS))
5556 unsigned elt;
5557 repeat_factor *rf;
5558 fputs ("Building __builtin_pow call for (", dump_file);
5559 for (elt = j; elt < vec_len; elt++)
5561 rf = &repeat_factor_vec[elt];
5562 print_generic_expr (dump_file, rf->factor);
5563 if (elt < vec_len - 1)
5564 fputs (" * ", dump_file);
5566 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5569 reassociate_stats.pows_created++;
5571 /* Visit each element of the vector in reverse order (so that
5572 high-occurrence elements are visited first, and within the
5573 same occurrence count, lower-ranked elements are visited
5574 first). Form a linear product of all elements in this order
5575 whose occurrence count is at least that of element J.
5576 Record the SSA name representing the product of each element
5577 with all subsequent elements in the vector. */
5578 if (j == vec_len - 1)
5579 rf1->repr = rf1->factor;
5580 else
5582 for (ii = vec_len - 2; ii >= (int)j; ii--)
5584 tree op1, op2;
5586 rf1 = &repeat_factor_vec[ii];
5587 rf2 = &repeat_factor_vec[ii + 1];
5589 /* Init the last factor's representative to be itself. */
5590 if (!rf2->repr)
5591 rf2->repr = rf2->factor;
5593 op1 = rf1->factor;
5594 op2 = rf2->repr;
5596 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5597 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5598 op1, op2);
5599 gimple_set_location (mul_stmt, gimple_location (stmt));
5600 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5601 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5602 rf1->repr = target_ssa;
5604 /* Don't reprocess the multiply we just introduced. */
5605 gimple_set_visited (mul_stmt, true);
5609 /* Form a call to __builtin_powi for the maximum product
5610 just formed, raised to the power obtained earlier. */
5611 rf1 = &repeat_factor_vec[j];
5612 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5613 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5614 build_int_cst (integer_type_node,
5615 power));
5616 gimple_call_set_lhs (pow_stmt, iter_result);
5617 gimple_set_location (pow_stmt, gimple_location (stmt));
5618 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5619 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5622 /* If we previously formed at least one other builtin_powi call,
5623 form the product of this one and those others. */
5624 if (result)
5626 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5627 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5628 result, iter_result);
5629 gimple_set_location (mul_stmt, gimple_location (stmt));
5630 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5631 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5632 gimple_set_visited (mul_stmt, true);
5633 result = new_result;
5635 else
5636 result = iter_result;
5638 /* Decrement the occurrence count of each element in the product
5639 by the count found above, and remove this many copies of each
5640 factor from OPS. */
5641 for (i = j; i < vec_len; i++)
5643 unsigned k = power;
5644 unsigned n;
5646 rf1 = &repeat_factor_vec[i];
5647 rf1->count -= power;
5649 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5651 if (oe->op == rf1->factor)
5653 if (oe->count <= k)
5655 ops->ordered_remove (n);
5656 k -= oe->count;
5658 if (k == 0)
5659 break;
5661 else
5663 oe->count -= k;
5664 break;
5671 /* At this point all elements in the repeated factor vector have a
5672 remaining occurrence count of 0 or 1, and those with a count of 1
5673 don't have cached representatives. Re-sort the ops vector and
5674 clean up. */
5675 ops->qsort (sort_by_operand_rank);
5676 repeat_factor_vec.release ();
5678 /* Return the final product computed herein. Note that there may
5679 still be some elements with single occurrence count left in OPS;
5680 those will be handled by the normal reassociation logic. */
5681 return result;
5684 /* Attempt to optimize
5685 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5686 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
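/* A hedged numeric example (values invented): for the product
   -2.5 * copysign (3.0, y_1) the constants fold to -7.5, a new call
   copysign (-7.5, y_1) is built, and because CST1 is negative its
   result is additionally negated, matching the second pattern above.  */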
5688 static void
5689 attempt_builtin_copysign (vec<operand_entry *> *ops)
5691 operand_entry *oe;
5692 unsigned int i;
5693 unsigned int length = ops->length ();
5694 tree cst = ops->last ()->op;
5696 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5697 return;
5699 FOR_EACH_VEC_ELT (*ops, i, oe)
5701 if (TREE_CODE (oe->op) == SSA_NAME
5702 && has_single_use (oe->op))
5704 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5705 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5707 tree arg0, arg1;
5708 switch (gimple_call_combined_fn (old_call))
5710 CASE_CFN_COPYSIGN:
5711 CASE_CFN_COPYSIGN_FN:
5712 arg0 = gimple_call_arg (old_call, 0);
5713 arg1 = gimple_call_arg (old_call, 1);
5714 /* The first argument of copysign must be a constant,
5715 otherwise there's nothing to do. */
5716 if (TREE_CODE (arg0) == REAL_CST)
5718 tree type = TREE_TYPE (arg0);
5719 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5720 /* If we couldn't fold to a single constant, skip it.
5721 That happens e.g. for inexact multiplication when
5722 -frounding-math. */
5723 if (mul == NULL_TREE)
5724 break;
5725 /* Instead of adjusting OLD_CALL, let's build a new
5726 call to not leak the LHS and prevent keeping bogus
5727 debug statements. DCE will clean up the old call. */
5728 gcall *new_call;
5729 if (gimple_call_internal_p (old_call))
5730 new_call = gimple_build_call_internal
5731 (IFN_COPYSIGN, 2, mul, arg1);
5732 else
5733 new_call = gimple_build_call
5734 (gimple_call_fndecl (old_call), 2, mul, arg1);
5735 tree lhs = make_ssa_name (type);
5736 gimple_call_set_lhs (new_call, lhs);
5737 gimple_set_location (new_call,
5738 gimple_location (old_call));
5739 insert_stmt_after (new_call, old_call);
5740 /* We've used the constant, get rid of it. */
5741 ops->pop ();
5742 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5743 /* Handle the CST1 < 0 case by negating the result. */
5744 if (cst1_neg)
5746 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5747 gimple *negate_stmt
5748 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5749 insert_stmt_after (negate_stmt, new_call);
5750 oe->op = negrhs;
5752 else
5753 oe->op = lhs;
5754 if (dump_file && (dump_flags & TDF_DETAILS))
5756 fprintf (dump_file, "Optimizing copysign: ");
5757 print_generic_expr (dump_file, cst);
5758 fprintf (dump_file, " * COPYSIGN (");
5759 print_generic_expr (dump_file, arg0);
5760 fprintf (dump_file, ", ");
5761 print_generic_expr (dump_file, arg1);
5762 fprintf (dump_file, ") into %sCOPYSIGN (",
5763 cst1_neg ? "-" : "");
5764 print_generic_expr (dump_file, mul);
5765 fprintf (dump_file, ", ");
5766 print_generic_expr (dump_file, arg1);
5767 fprintf (dump_file, ")\n");
5769 return;
5771 break;
5772 default:
5773 break;
5780 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5782 static void
5783 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5785 tree rhs1;
5787 if (dump_file && (dump_flags & TDF_DETAILS))
5789 fprintf (dump_file, "Transforming ");
5790 print_gimple_stmt (dump_file, stmt, 0);
5793 rhs1 = gimple_assign_rhs1 (stmt);
5794 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5795 update_stmt (stmt);
5796 remove_visited_stmt_chain (rhs1);
5798 if (dump_file && (dump_flags & TDF_DETAILS))
5800 fprintf (dump_file, " into ");
5801 print_gimple_stmt (dump_file, stmt, 0);
5805 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5807 static void
5808 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5809 tree rhs1, tree rhs2)
5811 if (dump_file && (dump_flags & TDF_DETAILS))
5813 fprintf (dump_file, "Transforming ");
5814 print_gimple_stmt (dump_file, stmt, 0);
5817 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5818 update_stmt (gsi_stmt (*gsi));
5819 remove_visited_stmt_chain (rhs1);
5821 if (dump_file && (dump_flags & TDF_DETAILS))
5823 fprintf (dump_file, " into ");
5824 print_gimple_stmt (dump_file, stmt, 0);
5828 /* Reassociate expressions in basic block BB and its post-dominator
5829 tree children.
5831 Bubble up the return status from maybe_optimize_range_tests. */
5833 static bool
5834 reassociate_bb (basic_block bb)
5836 gimple_stmt_iterator gsi;
5837 basic_block son;
5838 gimple *stmt = last_stmt (bb);
5839 bool cfg_cleanup_needed = false;
5841 if (stmt && !gimple_visited_p (stmt))
5842 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5844 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5846 stmt = gsi_stmt (gsi);
5848 if (is_gimple_assign (stmt)
5849 && !stmt_could_throw_p (stmt))
5851 tree lhs, rhs1, rhs2;
5852 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5854 /* If this is not a gimple binary expression, there is
5855 nothing for us to do with it. */
5856 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5857 continue;
5859 /* If this was part of an already processed statement,
5860 we don't need to touch it again. */
5861 if (gimple_visited_p (stmt))
5863 /* This statement might have become dead because of previous
5864 reassociations. */
5865 if (has_zero_uses (gimple_get_lhs (stmt)))
5867 reassoc_remove_stmt (&gsi);
5868 release_defs (stmt);
5869 /* Removing the stmt above may have left the iterator
5870 at the end of the sequence.  Reset it to the last
5871 stmt of the block; if the block is now empty, we
5872 need to bail out.  */
5875 if (gsi_end_p (gsi))
5877 gsi = gsi_last_bb (bb);
5878 if (gsi_end_p (gsi))
5879 break;
5882 continue;
5885 lhs = gimple_assign_lhs (stmt);
5886 rhs1 = gimple_assign_rhs1 (stmt);
5887 rhs2 = gimple_assign_rhs2 (stmt);
5889 /* For operations other than bitwise and min/max ops we cannot
5890 reassociate all types.  Verify that here.  */
5891 if (rhs_code != BIT_IOR_EXPR
5892 && rhs_code != BIT_AND_EXPR
5893 && rhs_code != BIT_XOR_EXPR
5894 && rhs_code != MIN_EXPR
5895 && rhs_code != MAX_EXPR
5896 && (!can_reassociate_p (lhs)
5897 || !can_reassociate_p (rhs1)
5898 || !can_reassociate_p (rhs2)))
5899 continue;
5901 if (associative_tree_code (rhs_code))
5903 auto_vec<operand_entry *> ops;
5904 tree powi_result = NULL_TREE;
5905 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5907 /* There may be no immediate uses left by the time we
5908 get here because we may have eliminated them all. */
5909 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5910 continue;
5912 gimple_set_visited (stmt, true);
5913 linearize_expr_tree (&ops, stmt, true, true);
5914 ops.qsort (sort_by_operand_rank);
5915 int orig_len = ops.length ();
5916 optimize_ops_list (rhs_code, &ops);
5917 if (undistribute_ops_list (rhs_code, &ops,
5918 loop_containing_stmt (stmt)))
5920 ops.qsort (sort_by_operand_rank);
5921 optimize_ops_list (rhs_code, &ops);
5924 if (rhs_code == PLUS_EXPR
5925 && transform_add_to_multiply (&ops))
5926 ops.qsort (sort_by_operand_rank);
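/* transform_add_to_multiply above turns repeated addition of the same
   value -- e.g. x_1 + x_1 + x_1 -- into a multiplication by a constant
   (3 * x_1), hence the re-sort (illustrative SSA name).  */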
5928 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5930 if (is_vector)
5931 optimize_vec_cond_expr (rhs_code, &ops);
5932 else
5933 optimize_range_tests (rhs_code, &ops, NULL);
5936 if (rhs_code == MULT_EXPR && !is_vector)
5938 attempt_builtin_copysign (&ops);
5940 if (reassoc_insert_powi_p
5941 && flag_unsafe_math_optimizations)
5942 powi_result = attempt_builtin_powi (stmt, &ops);
5945 operand_entry *last;
5946 bool negate_result = false;
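/* A trailing -1 factor, e.g. in x_5 = a_1 * b_2 * -1.0, is peeled off
   here and reapplied as a NEGATE_EXPR once the tree has been rewritten
   (see the negate_result handling below); illustrative SSA names.  */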
5947 if (ops.length () > 1
5948 && rhs_code == MULT_EXPR)
5950 last = ops.last ();
5951 if ((integer_minus_onep (last->op)
5952 || real_minus_onep (last->op))
5953 && !HONOR_SNANS (TREE_TYPE (lhs))
5954 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5955 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5957 ops.pop ();
5958 negate_result = true;
5962 tree new_lhs = lhs;
5963 /* If the operand vector is now empty, all operands were
5964 consumed by the __builtin_powi optimization. */
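/* E.g. (illustrative SSA names) if every operand of
   x_6 = a_1 * a_1 * a_1 * a_1 was folded into a single
   __builtin_powi (a_1, 4) call, STMT simply becomes a copy of that
   call's result.  */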
5965 if (ops.length () == 0)
5966 transform_stmt_to_copy (&gsi, stmt, powi_result);
5967 else if (ops.length () == 1)
5969 tree last_op = ops.last ()->op;
5971 /* If the stmt that defines the operand has to be inserted, insert it
5972 before the use. */
5973 if (ops.last ()->stmt_to_insert)
5974 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5975 if (powi_result)
5976 transform_stmt_to_multiply (&gsi, stmt, last_op,
5977 powi_result);
5978 else
5979 transform_stmt_to_copy (&gsi, stmt, last_op);
5981 else
5983 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5984 int ops_num = ops.length ();
5985 int width = get_reassociation_width (ops_num, rhs_code, mode);
5987 if (dump_file && (dump_flags & TDF_DETAILS))
5988 fprintf (dump_file,
5989 "Width = %d was chosen for reassociation\n", width);
5992 /* For binary bit operations, if there are at least 3
5993 operands and the last operand in OPS is a constant,
5994 move it to the front. This helps ensure that we generate
5995 (X & Y) & C rather than (X & C) & Y. The former will
5996 often match a canonical bit test when we get to RTL. */
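/* E.g. a sorted operand list {x_1, y_2, 16} becomes {16, y_2, x_1}, so
   the rewrite places the constant outermost, giving (x_1 & y_2) & 16
   rather than (x_1 & 16) & y_2 (illustrative names; the order inside
   the inner AND may differ).  */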
5997 if (ops.length () > 2
5998 && (rhs_code == BIT_AND_EXPR
5999 || rhs_code == BIT_IOR_EXPR
6000 || rhs_code == BIT_XOR_EXPR)
6001 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6002 std::swap (*ops[0], *ops[ops_num - 1]);
6004 if (width > 1
6005 && ops.length () > 3)
6006 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6007 width, ops);
6008 else
6010 /* When there are three operands left, we want
6011 to make sure the ones that get the double
6012 binary op are chosen wisely. */
6013 int len = ops.length ();
6014 if (len >= 3)
6015 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6017 new_lhs = rewrite_expr_tree (stmt, 0, ops,
6018 powi_result != NULL
6019 || negate_result,
6020 len != orig_len);
6023 /* If we combined some repeated factors into a
6024 __builtin_powi call, multiply that result by the
6025 reassociated operands. */
6026 if (powi_result)
6028 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6029 tree type = TREE_TYPE (lhs);
6030 tree target_ssa = make_temp_ssa_name (type, NULL,
6031 "reassocpow");
6032 gimple_set_lhs (lhs_stmt, target_ssa);
6033 update_stmt (lhs_stmt);
6034 if (lhs != new_lhs)
6036 target_ssa = new_lhs;
6037 new_lhs = lhs;
6039 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6040 powi_result, target_ssa);
6041 gimple_set_location (mul_stmt, gimple_location (stmt));
6042 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6043 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6047 if (negate_result)
6049 stmt = SSA_NAME_DEF_STMT (lhs);
6050 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6051 gimple_set_lhs (stmt, tmp);
6052 if (lhs != new_lhs)
6053 tmp = new_lhs;
6054 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6055 tmp);
6056 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6057 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6058 update_stmt (stmt);
6063 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6064 son;
6065 son = next_dom_son (CDI_POST_DOMINATORS, son))
6066 cfg_cleanup_needed |= reassociate_bb (son);
6068 return cfg_cleanup_needed;
6071 /* Add jumps around shifts for range tests turned into bit tests.
6072 For each SSA_NAME VAR we have code like:
6073 VAR = ...; // final stmt of range comparison
6074 // bit test here...;
6075 OTHERVAR = ...; // final stmt of the bit test sequence
6076 RES = VAR | OTHERVAR;
6077 Turn the above into:
6078 VAR = ...;
6079 if (VAR != 0)
6080 goto <l3>;
6081 else
6082 goto <l2>;
6083 <l2>:
6084 // bit test here...;
6085 OTHERVAR = ...;
6086 <l3>:
6087 # RES = PHI<1(l1), OTHERVAR(l2)>; */
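/* The point of the new branch is that the bit-test sequence, which
   typically contains a shift by a variable amount, is skipped entirely
   whenever VAR, the result of the first range comparison, is already
   nonzero.  */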
6089 static void
6090 branch_fixup (void)
6092 tree var;
6093 unsigned int i;
6095 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6097 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6098 gimple *use_stmt;
6099 use_operand_p use;
6100 bool ok = single_imm_use (var, &use, &use_stmt);
6101 gcc_assert (ok
6102 && is_gimple_assign (use_stmt)
6103 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6104 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6106 basic_block cond_bb = gimple_bb (def_stmt);
6107 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6108 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6110 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6111 gimple *g = gimple_build_cond (NE_EXPR, var,
6112 build_zero_cst (TREE_TYPE (var)),
6113 NULL_TREE, NULL_TREE);
6114 location_t loc = gimple_location (use_stmt);
6115 gimple_set_location (g, loc);
6116 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6118 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6119 etrue->probability = profile_probability::even ();
6120 edge efalse = find_edge (cond_bb, then_bb);
6121 efalse->flags = EDGE_FALSE_VALUE;
6122 efalse->probability -= etrue->probability;
6123 then_bb->count -= etrue->count ();
6125 tree othervar = NULL_TREE;
6126 if (gimple_assign_rhs1 (use_stmt) == var)
6127 othervar = gimple_assign_rhs2 (use_stmt);
6128 else if (gimple_assign_rhs2 (use_stmt) == var)
6129 othervar = gimple_assign_rhs1 (use_stmt);
6130 else
6131 gcc_unreachable ();
6132 tree lhs = gimple_assign_lhs (use_stmt);
6133 gphi *phi = create_phi_node (lhs, merge_bb);
6134 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6135 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6136 gsi = gsi_for_stmt (use_stmt);
6137 gsi_remove (&gsi, true);
6139 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6140 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6142 reassoc_branch_fixups.release ();
6145 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6146 void debug_ops_vector (vec<operand_entry *> ops);
6148 /* Dump the operand entry vector OPS to FILE. */
6150 void
6151 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6153 operand_entry *oe;
6154 unsigned int i;
6156 FOR_EACH_VEC_ELT (ops, i, oe)
6158 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6159 print_generic_expr (file, oe->op);
6160 fprintf (file, "\n");
6164 /* Dump the operand entry vector OPS to STDERR. */
6166 DEBUG_FUNCTION void
6167 debug_ops_vector (vec<operand_entry *> ops)
6169 dump_ops_vector (stderr, ops);
6172 /* Bubble up return status from reassociate_bb. */
6174 static bool
6175 do_reassoc (void)
6177 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6178 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6181 /* Initialize the reassociation pass. */
6183 static void
6184 init_reassoc (void)
6186 int i;
6187 long rank = 2;
6188 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6190 /* Find the loops, so that we can prevent moving calculations in
6191 them. */
6192 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6194 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6196 next_operand_entry_id = 0;
6198 /* Reverse post order (RPO) gives us an ordering in which
6199 deeper loops come later.  */
6200 pre_and_rev_post_order_compute (NULL, bbs, false);
6201 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6202 operand_rank = new hash_map<tree, long>;
6204 /* Give each default definition a distinct rank. This includes
6205 parameters and the static chain. Walk backwards over all
6206 SSA names so that we get proper rank ordering according
6207 to tree_swap_operands_p. */
6208 for (i = num_ssa_names - 1; i > 0; --i)
6210 tree name = ssa_name (i);
6211 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6212 insert_operand_rank (name, ++rank);
6215 /* Set up rank for each BB */
6216 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6217 bb_rank[bbs[i]] = ++rank << 16;
6219 free (bbs);
6220 calculate_dominance_info (CDI_POST_DOMINATORS);
6221 plus_negates = vNULL;
6224 /* Clean up after the reassociation pass, and print stats if
6225 requested. */
6227 static void
6228 fini_reassoc (void)
6230 statistics_counter_event (cfun, "Linearized",
6231 reassociate_stats.linearized);
6232 statistics_counter_event (cfun, "Constants eliminated",
6233 reassociate_stats.constants_eliminated);
6234 statistics_counter_event (cfun, "Ops eliminated",
6235 reassociate_stats.ops_eliminated);
6236 statistics_counter_event (cfun, "Statements rewritten",
6237 reassociate_stats.rewritten);
6238 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6239 reassociate_stats.pows_encountered);
6240 statistics_counter_event (cfun, "Built-in powi calls created",
6241 reassociate_stats.pows_created);
6243 delete operand_rank;
6244 operand_entry_pool.release ();
6245 free (bb_rank);
6246 plus_negates.release ();
6247 free_dominance_info (CDI_POST_DOMINATORS);
6248 loop_optimizer_finalize ();
6251 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6252 insertion of __builtin_powi calls.
6254 Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
6255 optimization of a gimple conditional. Otherwise returns zero. */
6257 static unsigned int
6258 execute_reassoc (bool insert_powi_p)
6260 reassoc_insert_powi_p = insert_powi_p;
6262 init_reassoc ();
6264 bool cfg_cleanup_needed;
6265 cfg_cleanup_needed = do_reassoc ();
6266 repropagate_negates ();
6267 branch_fixup ();
6269 fini_reassoc ();
6270 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6273 namespace {
6275 const pass_data pass_data_reassoc =
6277 GIMPLE_PASS, /* type */
6278 "reassoc", /* name */
6279 OPTGROUP_NONE, /* optinfo_flags */
6280 TV_TREE_REASSOC, /* tv_id */
6281 ( PROP_cfg | PROP_ssa ), /* properties_required */
6282 0, /* properties_provided */
6283 0, /* properties_destroyed */
6284 0, /* todo_flags_start */
6285 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6288 class pass_reassoc : public gimple_opt_pass
6290 public:
6291 pass_reassoc (gcc::context *ctxt)
6292 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6295 /* opt_pass methods: */
6296 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6297 void set_pass_param (unsigned int n, bool param)
6299 gcc_assert (n == 0);
6300 insert_powi_p = param;
6302 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6303 virtual unsigned int execute (function *)
6304 { return execute_reassoc (insert_powi_p); }
6306 private:
6307 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6308 point 3a in the pass header comment. */
6309 bool insert_powi_p;
6310 }; // class pass_reassoc
6312 } // anon namespace
6314 gimple_opt_pass *
6315 make_pass_reassoc (gcc::context *ctxt)
6317 return new pass_reassoc (ctxt);