PR rtl-optimization/82913
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob5e8cac69d5d588bdef43b89d2e1ea5df6b964aff
1 /* Reassociation for trees.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
107 You want to first merge all leaves of the same rank, as much as
108 possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
123 mergetmp2 = d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
132 So we have
134 build binary op
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
181 /* Statistics */
182 static struct
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
192 /* Operator, rank pair. */
193 struct operand_entry
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
218 all basic blocks the CFG should be adjusted - basic blocks
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
233 gimple *stmt = gsi_stmt (*gsi);
235 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
254 return ret;
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
322 gimple *phi_stmt;
323 long block_rank;
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
342 return false;
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
352 long op_rank;
354 if (loop_carried_phi (op))
355 return rank;
357 op_rank = get_rank (op);
359 return MAX (rank, op_rank);
362 /* Look up the operand rank structure for expression E. */
364 static inline long
365 find_operand_rank (tree e)
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
373 static inline void
374 insert_operand_rank (tree e, long rank)
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* SSA_NAME's have the rank of the expression they are the result
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e) == SSA_NAME)
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advatage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
454 if (dump_file && (dump_flags & TDF_DETAILS))
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
479 static inline int
480 constant_type (tree t)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483 return INTEGER_CONST_TYPE;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485 return FLOAT_CONST_TYPE;
486 else
487 return OTHER_CONST_TYPE;
490 /* qsort comparison function to sort operand entries PA and PB by rank
491 so that the sorted array is ordered by rank in decreasing order. */
492 static int
493 sort_by_operand_rank (const void *pa, const void *pb)
495 const operand_entry *oea = *(const operand_entry *const *)pa;
496 const operand_entry *oeb = *(const operand_entry *const *)pb;
498 if (oeb->rank != oea->rank)
499 return oeb->rank > oea->rank ? 1 : -1;
501 /* It's nicer for optimize_expression if constants that are likely
502 to fold when added/multiplied/whatever are put next to each
503 other. Since all constants have rank 0, order them by type. */
504 if (oea->rank == 0)
506 if (constant_type (oeb->op) != constant_type (oea->op))
507 return constant_type (oeb->op) - constant_type (oea->op);
508 else
509 /* To make sorting result stable, we use unique IDs to determine
510 order. */
511 return oeb->id > oea->id ? 1 : -1;
514 if (TREE_CODE (oea->op) != SSA_NAME)
516 if (TREE_CODE (oeb->op) != SSA_NAME)
517 return oeb->id > oea->id ? 1 : -1;
518 else
519 return 1;
521 else if (TREE_CODE (oeb->op) != SSA_NAME)
522 return -1;
524 /* Lastly, make sure the versions that are the same go next to each
525 other. */
526 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
528 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
529 versions of removed SSA_NAMEs, so if possible, prefer to sort
530 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
531 See PR60418. */
532 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
533 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
534 basic_block bba = gimple_bb (stmta);
535 basic_block bbb = gimple_bb (stmtb);
536 if (bbb != bba)
538 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
539 but the other might not. */
540 if (!bba)
541 return 1;
542 if (!bbb)
543 return -1;
544 /* If neither is, compare bb_rank. */
545 if (bb_rank[bbb->index] != bb_rank[bba->index])
546 return bb_rank[bbb->index] - bb_rank[bba->index];
549 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
550 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
551 if (da != db)
552 return da ? 1 : -1;
554 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
557 return oeb->id > oea->id ? 1 : -1;
560 /* Add an operand entry to *OPS for the tree operand OP. */
562 static void
563 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
565 operand_entry *oe = operand_entry_pool.allocate ();
567 oe->op = op;
568 oe->rank = get_rank (op);
569 oe->id = next_operand_entry_id++;
570 oe->count = 1;
571 oe->stmt_to_insert = stmt_to_insert;
572 ops->safe_push (oe);
575 /* Add an operand entry to *OPS for the tree operand OP with repeat
576 count REPEAT. */
578 static void
579 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
580 HOST_WIDE_INT repeat)
582 operand_entry *oe = operand_entry_pool.allocate ();
584 oe->op = op;
585 oe->rank = get_rank (op);
586 oe->id = next_operand_entry_id++;
587 oe->count = repeat;
588 oe->stmt_to_insert = NULL;
589 ops->safe_push (oe);
591 reassociate_stats.pows_encountered++;
594 /* Return true if STMT is reassociable operation containing a binary
595 operation with tree code CODE, and is inside LOOP. */
597 static bool
598 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
600 basic_block bb = gimple_bb (stmt);
602 if (gimple_bb (stmt) == NULL)
603 return false;
605 if (!flow_bb_inside_loop_p (loop, bb))
606 return false;
608 if (is_gimple_assign (stmt)
609 && gimple_assign_rhs_code (stmt) == code
610 && has_single_use (gimple_assign_lhs (stmt)))
612 tree rhs1 = gimple_assign_rhs1 (stmt);
613 tree rhs2 = gimple_assign_rhs1 (stmt);
614 if (TREE_CODE (rhs1) == SSA_NAME
615 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
616 return false;
617 if (rhs2
618 && TREE_CODE (rhs2) == SSA_NAME
619 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
620 return false;
621 return true;
624 return false;
628 /* Return true if STMT is a nop-conversion. */
630 static bool
631 gimple_nop_conversion_p (gimple *stmt)
633 if (gassign *ass = dyn_cast <gassign *> (stmt))
635 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
636 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
637 TREE_TYPE (gimple_assign_rhs1 (ass))))
638 return true;
640 return false;
643 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
644 operand of the negate operation. Otherwise, return NULL. */
646 static tree
647 get_unary_op (tree name, enum tree_code opcode)
649 gimple *stmt = SSA_NAME_DEF_STMT (name);
651 /* Look through nop conversions (sign changes). */
652 if (gimple_nop_conversion_p (stmt)
653 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
654 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
656 if (!is_gimple_assign (stmt))
657 return NULL_TREE;
659 if (gimple_assign_rhs_code (stmt) == opcode)
660 return gimple_assign_rhs1 (stmt);
661 return NULL_TREE;
664 /* Return true if OP1 and OP2 have the same value if casted to either type. */
666 static bool
667 ops_equal_values_p (tree op1, tree op2)
669 if (op1 == op2)
670 return true;
672 tree orig_op1 = op1;
673 if (TREE_CODE (op1) == SSA_NAME)
675 gimple *stmt = SSA_NAME_DEF_STMT (op1);
676 if (gimple_nop_conversion_p (stmt))
678 op1 = gimple_assign_rhs1 (stmt);
679 if (op1 == op2)
680 return true;
684 if (TREE_CODE (op2) == SSA_NAME)
686 gimple *stmt = SSA_NAME_DEF_STMT (op2);
687 if (gimple_nop_conversion_p (stmt))
689 op2 = gimple_assign_rhs1 (stmt);
690 if (op1 == op2
691 || orig_op1 == op2)
692 return true;
696 return false;
700 /* If CURR and LAST are a pair of ops that OPCODE allows us to
701 eliminate through equivalences, do so, remove them from OPS, and
702 return true. Otherwise, return false. */
704 static bool
705 eliminate_duplicate_pair (enum tree_code opcode,
706 vec<operand_entry *> *ops,
707 bool *all_done,
708 unsigned int i,
709 operand_entry *curr,
710 operand_entry *last)
713 /* If we have two of the same op, and the opcode is & |, min, or max,
714 we can eliminate one of them.
715 If we have two of the same op, and the opcode is ^, we can
716 eliminate both of them. */
718 if (last && last->op == curr->op)
720 switch (opcode)
722 case MAX_EXPR:
723 case MIN_EXPR:
724 case BIT_IOR_EXPR:
725 case BIT_AND_EXPR:
726 if (dump_file && (dump_flags & TDF_DETAILS))
728 fprintf (dump_file, "Equivalence: ");
729 print_generic_expr (dump_file, curr->op);
730 fprintf (dump_file, " [&|minmax] ");
731 print_generic_expr (dump_file, last->op);
732 fprintf (dump_file, " -> ");
733 print_generic_stmt (dump_file, last->op);
736 ops->ordered_remove (i);
737 reassociate_stats.ops_eliminated ++;
739 return true;
741 case BIT_XOR_EXPR:
742 if (dump_file && (dump_flags & TDF_DETAILS))
744 fprintf (dump_file, "Equivalence: ");
745 print_generic_expr (dump_file, curr->op);
746 fprintf (dump_file, " ^ ");
747 print_generic_expr (dump_file, last->op);
748 fprintf (dump_file, " -> nothing\n");
751 reassociate_stats.ops_eliminated += 2;
753 if (ops->length () == 2)
755 ops->truncate (0);
756 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
757 *all_done = true;
759 else
761 ops->ordered_remove (i-1);
762 ops->ordered_remove (i-1);
765 return true;
767 default:
768 break;
771 return false;
774 static vec<tree> plus_negates;
776 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
777 expression, look in OPS for a corresponding positive operation to cancel
778 it out. If we find one, remove the other from OPS, replace
779 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
780 return false. */
782 static bool
783 eliminate_plus_minus_pair (enum tree_code opcode,
784 vec<operand_entry *> *ops,
785 unsigned int currindex,
786 operand_entry *curr)
788 tree negateop;
789 tree notop;
790 unsigned int i;
791 operand_entry *oe;
793 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
794 return false;
796 negateop = get_unary_op (curr->op, NEGATE_EXPR);
797 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
798 if (negateop == NULL_TREE && notop == NULL_TREE)
799 return false;
801 /* Any non-negated version will have a rank that is one less than
802 the current rank. So once we hit those ranks, if we don't find
803 one, we can stop. */
805 for (i = currindex + 1;
806 ops->iterate (i, &oe)
807 && oe->rank >= curr->rank - 1 ;
808 i++)
810 if (negateop
811 && ops_equal_values_p (oe->op, negateop))
813 if (dump_file && (dump_flags & TDF_DETAILS))
815 fprintf (dump_file, "Equivalence: ");
816 print_generic_expr (dump_file, negateop);
817 fprintf (dump_file, " + -");
818 print_generic_expr (dump_file, oe->op);
819 fprintf (dump_file, " -> 0\n");
822 ops->ordered_remove (i);
823 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
824 ops->ordered_remove (currindex);
825 reassociate_stats.ops_eliminated ++;
827 return true;
829 else if (notop
830 && ops_equal_values_p (oe->op, notop))
832 tree op_type = TREE_TYPE (oe->op);
834 if (dump_file && (dump_flags & TDF_DETAILS))
836 fprintf (dump_file, "Equivalence: ");
837 print_generic_expr (dump_file, notop);
838 fprintf (dump_file, " + ~");
839 print_generic_expr (dump_file, oe->op);
840 fprintf (dump_file, " -> -1\n");
843 ops->ordered_remove (i);
844 add_to_ops_vec (ops, build_all_ones_cst (op_type));
845 ops->ordered_remove (currindex);
846 reassociate_stats.ops_eliminated ++;
848 return true;
852 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
853 save it for later inspection in repropagate_negates(). */
854 if (negateop != NULL_TREE
855 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
856 plus_negates.safe_push (curr->op);
858 return false;
861 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
862 bitwise not expression, look in OPS for a corresponding operand to
863 cancel it out. If we find one, remove the other from OPS, replace
864 OPS[CURRINDEX] with 0, and return true. Otherwise, return
865 false. */
867 static bool
868 eliminate_not_pairs (enum tree_code opcode,
869 vec<operand_entry *> *ops,
870 unsigned int currindex,
871 operand_entry *curr)
873 tree notop;
874 unsigned int i;
875 operand_entry *oe;
877 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
878 || TREE_CODE (curr->op) != SSA_NAME)
879 return false;
881 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
882 if (notop == NULL_TREE)
883 return false;
885 /* Any non-not version will have a rank that is one less than
886 the current rank. So once we hit those ranks, if we don't find
887 one, we can stop. */
889 for (i = currindex + 1;
890 ops->iterate (i, &oe)
891 && oe->rank >= curr->rank - 1;
892 i++)
894 if (oe->op == notop)
896 if (dump_file && (dump_flags & TDF_DETAILS))
898 fprintf (dump_file, "Equivalence: ");
899 print_generic_expr (dump_file, notop);
900 if (opcode == BIT_AND_EXPR)
901 fprintf (dump_file, " & ~");
902 else if (opcode == BIT_IOR_EXPR)
903 fprintf (dump_file, " | ~");
904 print_generic_expr (dump_file, oe->op);
905 if (opcode == BIT_AND_EXPR)
906 fprintf (dump_file, " -> 0\n");
907 else if (opcode == BIT_IOR_EXPR)
908 fprintf (dump_file, " -> -1\n");
911 if (opcode == BIT_AND_EXPR)
912 oe->op = build_zero_cst (TREE_TYPE (oe->op));
913 else if (opcode == BIT_IOR_EXPR)
914 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
916 reassociate_stats.ops_eliminated += ops->length () - 1;
917 ops->truncate (0);
918 ops->quick_push (oe);
919 return true;
923 return false;
926 /* Use constant value that may be present in OPS to try to eliminate
927 operands. Note that this function is only really used when we've
928 eliminated ops for other reasons, or merged constants. Across
929 single statements, fold already does all of this, plus more. There
930 is little point in duplicating logic, so I've only included the
931 identities that I could ever construct testcases to trigger. */
933 static void
934 eliminate_using_constants (enum tree_code opcode,
935 vec<operand_entry *> *ops)
937 operand_entry *oelast = ops->last ();
938 tree type = TREE_TYPE (oelast->op);
940 if (oelast->rank == 0
941 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
943 switch (opcode)
945 case BIT_AND_EXPR:
946 if (integer_zerop (oelast->op))
948 if (ops->length () != 1)
950 if (dump_file && (dump_flags & TDF_DETAILS))
951 fprintf (dump_file, "Found & 0, removing all other ops\n");
953 reassociate_stats.ops_eliminated += ops->length () - 1;
955 ops->truncate (0);
956 ops->quick_push (oelast);
957 return;
960 else if (integer_all_onesp (oelast->op))
962 if (ops->length () != 1)
964 if (dump_file && (dump_flags & TDF_DETAILS))
965 fprintf (dump_file, "Found & -1, removing\n");
966 ops->pop ();
967 reassociate_stats.ops_eliminated++;
970 break;
971 case BIT_IOR_EXPR:
972 if (integer_all_onesp (oelast->op))
974 if (ops->length () != 1)
976 if (dump_file && (dump_flags & TDF_DETAILS))
977 fprintf (dump_file, "Found | -1, removing all other ops\n");
979 reassociate_stats.ops_eliminated += ops->length () - 1;
981 ops->truncate (0);
982 ops->quick_push (oelast);
983 return;
986 else if (integer_zerop (oelast->op))
988 if (ops->length () != 1)
990 if (dump_file && (dump_flags & TDF_DETAILS))
991 fprintf (dump_file, "Found | 0, removing\n");
992 ops->pop ();
993 reassociate_stats.ops_eliminated++;
996 break;
997 case MULT_EXPR:
998 if (integer_zerop (oelast->op)
999 || (FLOAT_TYPE_P (type)
1000 && !HONOR_NANS (type)
1001 && !HONOR_SIGNED_ZEROS (type)
1002 && real_zerop (oelast->op)))
1004 if (ops->length () != 1)
1006 if (dump_file && (dump_flags & TDF_DETAILS))
1007 fprintf (dump_file, "Found * 0, removing all other ops\n");
1009 reassociate_stats.ops_eliminated += ops->length () - 1;
1010 ops->truncate (1);
1011 ops->quick_push (oelast);
1012 return;
1015 else if (integer_onep (oelast->op)
1016 || (FLOAT_TYPE_P (type)
1017 && !HONOR_SNANS (type)
1018 && real_onep (oelast->op)))
1020 if (ops->length () != 1)
1022 if (dump_file && (dump_flags & TDF_DETAILS))
1023 fprintf (dump_file, "Found * 1, removing\n");
1024 ops->pop ();
1025 reassociate_stats.ops_eliminated++;
1026 return;
1029 break;
1030 case BIT_XOR_EXPR:
1031 case PLUS_EXPR:
1032 case MINUS_EXPR:
1033 if (integer_zerop (oelast->op)
1034 || (FLOAT_TYPE_P (type)
1035 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1036 && fold_real_zero_addition_p (type, oelast->op,
1037 opcode == MINUS_EXPR)))
1039 if (ops->length () != 1)
1041 if (dump_file && (dump_flags & TDF_DETAILS))
1042 fprintf (dump_file, "Found [|^+] 0, removing\n");
1043 ops->pop ();
1044 reassociate_stats.ops_eliminated++;
1045 return;
1048 break;
1049 default:
1050 break;
1056 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1057 bool, bool);
1059 /* Structure for tracking and counting operands. */
1060 struct oecount {
1061 unsigned int cnt;
1062 unsigned int id;
1063 enum tree_code oecode;
1064 tree op;
1068 /* The heap for the oecount hashtable and the sorted list of operands. */
1069 static vec<oecount> cvec;
1072 /* Oecount hashtable helpers. */
1074 struct oecount_hasher : int_hash <int, 0, 1>
1076 static inline hashval_t hash (int);
1077 static inline bool equal (int, int);
1080 /* Hash function for oecount. */
1082 inline hashval_t
1083 oecount_hasher::hash (int p)
1085 const oecount *c = &cvec[p - 42];
1086 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1089 /* Comparison function for oecount. */
1091 inline bool
1092 oecount_hasher::equal (int p1, int p2)
1094 const oecount *c1 = &cvec[p1 - 42];
1095 const oecount *c2 = &cvec[p2 - 42];
1096 return c1->oecode == c2->oecode && c1->op == c2->op;
1099 /* Comparison function for qsort sorting oecount elements by count. */
1101 static int
1102 oecount_cmp (const void *p1, const void *p2)
1104 const oecount *c1 = (const oecount *)p1;
1105 const oecount *c2 = (const oecount *)p2;
1106 if (c1->cnt != c2->cnt)
1107 return c1->cnt > c2->cnt ? 1 : -1;
1108 else
1109 /* If counts are identical, use unique IDs to stabilize qsort. */
1110 return c1->id > c2->id ? 1 : -1;
1113 /* Return TRUE iff STMT represents a builtin call that raises OP
1114 to some exponent. */
1116 static bool
1117 stmt_is_power_of_op (gimple *stmt, tree op)
1119 if (!is_gimple_call (stmt))
1120 return false;
1122 switch (gimple_call_combined_fn (stmt))
1124 CASE_CFN_POW:
1125 CASE_CFN_POWI:
1126 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1128 default:
1129 return false;
1133 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1134 in place and return the result. Assumes that stmt_is_power_of_op
1135 was previously called for STMT and returned TRUE. */
1137 static HOST_WIDE_INT
1138 decrement_power (gimple *stmt)
1140 REAL_VALUE_TYPE c, cint;
1141 HOST_WIDE_INT power;
1142 tree arg1;
1144 switch (gimple_call_combined_fn (stmt))
1146 CASE_CFN_POW:
1147 arg1 = gimple_call_arg (stmt, 1);
1148 c = TREE_REAL_CST (arg1);
1149 power = real_to_integer (&c) - 1;
1150 real_from_integer (&cint, VOIDmode, power, SIGNED);
1151 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1152 return power;
1154 CASE_CFN_POWI:
1155 arg1 = gimple_call_arg (stmt, 1);
1156 power = TREE_INT_CST_LOW (arg1) - 1;
1157 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1158 return power;
1160 default:
1161 gcc_unreachable ();
1165 /* Replace SSA defined by STMT and replace all its uses with new
1166 SSA. Also return the new SSA. */
1168 static tree
1169 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1171 gimple *use_stmt;
1172 use_operand_p use;
1173 imm_use_iterator iter;
1174 tree new_lhs, new_debug_lhs = NULL_TREE;
1175 tree lhs = gimple_get_lhs (stmt);
1177 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1178 gimple_set_lhs (stmt, new_lhs);
1180 /* Also need to update GIMPLE_DEBUGs. */
1181 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1183 tree repl = new_lhs;
1184 if (is_gimple_debug (use_stmt))
1186 if (new_debug_lhs == NULL_TREE)
1188 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1189 gdebug *def_temp
1190 = gimple_build_debug_bind (new_debug_lhs,
1191 build2 (opcode, TREE_TYPE (lhs),
1192 new_lhs, op),
1193 stmt);
1194 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1195 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1196 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1197 gimple_set_uid (def_temp, gimple_uid (stmt));
1198 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1199 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1201 repl = new_debug_lhs;
1203 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1204 SET_USE (use, repl);
1205 update_stmt (use_stmt);
1207 return new_lhs;
1210 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1211 uses with new SSAs. Also do this for the stmt that defines DEF
1212 if *DEF is not OP. */
1214 static void
1215 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1216 vec<gimple *> &stmts_to_fix)
1218 unsigned i;
1219 gimple *stmt;
1221 if (*def != op
1222 && TREE_CODE (*def) == SSA_NAME
1223 && (stmt = SSA_NAME_DEF_STMT (*def))
1224 && gimple_code (stmt) != GIMPLE_NOP)
1225 *def = make_new_ssa_for_def (stmt, opcode, op);
1227 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1228 make_new_ssa_for_def (stmt, opcode, op);
1231 /* Find the single immediate use of STMT's LHS, and replace it
1232 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1233 replace *DEF with OP as well. */
1235 static void
1236 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1238 tree lhs;
1239 gimple *use_stmt;
1240 use_operand_p use;
1241 gimple_stmt_iterator gsi;
1243 if (is_gimple_call (stmt))
1244 lhs = gimple_call_lhs (stmt);
1245 else
1246 lhs = gimple_assign_lhs (stmt);
1248 gcc_assert (has_single_use (lhs));
1249 single_imm_use (lhs, &use, &use_stmt);
1250 if (lhs == *def)
1251 *def = op;
1252 SET_USE (use, op);
1253 if (TREE_CODE (op) != SSA_NAME)
1254 update_stmt (use_stmt);
1255 gsi = gsi_for_stmt (stmt);
1256 unlink_stmt_vdef (stmt);
1257 reassoc_remove_stmt (&gsi);
1258 release_defs (stmt);
1261 /* Walks the linear chain with result *DEF searching for an operation
1262 with operand OP and code OPCODE removing that from the chain. *DEF
1263 is updated if there is only one operand but no operation left. */
1265 static void
1266 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1268 tree orig_def = *def;
1269 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1270 /* PR72835 - Record the stmt chain that has to be updated such that
1271 we dont use the same LHS when the values computed are different. */
1272 auto_vec<gimple *, 64> stmts_to_fix;
1276 tree name;
1278 if (opcode == MULT_EXPR)
1280 if (stmt_is_power_of_op (stmt, op))
1282 if (decrement_power (stmt) == 1)
1284 if (stmts_to_fix.length () > 0)
1285 stmts_to_fix.pop ();
1286 propagate_op_to_single_use (op, stmt, def);
1288 break;
1290 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1292 if (gimple_assign_rhs1 (stmt) == op)
1294 tree cst = build_minus_one_cst (TREE_TYPE (op));
1295 if (stmts_to_fix.length () > 0)
1296 stmts_to_fix.pop ();
1297 propagate_op_to_single_use (cst, stmt, def);
1298 break;
1300 else if (integer_minus_onep (op)
1301 || real_minus_onep (op))
1303 gimple_assign_set_rhs_code
1304 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1305 break;
1310 name = gimple_assign_rhs1 (stmt);
1312 /* If this is the operation we look for and one of the operands
1313 is ours simply propagate the other operand into the stmts
1314 single use. */
1315 if (gimple_assign_rhs_code (stmt) == opcode
1316 && (name == op
1317 || gimple_assign_rhs2 (stmt) == op))
1319 if (name == op)
1320 name = gimple_assign_rhs2 (stmt);
1321 if (stmts_to_fix.length () > 0)
1322 stmts_to_fix.pop ();
1323 propagate_op_to_single_use (name, stmt, def);
1324 break;
1327 /* We might have a multiply of two __builtin_pow* calls, and
1328 the operand might be hiding in the rightmost one. Likewise
1329 this can happen for a negate. */
1330 if (opcode == MULT_EXPR
1331 && gimple_assign_rhs_code (stmt) == opcode
1332 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1333 && has_single_use (gimple_assign_rhs2 (stmt)))
1335 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1336 if (stmt_is_power_of_op (stmt2, op))
1338 if (decrement_power (stmt2) == 1)
1339 propagate_op_to_single_use (op, stmt2, def);
1340 else
1341 stmts_to_fix.safe_push (stmt2);
1342 break;
1344 else if (is_gimple_assign (stmt2)
1345 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1347 if (gimple_assign_rhs1 (stmt2) == op)
1349 tree cst = build_minus_one_cst (TREE_TYPE (op));
1350 propagate_op_to_single_use (cst, stmt2, def);
1351 break;
1353 else if (integer_minus_onep (op)
1354 || real_minus_onep (op))
1356 stmts_to_fix.safe_push (stmt2);
1357 gimple_assign_set_rhs_code
1358 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1359 break;
1364 /* Continue walking the chain. */
1365 gcc_assert (name != op
1366 && TREE_CODE (name) == SSA_NAME);
1367 stmt = SSA_NAME_DEF_STMT (name);
1368 stmts_to_fix.safe_push (stmt);
1370 while (1);
1372 if (stmts_to_fix.length () > 0 || *def == orig_def)
1373 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1376 /* Returns true if statement S1 dominates statement S2. Like
1377 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1379 static bool
1380 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1382 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1384 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1385 SSA_NAME. Assume it lives at the beginning of function and
1386 thus dominates everything. */
1387 if (!bb1 || s1 == s2)
1388 return true;
1390 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1391 if (!bb2)
1392 return false;
1394 if (bb1 == bb2)
1396 /* PHIs in the same basic block are assumed to be
1397 executed all in parallel, if only one stmt is a PHI,
1398 it dominates the other stmt in the same basic block. */
1399 if (gimple_code (s1) == GIMPLE_PHI)
1400 return true;
1402 if (gimple_code (s2) == GIMPLE_PHI)
1403 return false;
1405 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1407 if (gimple_uid (s1) < gimple_uid (s2))
1408 return true;
1410 if (gimple_uid (s1) > gimple_uid (s2))
1411 return false;
1413 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1414 unsigned int uid = gimple_uid (s1);
1415 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1417 gimple *s = gsi_stmt (gsi);
1418 if (gimple_uid (s) != uid)
1419 break;
1420 if (s == s2)
1421 return true;
1424 return false;
1427 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1430 /* Insert STMT after INSERT_POINT. */
1432 static void
1433 insert_stmt_after (gimple *stmt, gimple *insert_point)
1435 gimple_stmt_iterator gsi;
1436 basic_block bb;
1438 if (gimple_code (insert_point) == GIMPLE_PHI)
1439 bb = gimple_bb (insert_point);
1440 else if (!stmt_ends_bb_p (insert_point))
1442 gsi = gsi_for_stmt (insert_point);
1443 gimple_set_uid (stmt, gimple_uid (insert_point));
1444 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1445 return;
1447 else
1448 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1449 thus if it must end a basic block, it should be a call that can
1450 throw, or some assignment that can throw. If it throws, the LHS
1451 of it will not be initialized though, so only valid places using
1452 the SSA_NAME should be dominated by the fallthru edge. */
1453 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1454 gsi = gsi_after_labels (bb);
1455 if (gsi_end_p (gsi))
1457 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1458 gimple_set_uid (stmt,
1459 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1461 else
1462 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1463 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1466 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1467 the result. Places the statement after the definition of either
1468 OP1 or OP2. Returns the new statement. */
1470 static gimple *
1471 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1473 gimple *op1def = NULL, *op2def = NULL;
1474 gimple_stmt_iterator gsi;
1475 tree op;
1476 gassign *sum;
1478 /* Create the addition statement. */
1479 op = make_ssa_name (type);
1480 sum = gimple_build_assign (op, opcode, op1, op2);
1482 /* Find an insertion place and insert. */
1483 if (TREE_CODE (op1) == SSA_NAME)
1484 op1def = SSA_NAME_DEF_STMT (op1);
1485 if (TREE_CODE (op2) == SSA_NAME)
1486 op2def = SSA_NAME_DEF_STMT (op2);
1487 if ((!op1def || gimple_nop_p (op1def))
1488 && (!op2def || gimple_nop_p (op2def)))
1490 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1491 if (gsi_end_p (gsi))
1493 gimple_stmt_iterator gsi2
1494 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1495 gimple_set_uid (sum,
1496 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1498 else
1499 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1500 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1502 else
1504 gimple *insert_point;
1505 if ((!op1def || gimple_nop_p (op1def))
1506 || (op2def && !gimple_nop_p (op2def)
1507 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1508 insert_point = op2def;
1509 else
1510 insert_point = op1def;
1511 insert_stmt_after (sum, insert_point);
1513 update_stmt (sum);
1515 return sum;
1518 /* Perform un-distribution of divisions and multiplications.
1519 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1520 to (A + B) / X for real X.
1522 The algorithm is organized as follows.
1524 - First we walk the addition chain *OPS looking for summands that
1525 are defined by a multiplication or a real division. This results
1526 in the candidates bitmap with relevant indices into *OPS.
1528 - Second we build the chains of multiplications or divisions for
1529 these candidates, counting the number of occurrences of (operand, code)
1530 pairs in all of the candidates chains.
1532 - Third we sort the (operand, code) pairs by number of occurrence and
1533 process them starting with the pair with the most uses.
1535 * For each such pair we walk the candidates again to build a
1536 second candidate bitmap noting all multiplication/division chains
1537 that have at least one occurrence of (operand, code).
1539 * We build an alternate addition chain only covering these
1540 candidates with one (operand, code) operation removed from their
1541 multiplication/division chain.
1543 * The first candidate gets replaced by the alternate addition chain
1544 multiplied/divided by the operand.
1546 * All candidate chains get disabled for further processing and
1547 processing of (operand, code) pairs continues.
1549 The alternate addition chains built are re-processed by the main
1550 reassociation algorithm which allows optimizing a * x * y + b * y * x
1551 to (a + b ) * x * y in one invocation of the reassociation pass. */
1553 static bool
1554 undistribute_ops_list (enum tree_code opcode,
1555 vec<operand_entry *> *ops, struct loop *loop)
1557 unsigned int length = ops->length ();
1558 operand_entry *oe1;
1559 unsigned i, j;
1560 unsigned nr_candidates, nr_candidates2;
1561 sbitmap_iterator sbi0;
1562 vec<operand_entry *> *subops;
1563 bool changed = false;
1564 unsigned int next_oecount_id = 0;
1566 if (length <= 1
1567 || opcode != PLUS_EXPR)
1568 return false;
1570 /* Build a list of candidates to process. */
1571 auto_sbitmap candidates (length);
1572 bitmap_clear (candidates);
1573 nr_candidates = 0;
1574 FOR_EACH_VEC_ELT (*ops, i, oe1)
1576 enum tree_code dcode;
1577 gimple *oe1def;
1579 if (TREE_CODE (oe1->op) != SSA_NAME)
1580 continue;
1581 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1582 if (!is_gimple_assign (oe1def))
1583 continue;
1584 dcode = gimple_assign_rhs_code (oe1def);
1585 if ((dcode != MULT_EXPR
1586 && dcode != RDIV_EXPR)
1587 || !is_reassociable_op (oe1def, dcode, loop))
1588 continue;
1590 bitmap_set_bit (candidates, i);
1591 nr_candidates++;
1594 if (nr_candidates < 2)
1595 return false;
1597 if (dump_file && (dump_flags & TDF_DETAILS))
1599 fprintf (dump_file, "searching for un-distribute opportunities ");
1600 print_generic_expr (dump_file,
1601 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1602 fprintf (dump_file, " %d\n", nr_candidates);
1605 /* Build linearized sub-operand lists and the counting table. */
1606 cvec.create (0);
1608 hash_table<oecount_hasher> ctable (15);
1610 /* ??? Macro arguments cannot have multi-argument template types in
1611 them. This typedef is needed to workaround that limitation. */
1612 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1613 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1614 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1616 gimple *oedef;
1617 enum tree_code oecode;
1618 unsigned j;
1620 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1621 oecode = gimple_assign_rhs_code (oedef);
1622 linearize_expr_tree (&subops[i], oedef,
1623 associative_tree_code (oecode), false);
1625 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1627 oecount c;
1628 int *slot;
1629 int idx;
1630 c.oecode = oecode;
1631 c.cnt = 1;
1632 c.id = next_oecount_id++;
1633 c.op = oe1->op;
1634 cvec.safe_push (c);
1635 idx = cvec.length () + 41;
1636 slot = ctable.find_slot (idx, INSERT);
1637 if (!*slot)
1639 *slot = idx;
1641 else
1643 cvec.pop ();
1644 cvec[*slot - 42].cnt++;
1649 /* Sort the counting table. */
1650 cvec.qsort (oecount_cmp);
1652 if (dump_file && (dump_flags & TDF_DETAILS))
1654 oecount *c;
1655 fprintf (dump_file, "Candidates:\n");
1656 FOR_EACH_VEC_ELT (cvec, j, c)
1658 fprintf (dump_file, " %u %s: ", c->cnt,
1659 c->oecode == MULT_EXPR
1660 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1661 print_generic_expr (dump_file, c->op);
1662 fprintf (dump_file, "\n");
1666 /* Process the (operand, code) pairs in order of most occurrence. */
1667 auto_sbitmap candidates2 (length);
1668 while (!cvec.is_empty ())
1670 oecount *c = &cvec.last ();
1671 if (c->cnt < 2)
1672 break;
1674 /* Now collect the operands in the outer chain that contain
1675 the common operand in their inner chain. */
1676 bitmap_clear (candidates2);
1677 nr_candidates2 = 0;
1678 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1680 gimple *oedef;
1681 enum tree_code oecode;
1682 unsigned j;
1683 tree op = (*ops)[i]->op;
1685 /* If we undistributed in this chain already this may be
1686 a constant. */
1687 if (TREE_CODE (op) != SSA_NAME)
1688 continue;
1690 oedef = SSA_NAME_DEF_STMT (op);
1691 oecode = gimple_assign_rhs_code (oedef);
1692 if (oecode != c->oecode)
1693 continue;
1695 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1697 if (oe1->op == c->op)
1699 bitmap_set_bit (candidates2, i);
1700 ++nr_candidates2;
1701 break;
1706 if (nr_candidates2 >= 2)
1708 operand_entry *oe1, *oe2;
1709 gimple *prod;
1710 int first = bitmap_first_set_bit (candidates2);
1712 /* Build the new addition chain. */
1713 oe1 = (*ops)[first];
1714 if (dump_file && (dump_flags & TDF_DETAILS))
1716 fprintf (dump_file, "Building (");
1717 print_generic_expr (dump_file, oe1->op);
1719 zero_one_operation (&oe1->op, c->oecode, c->op);
1720 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1722 gimple *sum;
1723 oe2 = (*ops)[i];
1724 if (dump_file && (dump_flags & TDF_DETAILS))
1726 fprintf (dump_file, " + ");
1727 print_generic_expr (dump_file, oe2->op);
1729 zero_one_operation (&oe2->op, c->oecode, c->op);
1730 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1731 oe1->op, oe2->op, opcode);
1732 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1733 oe2->rank = 0;
1734 oe1->op = gimple_get_lhs (sum);
1737 /* Apply the multiplication/division. */
1738 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1739 oe1->op, c->op, c->oecode);
1740 if (dump_file && (dump_flags & TDF_DETAILS))
1742 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1743 print_generic_expr (dump_file, c->op);
1744 fprintf (dump_file, "\n");
1747 /* Record it in the addition chain and disable further
1748 undistribution with this op. */
1749 oe1->op = gimple_assign_lhs (prod);
1750 oe1->rank = get_rank (oe1->op);
1751 subops[first].release ();
1753 changed = true;
1756 cvec.pop ();
1759 for (i = 0; i < ops->length (); ++i)
1760 subops[i].release ();
1761 free (subops);
1762 cvec.release ();
1764 return changed;
1767 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1768 expression, examine the other OPS to see if any of them are comparisons
1769 of the same values, which we may be able to combine or eliminate.
1770 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1772 static bool
1773 eliminate_redundant_comparison (enum tree_code opcode,
1774 vec<operand_entry *> *ops,
1775 unsigned int currindex,
1776 operand_entry *curr)
1778 tree op1, op2;
1779 enum tree_code lcode, rcode;
1780 gimple *def1, *def2;
1781 int i;
1782 operand_entry *oe;
1784 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1785 return false;
1787 /* Check that CURR is a comparison. */
1788 if (TREE_CODE (curr->op) != SSA_NAME)
1789 return false;
1790 def1 = SSA_NAME_DEF_STMT (curr->op);
1791 if (!is_gimple_assign (def1))
1792 return false;
1793 lcode = gimple_assign_rhs_code (def1);
1794 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1795 return false;
1796 op1 = gimple_assign_rhs1 (def1);
1797 op2 = gimple_assign_rhs2 (def1);
1799 /* Now look for a similar comparison in the remaining OPS. */
1800 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1802 tree t;
1804 if (TREE_CODE (oe->op) != SSA_NAME)
1805 continue;
1806 def2 = SSA_NAME_DEF_STMT (oe->op);
1807 if (!is_gimple_assign (def2))
1808 continue;
1809 rcode = gimple_assign_rhs_code (def2);
1810 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1811 continue;
1813 /* If we got here, we have a match. See if we can combine the
1814 two comparisons. */
1815 if (opcode == BIT_IOR_EXPR)
1816 t = maybe_fold_or_comparisons (lcode, op1, op2,
1817 rcode, gimple_assign_rhs1 (def2),
1818 gimple_assign_rhs2 (def2));
1819 else
1820 t = maybe_fold_and_comparisons (lcode, op1, op2,
1821 rcode, gimple_assign_rhs1 (def2),
1822 gimple_assign_rhs2 (def2));
1823 if (!t)
1824 continue;
1826 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1827 always give us a boolean_type_node value back. If the original
1828 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1829 we need to convert. */
1830 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1831 t = fold_convert (TREE_TYPE (curr->op), t);
1833 if (TREE_CODE (t) != INTEGER_CST
1834 && !operand_equal_p (t, curr->op, 0))
1836 enum tree_code subcode;
1837 tree newop1, newop2;
1838 if (!COMPARISON_CLASS_P (t))
1839 continue;
1840 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1841 STRIP_USELESS_TYPE_CONVERSION (newop1);
1842 STRIP_USELESS_TYPE_CONVERSION (newop2);
1843 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1844 continue;
1847 if (dump_file && (dump_flags & TDF_DETAILS))
1849 fprintf (dump_file, "Equivalence: ");
1850 print_generic_expr (dump_file, curr->op);
1851 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1852 print_generic_expr (dump_file, oe->op);
1853 fprintf (dump_file, " -> ");
1854 print_generic_expr (dump_file, t);
1855 fprintf (dump_file, "\n");
1858 /* Now we can delete oe, as it has been subsumed by the new combined
1859 expression t. */
1860 ops->ordered_remove (i);
1861 reassociate_stats.ops_eliminated ++;
1863 /* If t is the same as curr->op, we're done. Otherwise we must
1864 replace curr->op with t. Special case is if we got a constant
1865 back, in which case we add it to the end instead of in place of
1866 the current entry. */
1867 if (TREE_CODE (t) == INTEGER_CST)
1869 ops->ordered_remove (currindex);
1870 add_to_ops_vec (ops, t);
1872 else if (!operand_equal_p (t, curr->op, 0))
1874 gimple *sum;
1875 enum tree_code subcode;
1876 tree newop1;
1877 tree newop2;
1878 gcc_assert (COMPARISON_CLASS_P (t));
1879 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1880 STRIP_USELESS_TYPE_CONVERSION (newop1);
1881 STRIP_USELESS_TYPE_CONVERSION (newop2);
1882 gcc_checking_assert (is_gimple_val (newop1)
1883 && is_gimple_val (newop2));
1884 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1885 curr->op = gimple_get_lhs (sum);
1887 return true;
1890 return false;
1894 /* Transform repeated addition of same values into multiply with
1895 constant. */
1896 static bool
1897 transform_add_to_multiply (vec<operand_entry *> *ops)
1899 operand_entry *oe;
1900 tree op = NULL_TREE;
1901 int j;
1902 int i, start = -1, end = 0, count = 0;
1903 auto_vec<std::pair <int, int> > indxs;
1904 bool changed = false;
1906 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1907 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1908 || !flag_unsafe_math_optimizations))
1909 return false;
1911 /* Look for repeated operands. */
1912 FOR_EACH_VEC_ELT (*ops, i, oe)
1914 if (start == -1)
1916 count = 1;
1917 op = oe->op;
1918 start = i;
1920 else if (operand_equal_p (oe->op, op, 0))
1922 count++;
1923 end = i;
1925 else
1927 if (count > 1)
1928 indxs.safe_push (std::make_pair (start, end));
1929 count = 1;
1930 op = oe->op;
1931 start = i;
1935 if (count > 1)
1936 indxs.safe_push (std::make_pair (start, end));
1938 for (j = indxs.length () - 1; j >= 0; --j)
1940 /* Convert repeated operand addition to multiplication. */
1941 start = indxs[j].first;
1942 end = indxs[j].second;
1943 op = (*ops)[start]->op;
1944 count = end - start + 1;
1945 for (i = end; i >= start; --i)
1946 ops->unordered_remove (i);
1947 tree tmp = make_ssa_name (TREE_TYPE (op));
1948 tree cst = build_int_cst (integer_type_node, count);
1949 gassign *mul_stmt
1950 = gimple_build_assign (tmp, MULT_EXPR,
1951 op, fold_convert (TREE_TYPE (op), cst));
1952 gimple_set_visited (mul_stmt, true);
1953 add_to_ops_vec (ops, tmp, mul_stmt);
1954 changed = true;
1957 return changed;
1961 /* Perform various identities and other optimizations on the list of
1962 operand entries, stored in OPS. The tree code for the binary
1963 operation between all the operands is OPCODE. */
1965 static void
1966 optimize_ops_list (enum tree_code opcode,
1967 vec<operand_entry *> *ops)
1969 unsigned int length = ops->length ();
1970 unsigned int i;
1971 operand_entry *oe;
1972 operand_entry *oelast = NULL;
1973 bool iterate = false;
1975 if (length == 1)
1976 return;
1978 oelast = ops->last ();
1980 /* If the last two are constants, pop the constants off, merge them
1981 and try the next two. */
1982 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1984 operand_entry *oelm1 = (*ops)[length - 2];
1986 if (oelm1->rank == 0
1987 && is_gimple_min_invariant (oelm1->op)
1988 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1989 TREE_TYPE (oelast->op)))
1991 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1992 oelm1->op, oelast->op);
1994 if (folded && is_gimple_min_invariant (folded))
1996 if (dump_file && (dump_flags & TDF_DETAILS))
1997 fprintf (dump_file, "Merging constants\n");
1999 ops->pop ();
2000 ops->pop ();
2002 add_to_ops_vec (ops, folded);
2003 reassociate_stats.constants_eliminated++;
2005 optimize_ops_list (opcode, ops);
2006 return;
2011 eliminate_using_constants (opcode, ops);
2012 oelast = NULL;
2014 for (i = 0; ops->iterate (i, &oe);)
2016 bool done = false;
2018 if (eliminate_not_pairs (opcode, ops, i, oe))
2019 return;
2020 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2021 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2022 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2024 if (done)
2025 return;
2026 iterate = true;
2027 oelast = NULL;
2028 continue;
2030 oelast = oe;
2031 i++;
2034 length = ops->length ();
2035 oelast = ops->last ();
2037 if (iterate)
2038 optimize_ops_list (opcode, ops);
2041 /* The following functions are subroutines to optimize_range_tests and allow
2042 it to try to change a logical combination of comparisons into a range
2043 test.
2045 For example, both
2046 X == 2 || X == 5 || X == 3 || X == 4
2048 X >= 2 && X <= 5
2049 are converted to
2050 (unsigned) (X - 2) <= 3
2052 For more information see comments above fold_test_range in fold-const.c,
2053 this implementation is for GIMPLE. */
2055 struct range_entry
2057 tree exp;
2058 tree low;
2059 tree high;
2060 bool in_p;
2061 bool strict_overflow_p;
2062 unsigned int idx, next;
2065 /* This is similar to make_range in fold-const.c, but on top of
2066 GIMPLE instead of trees. If EXP is non-NULL, it should be
2067 an SSA_NAME and STMT argument is ignored, otherwise STMT
2068 argument should be a GIMPLE_COND. */
2070 static void
2071 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2073 int in_p;
2074 tree low, high;
2075 bool is_bool, strict_overflow_p;
2077 r->exp = NULL_TREE;
2078 r->in_p = false;
2079 r->strict_overflow_p = false;
2080 r->low = NULL_TREE;
2081 r->high = NULL_TREE;
2082 if (exp != NULL_TREE
2083 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2084 return;
2086 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2087 and see if we can refine the range. Some of the cases below may not
2088 happen, but it doesn't seem worth worrying about this. We "continue"
2089 the outer loop when we've changed something; otherwise we "break"
2090 the switch, which will "break" the while. */
2091 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2092 high = low;
2093 in_p = 0;
2094 strict_overflow_p = false;
2095 is_bool = false;
2096 if (exp == NULL_TREE)
2097 is_bool = true;
2098 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2100 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2101 is_bool = true;
2102 else
2103 return;
2105 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2106 is_bool = true;
2108 while (1)
2110 enum tree_code code;
2111 tree arg0, arg1, exp_type;
2112 tree nexp;
2113 location_t loc;
2115 if (exp != NULL_TREE)
2117 if (TREE_CODE (exp) != SSA_NAME
2118 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2119 break;
2121 stmt = SSA_NAME_DEF_STMT (exp);
2122 if (!is_gimple_assign (stmt))
2123 break;
2125 code = gimple_assign_rhs_code (stmt);
2126 arg0 = gimple_assign_rhs1 (stmt);
2127 arg1 = gimple_assign_rhs2 (stmt);
2128 exp_type = TREE_TYPE (exp);
2130 else
2132 code = gimple_cond_code (stmt);
2133 arg0 = gimple_cond_lhs (stmt);
2134 arg1 = gimple_cond_rhs (stmt);
2135 exp_type = boolean_type_node;
2138 if (TREE_CODE (arg0) != SSA_NAME)
2139 break;
2140 loc = gimple_location (stmt);
2141 switch (code)
2143 case BIT_NOT_EXPR:
2144 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2145 /* Ensure the range is either +[-,0], +[0,0],
2146 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2147 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2148 or similar expression of unconditional true or
2149 false, it should not be negated. */
2150 && ((high && integer_zerop (high))
2151 || (low && integer_onep (low))))
2153 in_p = !in_p;
2154 exp = arg0;
2155 continue;
2157 break;
2158 case SSA_NAME:
2159 exp = arg0;
2160 continue;
2161 CASE_CONVERT:
2162 if (is_bool)
2163 goto do_default;
2164 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2166 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2167 is_bool = true;
2168 else
2169 return;
2171 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2172 is_bool = true;
2173 goto do_default;
2174 case EQ_EXPR:
2175 case NE_EXPR:
2176 case LT_EXPR:
2177 case LE_EXPR:
2178 case GE_EXPR:
2179 case GT_EXPR:
2180 is_bool = true;
2181 /* FALLTHRU */
2182 default:
2183 if (!is_bool)
2184 return;
2185 do_default:
2186 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2187 &low, &high, &in_p,
2188 &strict_overflow_p);
2189 if (nexp != NULL_TREE)
2191 exp = nexp;
2192 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2193 continue;
2195 break;
2197 break;
2199 if (is_bool)
2201 r->exp = exp;
2202 r->in_p = in_p;
2203 r->low = low;
2204 r->high = high;
2205 r->strict_overflow_p = strict_overflow_p;
2209 /* Comparison function for qsort. Sort entries
2210 without SSA_NAME exp first, then with SSA_NAMEs sorted
2211 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2212 by increasing ->low and if ->low is the same, by increasing
2213 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2214 maximum. */
2216 static int
2217 range_entry_cmp (const void *a, const void *b)
2219 const struct range_entry *p = (const struct range_entry *) a;
2220 const struct range_entry *q = (const struct range_entry *) b;
2222 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2224 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2226 /* Group range_entries for the same SSA_NAME together. */
2227 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2228 return -1;
2229 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2230 return 1;
2231 /* If ->low is different, NULL low goes first, then by
2232 ascending low. */
2233 if (p->low != NULL_TREE)
2235 if (q->low != NULL_TREE)
2237 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2238 p->low, q->low);
2239 if (tem && integer_onep (tem))
2240 return -1;
2241 tem = fold_binary (GT_EXPR, boolean_type_node,
2242 p->low, q->low);
2243 if (tem && integer_onep (tem))
2244 return 1;
2246 else
2247 return 1;
2249 else if (q->low != NULL_TREE)
2250 return -1;
2251 /* If ->high is different, NULL high goes last, before that by
2252 ascending high. */
2253 if (p->high != NULL_TREE)
2255 if (q->high != NULL_TREE)
2257 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2258 p->high, q->high);
2259 if (tem && integer_onep (tem))
2260 return -1;
2261 tem = fold_binary (GT_EXPR, boolean_type_node,
2262 p->high, q->high);
2263 if (tem && integer_onep (tem))
2264 return 1;
2266 else
2267 return -1;
2269 else if (q->high != NULL_TREE)
2270 return 1;
2271 /* If both ranges are the same, sort below by ascending idx. */
2273 else
2274 return 1;
2276 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2277 return -1;
2279 if (p->idx < q->idx)
2280 return -1;
2281 else
2283 gcc_checking_assert (p->idx > q->idx);
2284 return 1;
2288 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2289 insert needed statements BEFORE or after GSI. */
2291 static tree
2292 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2294 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2295 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2296 if (TREE_CODE (ret) != SSA_NAME)
2298 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2299 if (before)
2300 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2301 else
2302 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2303 ret = gimple_assign_lhs (g);
2305 return ret;
2308 /* Helper routine of optimize_range_test.
2309 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2310 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2311 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2312 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2313 an array of COUNT pointers to other ranges. Return
2314 true if the range merge has been successful.
2315 If OPCODE is ERROR_MARK, this is called from within
2316 maybe_optimize_range_tests and is performing inter-bb range optimization.
2317 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2318 oe->rank. */
2320 static bool
2321 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2322 struct range_entry **otherrangep,
2323 unsigned int count, enum tree_code opcode,
2324 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2325 bool in_p, tree low, tree high, bool strict_overflow_p)
2327 operand_entry *oe = (*ops)[range->idx];
2328 tree op = oe->op;
2329 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2330 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2331 location_t loc = gimple_location (stmt);
2332 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2333 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2334 in_p, low, high);
2335 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2336 gimple_stmt_iterator gsi;
2337 unsigned int i, uid;
2339 if (tem == NULL_TREE)
2340 return false;
2342 /* If op is default def SSA_NAME, there is no place to insert the
2343 new comparison. Give up, unless we can use OP itself as the
2344 range test. */
2345 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2347 if (op == range->exp
2348 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2349 || TREE_CODE (optype) == BOOLEAN_TYPE)
2350 && (op == tem
2351 || (TREE_CODE (tem) == EQ_EXPR
2352 && TREE_OPERAND (tem, 0) == op
2353 && integer_onep (TREE_OPERAND (tem, 1))))
2354 && opcode != BIT_IOR_EXPR
2355 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2357 stmt = NULL;
2358 tem = op;
2360 else
2361 return false;
2364 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2365 warning_at (loc, OPT_Wstrict_overflow,
2366 "assuming signed overflow does not occur "
2367 "when simplifying range test");
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2371 struct range_entry *r;
2372 fprintf (dump_file, "Optimizing range tests ");
2373 print_generic_expr (dump_file, range->exp);
2374 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2375 print_generic_expr (dump_file, range->low);
2376 fprintf (dump_file, ", ");
2377 print_generic_expr (dump_file, range->high);
2378 fprintf (dump_file, "]");
2379 for (i = 0; i < count; i++)
2381 if (otherrange)
2382 r = otherrange + i;
2383 else
2384 r = otherrangep[i];
2385 if (r->exp
2386 && r->exp != range->exp
2387 && TREE_CODE (r->exp) == SSA_NAME)
2389 fprintf (dump_file, " and ");
2390 print_generic_expr (dump_file, r->exp);
2392 else
2393 fprintf (dump_file, " and");
2394 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2395 print_generic_expr (dump_file, r->low);
2396 fprintf (dump_file, ", ");
2397 print_generic_expr (dump_file, r->high);
2398 fprintf (dump_file, "]");
2400 fprintf (dump_file, "\n into ");
2401 print_generic_expr (dump_file, tem);
2402 fprintf (dump_file, "\n");
2405 if (opcode == BIT_IOR_EXPR
2406 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2407 tem = invert_truthvalue_loc (loc, tem);
2409 tem = fold_convert_loc (loc, optype, tem);
2410 if (stmt)
2412 gsi = gsi_for_stmt (stmt);
2413 uid = gimple_uid (stmt);
2415 else
2417 gsi = gsi_none ();
2418 uid = 0;
2420 if (stmt == NULL)
2421 gcc_checking_assert (tem == op);
2422 /* In rare cases range->exp can be equal to lhs of stmt.
2423 In that case we have to insert after the stmt rather then before
2424 it. If stmt is a PHI, insert it at the start of the basic block. */
2425 else if (op != range->exp)
2427 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2428 tem = force_into_ssa_name (&gsi, tem, true);
2429 gsi_prev (&gsi);
2431 else if (gimple_code (stmt) != GIMPLE_PHI)
2433 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2434 tem = force_into_ssa_name (&gsi, tem, false);
2436 else
2438 gsi = gsi_after_labels (gimple_bb (stmt));
2439 if (!gsi_end_p (gsi))
2440 uid = gimple_uid (gsi_stmt (gsi));
2441 else
2443 gsi = gsi_start_bb (gimple_bb (stmt));
2444 uid = 1;
2445 while (!gsi_end_p (gsi))
2447 uid = gimple_uid (gsi_stmt (gsi));
2448 gsi_next (&gsi);
2451 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2452 tem = force_into_ssa_name (&gsi, tem, true);
2453 if (gsi_end_p (gsi))
2454 gsi = gsi_last_bb (gimple_bb (stmt));
2455 else
2456 gsi_prev (&gsi);
2458 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2459 if (gimple_uid (gsi_stmt (gsi)))
2460 break;
2461 else
2462 gimple_set_uid (gsi_stmt (gsi), uid);
2464 oe->op = tem;
2465 range->exp = exp;
2466 range->low = low;
2467 range->high = high;
2468 range->in_p = in_p;
2469 range->strict_overflow_p = false;
2471 for (i = 0; i < count; i++)
2473 if (otherrange)
2474 range = otherrange + i;
2475 else
2476 range = otherrangep[i];
2477 oe = (*ops)[range->idx];
2478 /* Now change all the other range test immediate uses, so that
2479 those tests will be optimized away. */
2480 if (opcode == ERROR_MARK)
2482 if (oe->op)
2483 oe->op = build_int_cst (TREE_TYPE (oe->op),
2484 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2485 else
2486 oe->op = (oe->rank == BIT_IOR_EXPR
2487 ? boolean_false_node : boolean_true_node);
2489 else
2490 oe->op = error_mark_node;
2491 range->exp = NULL_TREE;
2492 range->low = NULL_TREE;
2493 range->high = NULL_TREE;
2495 return true;
2498 /* Optimize X == CST1 || X == CST2
2499 if popcount (CST1 ^ CST2) == 1 into
2500 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2501 Similarly for ranges. E.g.
2502 X != 2 && X != 3 && X != 10 && X != 11
2503 will be transformed by the previous optimization into
2504 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2505 and this loop can transform that into
2506 !(((X & ~8) - 2U) <= 1U). */
2508 static bool
2509 optimize_range_tests_xor (enum tree_code opcode, tree type,
2510 tree lowi, tree lowj, tree highi, tree highj,
2511 vec<operand_entry *> *ops,
2512 struct range_entry *rangei,
2513 struct range_entry *rangej)
2515 tree lowxor, highxor, tem, exp;
2516 /* Check lowi ^ lowj == highi ^ highj and
2517 popcount (lowi ^ lowj) == 1. */
2518 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2519 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2520 return false;
2521 if (!integer_pow2p (lowxor))
2522 return false;
2523 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2524 if (!tree_int_cst_equal (lowxor, highxor))
2525 return false;
2527 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2528 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2529 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2530 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2531 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2532 NULL, rangei->in_p, lowj, highj,
2533 rangei->strict_overflow_p
2534 || rangej->strict_overflow_p))
2535 return true;
2536 return false;
2539 /* Optimize X == CST1 || X == CST2
2540 if popcount (CST2 - CST1) == 1 into
2541 ((X - CST1) & ~(CST2 - CST1)) == 0.
2542 Similarly for ranges. E.g.
2543 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2544 || X == 75 || X == 45
2545 will be transformed by the previous optimization into
2546 (X - 43U) <= 3U || (X - 75U) <= 3U
2547 and this loop can transform that into
2548 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2549 static bool
2550 optimize_range_tests_diff (enum tree_code opcode, tree type,
2551 tree lowi, tree lowj, tree highi, tree highj,
2552 vec<operand_entry *> *ops,
2553 struct range_entry *rangei,
2554 struct range_entry *rangej)
2556 tree tem1, tem2, mask;
2557 /* Check highi - lowi == highj - lowj. */
2558 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2559 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2560 return false;
2561 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2562 if (!tree_int_cst_equal (tem1, tem2))
2563 return false;
2564 /* Check popcount (lowj - lowi) == 1. */
2565 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2566 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2567 return false;
2568 if (!integer_pow2p (tem1))
2569 return false;
2571 type = unsigned_type_for (type);
2572 tem1 = fold_convert (type, tem1);
2573 tem2 = fold_convert (type, tem2);
2574 lowi = fold_convert (type, lowi);
2575 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2576 tem1 = fold_build2 (MINUS_EXPR, type,
2577 fold_convert (type, rangei->exp), lowi);
2578 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2579 lowj = build_int_cst (type, 0);
2580 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2581 NULL, rangei->in_p, lowj, tem2,
2582 rangei->strict_overflow_p
2583 || rangej->strict_overflow_p))
2584 return true;
2585 return false;
2588 /* It does some common checks for function optimize_range_tests_xor and
2589 optimize_range_tests_diff.
2590 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2591 Else it calls optimize_range_tests_diff. */
2593 static bool
2594 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2595 bool optimize_xor, vec<operand_entry *> *ops,
2596 struct range_entry *ranges)
2598 int i, j;
2599 bool any_changes = false;
2600 for (i = first; i < length; i++)
2602 tree lowi, highi, lowj, highj, type, tem;
2604 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2605 continue;
2606 type = TREE_TYPE (ranges[i].exp);
2607 if (!INTEGRAL_TYPE_P (type))
2608 continue;
2609 lowi = ranges[i].low;
2610 if (lowi == NULL_TREE)
2611 lowi = TYPE_MIN_VALUE (type);
2612 highi = ranges[i].high;
2613 if (highi == NULL_TREE)
2614 continue;
2615 for (j = i + 1; j < length && j < i + 64; j++)
2617 bool changes;
2618 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2619 continue;
2620 lowj = ranges[j].low;
2621 if (lowj == NULL_TREE)
2622 continue;
2623 highj = ranges[j].high;
2624 if (highj == NULL_TREE)
2625 highj = TYPE_MAX_VALUE (type);
2626 /* Check lowj > highi. */
2627 tem = fold_binary (GT_EXPR, boolean_type_node,
2628 lowj, highi);
2629 if (tem == NULL_TREE || !integer_onep (tem))
2630 continue;
2631 if (optimize_xor)
2632 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2633 highi, highj, ops,
2634 ranges + i, ranges + j);
2635 else
2636 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2637 highi, highj, ops,
2638 ranges + i, ranges + j);
2639 if (changes)
2641 any_changes = true;
2642 break;
2646 return any_changes;
2649 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2650 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2651 EXP on success, NULL otherwise. */
2653 static tree
2654 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2655 wide_int *mask, tree *totallowp)
2657 tree tem = int_const_binop (MINUS_EXPR, high, low);
2658 if (tem == NULL_TREE
2659 || TREE_CODE (tem) != INTEGER_CST
2660 || TREE_OVERFLOW (tem)
2661 || tree_int_cst_sgn (tem) == -1
2662 || compare_tree_int (tem, prec) != -1)
2663 return NULL_TREE;
2665 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2666 *mask = wi::shifted_mask (0, max, false, prec);
2667 if (TREE_CODE (exp) == BIT_AND_EXPR
2668 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2670 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2671 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2672 if (wi::popcount (msk) == 1
2673 && wi::ltu_p (msk, prec - max))
2675 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2676 max += msk.to_uhwi ();
2677 exp = TREE_OPERAND (exp, 0);
2678 if (integer_zerop (low)
2679 && TREE_CODE (exp) == PLUS_EXPR
2680 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2682 tree ret = TREE_OPERAND (exp, 0);
2683 STRIP_NOPS (ret);
2684 widest_int bias
2685 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2686 TYPE_PRECISION (TREE_TYPE (low))));
2687 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2688 if (totallowp)
2690 *totallowp = tbias;
2691 return ret;
2693 else if (!tree_int_cst_lt (totallow, tbias))
2694 return NULL_TREE;
2695 bias = wi::to_widest (tbias);
2696 bias -= wi::to_widest (totallow);
2697 if (bias >= 0 && bias < prec - max)
2699 *mask = wi::lshift (*mask, bias);
2700 return ret;
2705 if (totallowp)
2706 return exp;
2707 if (!tree_int_cst_lt (totallow, low))
2708 return exp;
2709 tem = int_const_binop (MINUS_EXPR, low, totallow);
2710 if (tem == NULL_TREE
2711 || TREE_CODE (tem) != INTEGER_CST
2712 || TREE_OVERFLOW (tem)
2713 || compare_tree_int (tem, prec - max) == 1)
2714 return NULL_TREE;
2716 *mask = wi::lshift (*mask, wi::to_widest (tem));
2717 return exp;
2720 /* Attempt to optimize small range tests using bit test.
2721 E.g.
2722 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2723 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2724 has been by earlier optimizations optimized into:
2725 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2726 As all the 43 through 82 range is less than 64 numbers,
2727 for 64-bit word targets optimize that into:
2728 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2730 static bool
2731 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2732 vec<operand_entry *> *ops,
2733 struct range_entry *ranges)
2735 int i, j;
2736 bool any_changes = false;
2737 int prec = GET_MODE_BITSIZE (word_mode);
2738 auto_vec<struct range_entry *, 64> candidates;
2740 for (i = first; i < length - 2; i++)
2742 tree lowi, highi, lowj, highj, type;
2744 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2745 continue;
2746 type = TREE_TYPE (ranges[i].exp);
2747 if (!INTEGRAL_TYPE_P (type))
2748 continue;
2749 lowi = ranges[i].low;
2750 if (lowi == NULL_TREE)
2751 lowi = TYPE_MIN_VALUE (type);
2752 highi = ranges[i].high;
2753 if (highi == NULL_TREE)
2754 continue;
2755 wide_int mask;
2756 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2757 highi, &mask, &lowi);
2758 if (exp == NULL_TREE)
2759 continue;
2760 bool strict_overflow_p = ranges[i].strict_overflow_p;
2761 candidates.truncate (0);
2762 int end = MIN (i + 64, length);
2763 for (j = i + 1; j < end; j++)
2765 tree exp2;
2766 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2767 continue;
2768 if (ranges[j].exp == exp)
2770 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2772 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2773 if (exp2 == exp)
2775 else if (TREE_CODE (exp2) == PLUS_EXPR)
2777 exp2 = TREE_OPERAND (exp2, 0);
2778 STRIP_NOPS (exp2);
2779 if (exp2 != exp)
2780 continue;
2782 else
2783 continue;
2785 else
2786 continue;
2787 lowj = ranges[j].low;
2788 if (lowj == NULL_TREE)
2789 continue;
2790 highj = ranges[j].high;
2791 if (highj == NULL_TREE)
2792 highj = TYPE_MAX_VALUE (type);
2793 wide_int mask2;
2794 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2795 highj, &mask2, NULL);
2796 if (exp2 != exp)
2797 continue;
2798 mask |= mask2;
2799 strict_overflow_p |= ranges[j].strict_overflow_p;
2800 candidates.safe_push (&ranges[j]);
2803 /* If we need otherwise 3 or more comparisons, use a bit test. */
2804 if (candidates.length () >= 2)
2806 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2807 wi::to_widest (lowi)
2808 + prec - 1 - wi::clz (mask));
2809 operand_entry *oe = (*ops)[ranges[i].idx];
2810 tree op = oe->op;
2811 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2812 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2813 location_t loc = gimple_location (stmt);
2814 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2816 /* See if it isn't cheaper to pretend the minimum value of the
2817 range is 0, if maximum value is small enough.
2818 We can avoid then subtraction of the minimum value, but the
2819 mask constant could be perhaps more expensive. */
2820 if (compare_tree_int (lowi, 0) > 0
2821 && compare_tree_int (high, prec) < 0)
2823 int cost_diff;
2824 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2825 rtx reg = gen_raw_REG (word_mode, 10000);
2826 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2827 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2828 GEN_INT (-m)), speed_p);
2829 rtx r = immed_wide_int_const (mask, word_mode);
2830 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2831 word_mode, speed_p);
2832 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2833 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2834 word_mode, speed_p);
2835 if (cost_diff > 0)
2837 mask = wi::lshift (mask, m);
2838 lowi = build_zero_cst (TREE_TYPE (lowi));
2842 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2843 false, lowi, high);
2844 if (tem == NULL_TREE || is_gimple_val (tem))
2845 continue;
2846 tree etype = unsigned_type_for (TREE_TYPE (exp));
2847 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2848 fold_convert_loc (loc, etype, exp),
2849 fold_convert_loc (loc, etype, lowi));
2850 exp = fold_convert_loc (loc, integer_type_node, exp);
2851 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2852 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2853 build_int_cst (word_type, 1), exp);
2854 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2855 wide_int_to_tree (word_type, mask));
2856 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2857 build_zero_cst (word_type));
2858 if (is_gimple_val (exp))
2859 continue;
2861 /* The shift might have undefined behavior if TEM is true,
2862 but reassociate_bb isn't prepared to have basic blocks
2863 split when it is running. So, temporarily emit a code
2864 with BIT_IOR_EXPR instead of &&, and fix it up in
2865 branch_fixup. */
2866 gimple_seq seq;
2867 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2868 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2869 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2870 gimple_seq seq2;
2871 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2872 gimple_seq_add_seq_without_update (&seq, seq2);
2873 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2874 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2875 gimple *g = gimple_build_assign (make_ssa_name (optype),
2876 BIT_IOR_EXPR, tem, exp);
2877 gimple_set_location (g, loc);
2878 gimple_seq_add_stmt_without_update (&seq, g);
2879 exp = gimple_assign_lhs (g);
2880 tree val = build_zero_cst (optype);
2881 if (update_range_test (&ranges[i], NULL, candidates.address (),
2882 candidates.length (), opcode, ops, exp,
2883 seq, false, val, val, strict_overflow_p))
2885 any_changes = true;
2886 reassoc_branch_fixups.safe_push (tem);
2888 else
2889 gimple_seq_discard (seq);
2892 return any_changes;
2895 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
2896 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. */
2898 static bool
2899 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2900 vec<operand_entry *> *ops,
2901 struct range_entry *ranges)
2903 int i;
2904 unsigned int b;
2905 bool any_changes = false;
2906 auto_vec<int, 128> buckets;
2907 auto_vec<int, 32> chains;
2908 auto_vec<struct range_entry *, 32> candidates;
2910 for (i = first; i < length; i++)
2912 if (ranges[i].exp == NULL_TREE
2913 || TREE_CODE (ranges[i].exp) != SSA_NAME
2914 || !ranges[i].in_p
2915 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2916 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2917 || ranges[i].low == NULL_TREE
2918 || ranges[i].low != ranges[i].high)
2919 continue;
2921 bool zero_p = integer_zerop (ranges[i].low);
2922 if (!zero_p && !integer_all_onesp (ranges[i].low))
2923 continue;
2925 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2926 if (buckets.length () <= b)
2927 buckets.safe_grow_cleared (b + 1);
2928 if (chains.length () <= (unsigned) i)
2929 chains.safe_grow (i + 1);
2930 chains[i] = buckets[b];
2931 buckets[b] = i + 1;
2934 FOR_EACH_VEC_ELT (buckets, b, i)
2935 if (i && chains[i - 1])
2937 int j, k = i;
2938 for (j = chains[i - 1]; j; j = chains[j - 1])
2940 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2941 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2942 if (reassoc_stmt_dominates_stmt_p (gk, gj))
2943 k = j;
2945 tree type1 = TREE_TYPE (ranges[k - 1].exp);
2946 tree type2 = NULL_TREE;
2947 bool strict_overflow_p = false;
2948 candidates.truncate (0);
2949 for (j = i; j; j = chains[j - 1])
2951 tree type = TREE_TYPE (ranges[j - 1].exp);
2952 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2953 if (j == k
2954 || useless_type_conversion_p (type1, type))
2956 else if (type2 == NULL_TREE
2957 || useless_type_conversion_p (type2, type))
2959 if (type2 == NULL_TREE)
2960 type2 = type;
2961 candidates.safe_push (&ranges[j - 1]);
2964 unsigned l = candidates.length ();
2965 for (j = i; j; j = chains[j - 1])
2967 tree type = TREE_TYPE (ranges[j - 1].exp);
2968 if (j == k)
2969 continue;
2970 if (useless_type_conversion_p (type1, type))
2972 else if (type2 == NULL_TREE
2973 || useless_type_conversion_p (type2, type))
2974 continue;
2975 candidates.safe_push (&ranges[j - 1]);
2977 gimple_seq seq = NULL;
2978 tree op = NULL_TREE;
2979 unsigned int id;
2980 struct range_entry *r;
2981 candidates.safe_push (&ranges[k - 1]);
2982 FOR_EACH_VEC_ELT (candidates, id, r)
2984 gimple *g;
2985 if (id == 0)
2987 op = r->exp;
2988 continue;
2990 if (id == l)
2992 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
2993 gimple_seq_add_stmt_without_update (&seq, g);
2994 op = gimple_assign_lhs (g);
2996 tree type = TREE_TYPE (r->exp);
2997 tree exp = r->exp;
2998 if (id >= l && !useless_type_conversion_p (type1, type))
3000 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3001 gimple_seq_add_stmt_without_update (&seq, g);
3002 exp = gimple_assign_lhs (g);
3004 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3005 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3006 op, exp);
3007 gimple_seq_add_stmt_without_update (&seq, g);
3008 op = gimple_assign_lhs (g);
3010 candidates.pop ();
3011 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3012 candidates.length (), opcode, ops, op,
3013 seq, true, ranges[k - 1].low,
3014 ranges[k - 1].low, strict_overflow_p))
3015 any_changes = true;
3016 else
3017 gimple_seq_discard (seq);
3020 return any_changes;
3023 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3024 a >= 0 && a < b into (unsigned) a < (unsigned) b
3025 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3027 static bool
3028 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3029 vec<operand_entry *> *ops,
3030 struct range_entry *ranges)
3032 int i;
3033 bool any_changes = false;
3034 hash_map<tree, int> *map = NULL;
3036 for (i = first; i < length; i++)
3038 if (ranges[i].exp == NULL_TREE
3039 || TREE_CODE (ranges[i].exp) != SSA_NAME
3040 || !ranges[i].in_p)
3041 continue;
3043 tree type = TREE_TYPE (ranges[i].exp);
3044 if (!INTEGRAL_TYPE_P (type)
3045 || TYPE_UNSIGNED (type)
3046 || ranges[i].low == NULL_TREE
3047 || !integer_zerop (ranges[i].low)
3048 || ranges[i].high != NULL_TREE)
3049 continue;
3050 /* EXP >= 0 here. */
3051 if (map == NULL)
3052 map = new hash_map <tree, int>;
3053 map->put (ranges[i].exp, i);
3056 if (map == NULL)
3057 return false;
3059 for (i = 0; i < length; i++)
3061 bool in_p = ranges[i].in_p;
3062 if (ranges[i].low == NULL_TREE
3063 || ranges[i].high == NULL_TREE)
3064 continue;
3065 if (!integer_zerop (ranges[i].low)
3066 || !integer_zerop (ranges[i].high))
3068 if (ranges[i].exp
3069 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3070 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3071 && integer_onep (ranges[i].low)
3072 && integer_onep (ranges[i].high))
3073 in_p = !in_p;
3074 else
3075 continue;
3078 gimple *stmt;
3079 tree_code ccode;
3080 tree rhs1, rhs2;
3081 if (ranges[i].exp)
3083 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3084 continue;
3085 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3086 if (!is_gimple_assign (stmt))
3087 continue;
3088 ccode = gimple_assign_rhs_code (stmt);
3089 rhs1 = gimple_assign_rhs1 (stmt);
3090 rhs2 = gimple_assign_rhs2 (stmt);
3092 else
3094 operand_entry *oe = (*ops)[ranges[i].idx];
3095 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3096 if (gimple_code (stmt) != GIMPLE_COND)
3097 continue;
3098 ccode = gimple_cond_code (stmt);
3099 rhs1 = gimple_cond_lhs (stmt);
3100 rhs2 = gimple_cond_rhs (stmt);
3103 if (TREE_CODE (rhs1) != SSA_NAME
3104 || rhs2 == NULL_TREE
3105 || TREE_CODE (rhs2) != SSA_NAME)
3106 continue;
3108 switch (ccode)
3110 case GT_EXPR:
3111 case GE_EXPR:
3112 case LT_EXPR:
3113 case LE_EXPR:
3114 break;
3115 default:
3116 continue;
3118 if (in_p)
3119 ccode = invert_tree_comparison (ccode, false);
3120 switch (ccode)
3122 case GT_EXPR:
3123 case GE_EXPR:
3124 std::swap (rhs1, rhs2);
3125 ccode = swap_tree_comparison (ccode);
3126 break;
3127 case LT_EXPR:
3128 case LE_EXPR:
3129 break;
3130 default:
3131 gcc_unreachable ();
3134 int *idx = map->get (rhs1);
3135 if (idx == NULL)
3136 continue;
3138 wide_int nz = get_nonzero_bits (rhs2);
3139 if (wi::neg_p (nz))
3140 continue;
3142 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3143 and RHS2 is known to be RHS2 >= 0. */
3144 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3146 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3147 if ((ranges[*idx].strict_overflow_p
3148 || ranges[i].strict_overflow_p)
3149 && issue_strict_overflow_warning (wc))
3150 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3151 "assuming signed overflow does not occur "
3152 "when simplifying range test");
3154 if (dump_file && (dump_flags & TDF_DETAILS))
3156 struct range_entry *r = &ranges[*idx];
3157 fprintf (dump_file, "Optimizing range test ");
3158 print_generic_expr (dump_file, r->exp);
3159 fprintf (dump_file, " +[");
3160 print_generic_expr (dump_file, r->low);
3161 fprintf (dump_file, ", ");
3162 print_generic_expr (dump_file, r->high);
3163 fprintf (dump_file, "] and comparison ");
3164 print_generic_expr (dump_file, rhs1);
3165 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3166 print_generic_expr (dump_file, rhs2);
3167 fprintf (dump_file, "\n into (");
3168 print_generic_expr (dump_file, utype);
3169 fprintf (dump_file, ") ");
3170 print_generic_expr (dump_file, rhs1);
3171 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3172 print_generic_expr (dump_file, utype);
3173 fprintf (dump_file, ") ");
3174 print_generic_expr (dump_file, rhs2);
3175 fprintf (dump_file, "\n");
3178 operand_entry *oe = (*ops)[ranges[i].idx];
3179 ranges[i].in_p = 0;
3180 if (opcode == BIT_IOR_EXPR
3181 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3183 ranges[i].in_p = 1;
3184 ccode = invert_tree_comparison (ccode, false);
3187 unsigned int uid = gimple_uid (stmt);
3188 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3189 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3190 gimple_set_uid (g, uid);
3191 rhs1 = gimple_assign_lhs (g);
3192 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3193 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3194 gimple_set_uid (g, uid);
3195 rhs2 = gimple_assign_lhs (g);
3196 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3197 if (tree_swap_operands_p (rhs1, rhs2))
3199 std::swap (rhs1, rhs2);
3200 ccode = swap_tree_comparison (ccode);
3202 if (gimple_code (stmt) == GIMPLE_COND)
3204 gcond *c = as_a <gcond *> (stmt);
3205 gimple_cond_set_code (c, ccode);
3206 gimple_cond_set_lhs (c, rhs1);
3207 gimple_cond_set_rhs (c, rhs2);
3208 update_stmt (stmt);
3210 else
3212 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3213 if (!INTEGRAL_TYPE_P (ctype)
3214 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3215 && TYPE_PRECISION (ctype) != 1))
3216 ctype = boolean_type_node;
3217 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3218 gimple_set_uid (g, uid);
3219 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3220 if (oe->op && ctype != TREE_TYPE (oe->op))
3222 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3223 NOP_EXPR, gimple_assign_lhs (g));
3224 gimple_set_uid (g, uid);
3225 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3227 ranges[i].exp = gimple_assign_lhs (g);
3228 oe->op = ranges[i].exp;
3229 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3230 ranges[i].high = ranges[i].low;
3232 ranges[i].strict_overflow_p = false;
3233 oe = (*ops)[ranges[*idx].idx];
3234 /* Now change all the other range test immediate uses, so that
3235 those tests will be optimized away. */
3236 if (opcode == ERROR_MARK)
3238 if (oe->op)
3239 oe->op = build_int_cst (TREE_TYPE (oe->op),
3240 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3241 else
3242 oe->op = (oe->rank == BIT_IOR_EXPR
3243 ? boolean_false_node : boolean_true_node);
3245 else
3246 oe->op = error_mark_node;
3247 ranges[*idx].exp = NULL_TREE;
3248 ranges[*idx].low = NULL_TREE;
3249 ranges[*idx].high = NULL_TREE;
3250 any_changes = true;
3253 delete map;
3254 return any_changes;
3257 /* Optimize range tests, similarly how fold_range_test optimizes
3258 it on trees. The tree code for the binary
3259 operation between all the operands is OPCODE.
3260 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3261 maybe_optimize_range_tests for inter-bb range optimization.
3262 In that case if oe->op is NULL, oe->id is bb->index whose
3263 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3264 the actual opcode. */
3266 static bool
3267 optimize_range_tests (enum tree_code opcode,
3268 vec<operand_entry *> *ops)
3270 unsigned int length = ops->length (), i, j, first;
3271 operand_entry *oe;
3272 struct range_entry *ranges;
3273 bool any_changes = false;
3275 if (length == 1)
3276 return false;
3278 ranges = XNEWVEC (struct range_entry, length);
3279 for (i = 0; i < length; i++)
3281 oe = (*ops)[i];
3282 ranges[i].idx = i;
3283 init_range_entry (ranges + i, oe->op,
3284 oe->op
3285 ? NULL
3286 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3287 /* For | invert it now, we will invert it again before emitting
3288 the optimized expression. */
3289 if (opcode == BIT_IOR_EXPR
3290 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3291 ranges[i].in_p = !ranges[i].in_p;
3294 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3295 for (i = 0; i < length; i++)
3296 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3297 break;
3299 /* Try to merge ranges. */
3300 for (first = i; i < length; i++)
3302 tree low = ranges[i].low;
3303 tree high = ranges[i].high;
3304 int in_p = ranges[i].in_p;
3305 bool strict_overflow_p = ranges[i].strict_overflow_p;
3306 int update_fail_count = 0;
3308 for (j = i + 1; j < length; j++)
3310 if (ranges[i].exp != ranges[j].exp)
3311 break;
3312 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3313 ranges[j].in_p, ranges[j].low, ranges[j].high))
3314 break;
3315 strict_overflow_p |= ranges[j].strict_overflow_p;
3318 if (j == i + 1)
3319 continue;
3321 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3322 opcode, ops, ranges[i].exp, NULL, in_p,
3323 low, high, strict_overflow_p))
3325 i = j - 1;
3326 any_changes = true;
3328 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3329 while update_range_test would fail. */
3330 else if (update_fail_count == 64)
3331 i = j - 1;
3332 else
3333 ++update_fail_count;
3336 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3337 ops, ranges);
3339 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3340 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3341 ops, ranges);
3342 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3343 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3344 ops, ranges);
3345 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3346 ops, ranges);
3347 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3348 ranges);
3350 if (any_changes && opcode != ERROR_MARK)
3352 j = 0;
3353 FOR_EACH_VEC_ELT (*ops, i, oe)
3355 if (oe->op == error_mark_node)
3356 continue;
3357 else if (i != j)
3358 (*ops)[j] = oe;
3359 j++;
3361 ops->truncate (j);
3364 XDELETEVEC (ranges);
3365 return any_changes;
3368 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3369 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3370 otherwise the comparison code. */
3372 static tree_code
3373 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3375 if (TREE_CODE (var) != SSA_NAME)
3376 return ERROR_MARK;
3378 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3379 if (stmt == NULL)
3380 return ERROR_MARK;
3382 /* ??? If we start creating more COND_EXPR, we could perform
3383 this same optimization with them. For now, simplify. */
3384 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3385 return ERROR_MARK;
3387 tree cond = gimple_assign_rhs1 (stmt);
3388 tree_code cmp = TREE_CODE (cond);
3389 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3390 return ERROR_MARK;
3392 /* ??? For now, allow only canonical true and false result vectors.
3393 We could expand this to other constants should the need arise,
3394 but at the moment we don't create them. */
3395 tree t = gimple_assign_rhs2 (stmt);
3396 tree f = gimple_assign_rhs3 (stmt);
3397 bool inv;
3398 if (integer_all_onesp (t))
3399 inv = false;
3400 else if (integer_all_onesp (f))
3402 cmp = invert_tree_comparison (cmp, false);
3403 inv = true;
3405 else
3406 return ERROR_MARK;
3407 if (!integer_zerop (f))
3408 return ERROR_MARK;
3410 /* Success! */
3411 if (rets)
3412 *rets = stmt;
3413 if (reti)
3414 *reti = inv;
3415 return cmp;
3418 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3419 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3421 static bool
3422 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3424 unsigned int length = ops->length (), i, j;
3425 bool any_changes = false;
3427 if (length == 1)
3428 return false;
3430 for (i = 0; i < length; ++i)
3432 tree elt0 = (*ops)[i]->op;
3434 gassign *stmt0;
3435 bool invert;
3436 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3437 if (cmp0 == ERROR_MARK)
3438 continue;
3440 for (j = i + 1; j < length; ++j)
3442 tree &elt1 = (*ops)[j]->op;
3444 gassign *stmt1;
3445 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3446 if (cmp1 == ERROR_MARK)
3447 continue;
3449 tree cond0 = gimple_assign_rhs1 (stmt0);
3450 tree x0 = TREE_OPERAND (cond0, 0);
3451 tree y0 = TREE_OPERAND (cond0, 1);
3453 tree cond1 = gimple_assign_rhs1 (stmt1);
3454 tree x1 = TREE_OPERAND (cond1, 0);
3455 tree y1 = TREE_OPERAND (cond1, 1);
3457 tree comb;
3458 if (opcode == BIT_AND_EXPR)
3459 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3460 else if (opcode == BIT_IOR_EXPR)
3461 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3462 else
3463 gcc_unreachable ();
3464 if (comb == NULL)
3465 continue;
3467 /* Success! */
3468 if (dump_file && (dump_flags & TDF_DETAILS))
3470 fprintf (dump_file, "Transforming ");
3471 print_generic_expr (dump_file, cond0);
3472 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3473 print_generic_expr (dump_file, cond1);
3474 fprintf (dump_file, " into ");
3475 print_generic_expr (dump_file, comb);
3476 fputc ('\n', dump_file);
3479 gimple_assign_set_rhs1 (stmt0, comb);
3480 if (invert)
3481 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3482 *gimple_assign_rhs3_ptr (stmt0));
3483 update_stmt (stmt0);
3485 elt1 = error_mark_node;
3486 any_changes = true;
3490 if (any_changes)
3492 operand_entry *oe;
3493 j = 0;
3494 FOR_EACH_VEC_ELT (*ops, i, oe)
3496 if (oe->op == error_mark_node)
3497 continue;
3498 else if (i != j)
3499 (*ops)[j] = oe;
3500 j++;
3502 ops->truncate (j);
3505 return any_changes;
3508 /* Return true if STMT is a cast like:
3509 <bb N>:
3511 _123 = (int) _234;
3513 <bb M>:
3514 # _345 = PHI <_123(N), 1(...), 1(...)>
3515 where _234 has bool type, _123 has single use and
3516 bb N has a single successor M. This is commonly used in
3517 the last block of a range test.
3519 Also Return true if STMT is tcc_compare like:
3520 <bb N>:
3522 _234 = a_2(D) == 2;
3524 <bb M>:
3525 # _345 = PHI <_234(N), 1(...), 1(...)>
3526 _346 = (int) _345;
3527 where _234 has booltype, single use and
3528 bb N has a single successor M. This is commonly used in
3529 the last block of a range test. */
3531 static bool
3532 final_range_test_p (gimple *stmt)
3534 basic_block bb, rhs_bb, lhs_bb;
3535 edge e;
3536 tree lhs, rhs;
3537 use_operand_p use_p;
3538 gimple *use_stmt;
3540 if (!gimple_assign_cast_p (stmt)
3541 && (!is_gimple_assign (stmt)
3542 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3543 != tcc_comparison)))
3544 return false;
3545 bb = gimple_bb (stmt);
3546 if (!single_succ_p (bb))
3547 return false;
3548 e = single_succ_edge (bb);
3549 if (e->flags & EDGE_COMPLEX)
3550 return false;
3552 lhs = gimple_assign_lhs (stmt);
3553 rhs = gimple_assign_rhs1 (stmt);
3554 if (gimple_assign_cast_p (stmt)
3555 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3556 || TREE_CODE (rhs) != SSA_NAME
3557 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3558 return false;
3560 if (!gimple_assign_cast_p (stmt)
3561 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3562 return false;
3564 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3565 if (!single_imm_use (lhs, &use_p, &use_stmt))
3566 return false;
3568 if (gimple_code (use_stmt) != GIMPLE_PHI
3569 || gimple_bb (use_stmt) != e->dest)
3570 return false;
3572 /* And that the rhs is defined in the same loop. */
3573 if (gimple_assign_cast_p (stmt))
3575 if (TREE_CODE (rhs) != SSA_NAME
3576 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3577 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3578 return false;
3580 else
3582 if (TREE_CODE (lhs) != SSA_NAME
3583 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3584 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3585 return false;
3588 return true;
3591 /* Return true if BB is suitable basic block for inter-bb range test
3592 optimization. If BACKWARD is true, BB should be the only predecessor
3593 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3594 or compared with to find a common basic block to which all conditions
3595 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3596 be the only predecessor of BB. */
3598 static bool
3599 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3600 bool backward)
3602 edge_iterator ei, ei2;
3603 edge e, e2;
3604 gimple *stmt;
3605 gphi_iterator gsi;
3606 bool other_edge_seen = false;
3607 bool is_cond;
3609 if (test_bb == bb)
3610 return false;
3611 /* Check last stmt first. */
3612 stmt = last_stmt (bb);
3613 if (stmt == NULL
3614 || (gimple_code (stmt) != GIMPLE_COND
3615 && (backward || !final_range_test_p (stmt)))
3616 || gimple_visited_p (stmt)
3617 || stmt_could_throw_p (stmt)
3618 || *other_bb == bb)
3619 return false;
3620 is_cond = gimple_code (stmt) == GIMPLE_COND;
3621 if (is_cond)
3623 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3624 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3625 to *OTHER_BB (if not set yet, try to find it out). */
3626 if (EDGE_COUNT (bb->succs) != 2)
3627 return false;
3628 FOR_EACH_EDGE (e, ei, bb->succs)
3630 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3631 return false;
3632 if (e->dest == test_bb)
3634 if (backward)
3635 continue;
3636 else
3637 return false;
3639 if (e->dest == bb)
3640 return false;
3641 if (*other_bb == NULL)
3643 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3644 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3645 return false;
3646 else if (e->dest == e2->dest)
3647 *other_bb = e->dest;
3648 if (*other_bb == NULL)
3649 return false;
3651 if (e->dest == *other_bb)
3652 other_edge_seen = true;
3653 else if (backward)
3654 return false;
3656 if (*other_bb == NULL || !other_edge_seen)
3657 return false;
3659 else if (single_succ (bb) != *other_bb)
3660 return false;
3662 /* Now check all PHIs of *OTHER_BB. */
3663 e = find_edge (bb, *other_bb);
3664 e2 = find_edge (test_bb, *other_bb);
3665 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3667 gphi *phi = gsi.phi ();
3668 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3669 corresponding to BB and TEST_BB predecessor must be the same. */
3670 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3671 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3673 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3674 one of the PHIs should have the lhs of the last stmt in
3675 that block as PHI arg and that PHI should have 0 or 1
3676 corresponding to it in all other range test basic blocks
3677 considered. */
3678 if (!is_cond)
3680 if (gimple_phi_arg_def (phi, e->dest_idx)
3681 == gimple_assign_lhs (stmt)
3682 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3683 || integer_onep (gimple_phi_arg_def (phi,
3684 e2->dest_idx))))
3685 continue;
3687 else
3689 gimple *test_last = last_stmt (test_bb);
3690 if (gimple_code (test_last) != GIMPLE_COND
3691 && gimple_phi_arg_def (phi, e2->dest_idx)
3692 == gimple_assign_lhs (test_last)
3693 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3694 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3695 continue;
3698 return false;
3701 return true;
3704 /* Return true if BB doesn't have side-effects that would disallow
3705 range test optimization, all SSA_NAMEs set in the bb are consumed
3706 in the bb and there are no PHIs. */
3708 static bool
3709 no_side_effect_bb (basic_block bb)
3711 gimple_stmt_iterator gsi;
3712 gimple *last;
3714 if (!gimple_seq_empty_p (phi_nodes (bb)))
3715 return false;
3716 last = last_stmt (bb);
3717 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3719 gimple *stmt = gsi_stmt (gsi);
3720 tree lhs;
3721 imm_use_iterator imm_iter;
3722 use_operand_p use_p;
3724 if (is_gimple_debug (stmt))
3725 continue;
3726 if (gimple_has_side_effects (stmt))
3727 return false;
3728 if (stmt == last)
3729 return true;
3730 if (!is_gimple_assign (stmt))
3731 return false;
3732 lhs = gimple_assign_lhs (stmt);
3733 if (TREE_CODE (lhs) != SSA_NAME)
3734 return false;
3735 if (gimple_assign_rhs_could_trap_p (stmt))
3736 return false;
3737 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3739 gimple *use_stmt = USE_STMT (use_p);
3740 if (is_gimple_debug (use_stmt))
3741 continue;
3742 if (gimple_bb (use_stmt) != bb)
3743 return false;
3746 return false;
3749 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3750 return true and fill in *OPS recursively. */
3752 static bool
3753 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3754 struct loop *loop)
3756 gimple *stmt = SSA_NAME_DEF_STMT (var);
3757 tree rhs[2];
3758 int i;
3760 if (!is_reassociable_op (stmt, code, loop))
3761 return false;
3763 rhs[0] = gimple_assign_rhs1 (stmt);
3764 rhs[1] = gimple_assign_rhs2 (stmt);
3765 gimple_set_visited (stmt, true);
3766 for (i = 0; i < 2; i++)
3767 if (TREE_CODE (rhs[i]) == SSA_NAME
3768 && !get_ops (rhs[i], code, ops, loop)
3769 && has_single_use (rhs[i]))
3771 operand_entry *oe = operand_entry_pool.allocate ();
3773 oe->op = rhs[i];
3774 oe->rank = code;
3775 oe->id = 0;
3776 oe->count = 1;
3777 oe->stmt_to_insert = NULL;
3778 ops->safe_push (oe);
3780 return true;
3783 /* Find the ops that were added by get_ops starting from VAR, see if
3784 they were changed during update_range_test and if yes, create new
3785 stmts. */
3787 static tree
3788 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3789 unsigned int *pidx, struct loop *loop)
3791 gimple *stmt = SSA_NAME_DEF_STMT (var);
3792 tree rhs[4];
3793 int i;
3795 if (!is_reassociable_op (stmt, code, loop))
3796 return NULL;
3798 rhs[0] = gimple_assign_rhs1 (stmt);
3799 rhs[1] = gimple_assign_rhs2 (stmt);
3800 rhs[2] = rhs[0];
3801 rhs[3] = rhs[1];
3802 for (i = 0; i < 2; i++)
3803 if (TREE_CODE (rhs[i]) == SSA_NAME)
3805 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3806 if (rhs[2 + i] == NULL_TREE)
3808 if (has_single_use (rhs[i]))
3809 rhs[2 + i] = ops[(*pidx)++]->op;
3810 else
3811 rhs[2 + i] = rhs[i];
3814 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3815 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3817 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3818 var = make_ssa_name (TREE_TYPE (var));
3819 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3820 rhs[2], rhs[3]);
3821 gimple_set_uid (g, gimple_uid (stmt));
3822 gimple_set_visited (g, true);
3823 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3825 return var;
3828 /* Structure to track the initial value passed to get_ops and
3829 the range in the ops vector for each basic block. */
3831 struct inter_bb_range_test_entry
3833 tree op;
3834 unsigned int first_idx, last_idx;
3837 /* Inter-bb range test optimization.
3839 Returns TRUE if a gimple conditional is optimized to a true/false,
3840 otherwise return FALSE.
3842 This indicates to the caller that it should run a CFG cleanup pass
3843 once reassociation is completed. */
3845 static bool
3846 maybe_optimize_range_tests (gimple *stmt)
3848 basic_block first_bb = gimple_bb (stmt);
3849 basic_block last_bb = first_bb;
3850 basic_block other_bb = NULL;
3851 basic_block bb;
3852 edge_iterator ei;
3853 edge e;
3854 auto_vec<operand_entry *> ops;
3855 auto_vec<inter_bb_range_test_entry> bbinfo;
3856 bool any_changes = false;
3857 bool cfg_cleanup_needed = false;
3859 /* Consider only basic blocks that end with GIMPLE_COND or
3860 a cast statement satisfying final_range_test_p. All
3861 but the last bb in the first_bb .. last_bb range
3862 should end with GIMPLE_COND. */
3863 if (gimple_code (stmt) == GIMPLE_COND)
3865 if (EDGE_COUNT (first_bb->succs) != 2)
3866 return cfg_cleanup_needed;
3868 else if (final_range_test_p (stmt))
3869 other_bb = single_succ (first_bb);
3870 else
3871 return cfg_cleanup_needed;
3873 if (stmt_could_throw_p (stmt))
3874 return cfg_cleanup_needed;
3876 /* As relative ordering of post-dominator sons isn't fixed,
3877 maybe_optimize_range_tests can be called first on any
3878 bb in the range we want to optimize. So, start searching
3879 backwards, if first_bb can be set to a predecessor. */
3880 while (single_pred_p (first_bb))
3882 basic_block pred_bb = single_pred (first_bb);
3883 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3884 break;
3885 if (!no_side_effect_bb (first_bb))
3886 break;
3887 first_bb = pred_bb;
3889 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3890 Before starting forward search in last_bb successors, find
3891 out the other_bb. */
3892 if (first_bb == last_bb)
3894 other_bb = NULL;
3895 /* As non-GIMPLE_COND last stmt always terminates the range,
3896 if forward search didn't discover anything, just give up. */
3897 if (gimple_code (stmt) != GIMPLE_COND)
3898 return cfg_cleanup_needed;
3899 /* Look at both successors. Either it ends with a GIMPLE_COND
3900 and satisfies suitable_cond_bb, or ends with a cast and
3901 other_bb is that cast's successor. */
3902 FOR_EACH_EDGE (e, ei, first_bb->succs)
3903 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3904 || e->dest == first_bb)
3905 return cfg_cleanup_needed;
3906 else if (single_pred_p (e->dest))
3908 stmt = last_stmt (e->dest);
3909 if (stmt
3910 && gimple_code (stmt) == GIMPLE_COND
3911 && EDGE_COUNT (e->dest->succs) == 2)
3913 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3914 break;
3915 else
3916 other_bb = NULL;
3918 else if (stmt
3919 && final_range_test_p (stmt)
3920 && find_edge (first_bb, single_succ (e->dest)))
3922 other_bb = single_succ (e->dest);
3923 if (other_bb == first_bb)
3924 other_bb = NULL;
3927 if (other_bb == NULL)
3928 return cfg_cleanup_needed;
3930 /* Now do the forward search, moving last_bb to successor bbs
3931 that aren't other_bb. */
3932 while (EDGE_COUNT (last_bb->succs) == 2)
3934 FOR_EACH_EDGE (e, ei, last_bb->succs)
3935 if (e->dest != other_bb)
3936 break;
3937 if (e == NULL)
3938 break;
3939 if (!single_pred_p (e->dest))
3940 break;
3941 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3942 break;
3943 if (!no_side_effect_bb (e->dest))
3944 break;
3945 last_bb = e->dest;
3947 if (first_bb == last_bb)
3948 return cfg_cleanup_needed;
3949 /* Here basic blocks first_bb through last_bb's predecessor
3950 end with GIMPLE_COND, all of them have one of the edges to
3951 other_bb and another to another block in the range,
3952 all blocks except first_bb don't have side-effects and
3953 last_bb ends with either GIMPLE_COND, or cast satisfying
3954 final_range_test_p. */
3955 for (bb = last_bb; ; bb = single_pred (bb))
3957 enum tree_code code;
3958 tree lhs, rhs;
3959 inter_bb_range_test_entry bb_ent;
3961 bb_ent.op = NULL_TREE;
3962 bb_ent.first_idx = ops.length ();
3963 bb_ent.last_idx = bb_ent.first_idx;
3964 e = find_edge (bb, other_bb);
3965 stmt = last_stmt (bb);
3966 gimple_set_visited (stmt, true);
3967 if (gimple_code (stmt) != GIMPLE_COND)
3969 use_operand_p use_p;
3970 gimple *phi;
3971 edge e2;
3972 unsigned int d;
3974 lhs = gimple_assign_lhs (stmt);
3975 rhs = gimple_assign_rhs1 (stmt);
3976 gcc_assert (bb == last_bb);
3978 /* stmt is
3979 _123 = (int) _234;
3981 _234 = a_2(D) == 2;
3983 followed by:
3984 <bb M>:
3985 # _345 = PHI <_123(N), 1(...), 1(...)>
3987 or 0 instead of 1. If it is 0, the _234
3988 range test is anded together with all the
3989 other range tests, if it is 1, it is ored with
3990 them. */
3991 single_imm_use (lhs, &use_p, &phi);
3992 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3993 e2 = find_edge (first_bb, other_bb);
3994 d = e2->dest_idx;
3995 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3996 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3997 code = BIT_AND_EXPR;
3998 else
4000 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4001 code = BIT_IOR_EXPR;
4004 /* If _234 SSA_NAME_DEF_STMT is
4005 _234 = _567 | _789;
4006 (or &, corresponding to 1/0 in the phi arguments,
4007 push into ops the individual range test arguments
4008 of the bitwise or resp. and, recursively. */
4009 if (TREE_CODE (rhs) == SSA_NAME
4010 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4011 != tcc_comparison)
4012 && !get_ops (rhs, code, &ops,
4013 loop_containing_stmt (stmt))
4014 && has_single_use (rhs))
4016 /* Otherwise, push the _234 range test itself. */
4017 operand_entry *oe = operand_entry_pool.allocate ();
4019 oe->op = rhs;
4020 oe->rank = code;
4021 oe->id = 0;
4022 oe->count = 1;
4023 oe->stmt_to_insert = NULL;
4024 ops.safe_push (oe);
4025 bb_ent.last_idx++;
4026 bb_ent.op = rhs;
4028 else if (is_gimple_assign (stmt)
4029 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4030 == tcc_comparison)
4031 && !get_ops (lhs, code, &ops,
4032 loop_containing_stmt (stmt))
4033 && has_single_use (lhs))
4035 operand_entry *oe = operand_entry_pool.allocate ();
4036 oe->op = lhs;
4037 oe->rank = code;
4038 oe->id = 0;
4039 oe->count = 1;
4040 ops.safe_push (oe);
4041 bb_ent.last_idx++;
4042 bb_ent.op = lhs;
4044 else
4046 bb_ent.last_idx = ops.length ();
4047 bb_ent.op = rhs;
4049 bbinfo.safe_push (bb_ent);
4050 continue;
4052 /* Otherwise stmt is GIMPLE_COND. */
4053 code = gimple_cond_code (stmt);
4054 lhs = gimple_cond_lhs (stmt);
4055 rhs = gimple_cond_rhs (stmt);
4056 if (TREE_CODE (lhs) == SSA_NAME
4057 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4058 && ((code != EQ_EXPR && code != NE_EXPR)
4059 || rhs != boolean_false_node
4060 /* Either push into ops the individual bitwise
4061 or resp. and operands, depending on which
4062 edge is other_bb. */
4063 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4064 ^ (code == EQ_EXPR))
4065 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4066 loop_containing_stmt (stmt))))
4068 /* Or push the GIMPLE_COND stmt itself. */
4069 operand_entry *oe = operand_entry_pool.allocate ();
4071 oe->op = NULL;
4072 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4073 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4074 /* oe->op = NULL signs that there is no SSA_NAME
4075 for the range test, and oe->id instead is the
4076 basic block number, at which's end the GIMPLE_COND
4077 is. */
4078 oe->id = bb->index;
4079 oe->count = 1;
4080 oe->stmt_to_insert = NULL;
4081 ops.safe_push (oe);
4082 bb_ent.op = NULL;
4083 bb_ent.last_idx++;
4085 else if (ops.length () > bb_ent.first_idx)
4087 bb_ent.op = lhs;
4088 bb_ent.last_idx = ops.length ();
4090 bbinfo.safe_push (bb_ent);
4091 if (bb == first_bb)
4092 break;
4094 if (ops.length () > 1)
4095 any_changes = optimize_range_tests (ERROR_MARK, &ops);
4096 if (any_changes)
4098 unsigned int idx, max_idx = 0;
4099 /* update_ops relies on has_single_use predicates returning the
4100 same values as it did during get_ops earlier. Additionally it
4101 never removes statements, only adds new ones and it should walk
4102 from the single imm use and check the predicate already before
4103 making those changes.
4104 On the other side, the handling of GIMPLE_COND directly can turn
4105 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4106 it needs to be done in a separate loop afterwards. */
4107 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4109 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4110 && bbinfo[idx].op != NULL_TREE)
4112 tree new_op;
4114 max_idx = idx;
4115 stmt = last_stmt (bb);
4116 new_op = update_ops (bbinfo[idx].op,
4117 (enum tree_code)
4118 ops[bbinfo[idx].first_idx]->rank,
4119 ops, &bbinfo[idx].first_idx,
4120 loop_containing_stmt (stmt));
4121 if (new_op == NULL_TREE)
4123 gcc_assert (bb == last_bb);
4124 new_op = ops[bbinfo[idx].first_idx++]->op;
4126 if (bbinfo[idx].op != new_op)
4128 imm_use_iterator iter;
4129 use_operand_p use_p;
4130 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4132 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4133 if (is_gimple_debug (use_stmt))
4134 continue;
4135 else if (gimple_code (use_stmt) == GIMPLE_COND
4136 || gimple_code (use_stmt) == GIMPLE_PHI)
4137 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4138 SET_USE (use_p, new_op);
4139 else if ((is_gimple_assign (use_stmt)
4140 && (TREE_CODE_CLASS
4141 (gimple_assign_rhs_code (use_stmt))
4142 == tcc_comparison)))
4143 cast_or_tcc_cmp_stmt = use_stmt;
4144 else if (gimple_assign_cast_p (use_stmt))
4145 cast_or_tcc_cmp_stmt = use_stmt;
4146 else
4147 gcc_unreachable ();
4149 if (cast_or_tcc_cmp_stmt)
4151 gcc_assert (bb == last_bb);
4152 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4153 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4154 enum tree_code rhs_code
4155 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4156 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4157 : CONVERT_EXPR;
4158 gassign *g;
4159 if (is_gimple_min_invariant (new_op))
4161 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4162 g = gimple_build_assign (new_lhs, new_op);
4164 else
4165 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4166 gimple_stmt_iterator gsi
4167 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4168 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4169 gimple_set_visited (g, true);
4170 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4171 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4172 if (is_gimple_debug (use_stmt))
4173 continue;
4174 else if (gimple_code (use_stmt) == GIMPLE_COND
4175 || gimple_code (use_stmt) == GIMPLE_PHI)
4176 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4177 SET_USE (use_p, new_lhs);
4178 else
4179 gcc_unreachable ();
4183 if (bb == first_bb)
4184 break;
4186 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4188 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4189 && bbinfo[idx].op == NULL_TREE
4190 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4192 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4194 if (idx > max_idx)
4195 max_idx = idx;
4197 /* If we collapse the conditional to a true/false
4198 condition, then bubble that knowledge up to our caller. */
4199 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4201 gimple_cond_make_false (cond_stmt);
4202 cfg_cleanup_needed = true;
4204 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4206 gimple_cond_make_true (cond_stmt);
4207 cfg_cleanup_needed = true;
4209 else
4211 gimple_cond_set_code (cond_stmt, NE_EXPR);
4212 gimple_cond_set_lhs (cond_stmt,
4213 ops[bbinfo[idx].first_idx]->op);
4214 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4216 update_stmt (cond_stmt);
4218 if (bb == first_bb)
4219 break;
4222 /* The above changes could result in basic blocks after the first
4223 modified one, up to and including last_bb, to be executed even if
4224 they would not be in the original program. If the value ranges of
4225 assignment lhs' in those bbs were dependent on the conditions
4226 guarding those basic blocks which now can change, the VRs might
4227 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4228 are only used within the same bb, it should be not a big deal if
4229 we just reset all the VRs in those bbs. See PR68671. */
4230 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4231 reset_flow_sensitive_info_in_bb (bb);
4233 return cfg_cleanup_needed;
4236 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4237 of STMT in it's operands. This is also known as a "destructive
4238 update" operation. */
4240 static bool
4241 is_phi_for_stmt (gimple *stmt, tree operand)
4243 gimple *def_stmt;
4244 gphi *def_phi;
4245 tree lhs;
4246 use_operand_p arg_p;
4247 ssa_op_iter i;
4249 if (TREE_CODE (operand) != SSA_NAME)
4250 return false;
4252 lhs = gimple_assign_lhs (stmt);
4254 def_stmt = SSA_NAME_DEF_STMT (operand);
4255 def_phi = dyn_cast <gphi *> (def_stmt);
4256 if (!def_phi)
4257 return false;
4259 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4260 if (lhs == USE_FROM_PTR (arg_p))
4261 return true;
4262 return false;
4265 /* Remove def stmt of VAR if VAR has zero uses and recurse
4266 on rhs1 operand if so. */
4268 static void
4269 remove_visited_stmt_chain (tree var)
4271 gimple *stmt;
4272 gimple_stmt_iterator gsi;
4274 while (1)
4276 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4277 return;
4278 stmt = SSA_NAME_DEF_STMT (var);
4279 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4281 var = gimple_assign_rhs1 (stmt);
4282 gsi = gsi_for_stmt (stmt);
4283 reassoc_remove_stmt (&gsi);
4284 release_defs (stmt);
4286 else
4287 return;
4291 /* This function checks three consequtive operands in
4292 passed operands vector OPS starting from OPINDEX and
4293 swaps two operands if it is profitable for binary operation
4294 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4296 We pair ops with the same rank if possible.
4298 The alternative we try is to see if STMT is a destructive
4299 update style statement, which is like:
4300 b = phi (a, ...)
4301 a = c + b;
4302 In that case, we want to use the destructive update form to
4303 expose the possible vectorizer sum reduction opportunity.
4304 In that case, the third operand will be the phi node. This
4305 check is not performed if STMT is null.
4307 We could, of course, try to be better as noted above, and do a
4308 lot of work to try to find these opportunities in >3 operand
4309 cases, but it is unlikely to be worth it. */
4311 static void
4312 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4313 unsigned int opindex, gimple *stmt)
4315 operand_entry *oe1, *oe2, *oe3;
4317 oe1 = ops[opindex];
4318 oe2 = ops[opindex + 1];
4319 oe3 = ops[opindex + 2];
4321 if ((oe1->rank == oe2->rank
4322 && oe2->rank != oe3->rank)
4323 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4324 && !is_phi_for_stmt (stmt, oe1->op)
4325 && !is_phi_for_stmt (stmt, oe2->op)))
4326 std::swap (*oe1, *oe3);
4327 else if ((oe1->rank == oe3->rank
4328 && oe2->rank != oe3->rank)
4329 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4330 && !is_phi_for_stmt (stmt, oe1->op)
4331 && !is_phi_for_stmt (stmt, oe3->op)))
4332 std::swap (*oe1, *oe2);
4335 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4336 two definitions, otherwise return STMT. */
4338 static inline gimple *
4339 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4341 if (TREE_CODE (rhs1) == SSA_NAME
4342 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4343 stmt = SSA_NAME_DEF_STMT (rhs1);
4344 if (TREE_CODE (rhs2) == SSA_NAME
4345 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4346 stmt = SSA_NAME_DEF_STMT (rhs2);
4347 return stmt;
4350 /* If the stmt that defines operand has to be inserted, insert it
4351 before the use. */
4352 static void
4353 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4355 gcc_assert (is_gimple_assign (stmt_to_insert));
4356 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4357 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4358 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4359 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4360 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4362 /* If the insert point is not stmt, then insert_point would be
4363 the point where operand rhs1 or rhs2 is defined. In this case,
4364 stmt_to_insert has to be inserted afterwards. This would
4365 only happen when the stmt insertion point is flexible. */
4366 if (stmt == insert_point)
4367 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4368 else
4369 insert_stmt_after (stmt_to_insert, insert_point);
4373 /* Recursively rewrite our linearized statements so that the operators
4374 match those in OPS[OPINDEX], putting the computation in rank
4375 order. Return new lhs.
4376 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4377 the current stmt and during recursive invocations.
4378 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4379 recursive invocations. */
4381 static tree
4382 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4383 vec<operand_entry *> ops, bool changed, bool next_changed)
4385 tree rhs1 = gimple_assign_rhs1 (stmt);
4386 tree rhs2 = gimple_assign_rhs2 (stmt);
4387 tree lhs = gimple_assign_lhs (stmt);
4388 operand_entry *oe;
4390 /* The final recursion case for this function is that you have
4391 exactly two operations left.
4392 If we had exactly one op in the entire list to start with, we
4393 would have never called this function, and the tail recursion
4394 rewrites them one at a time. */
4395 if (opindex + 2 == ops.length ())
4397 operand_entry *oe1, *oe2;
4399 oe1 = ops[opindex];
4400 oe2 = ops[opindex + 1];
4402 if (rhs1 != oe1->op || rhs2 != oe2->op)
4404 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4405 unsigned int uid = gimple_uid (stmt);
4407 if (dump_file && (dump_flags & TDF_DETAILS))
4409 fprintf (dump_file, "Transforming ");
4410 print_gimple_stmt (dump_file, stmt, 0);
4413 /* If the stmt that defines operand has to be inserted, insert it
4414 before the use. */
4415 if (oe1->stmt_to_insert)
4416 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4417 if (oe2->stmt_to_insert)
4418 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4419 /* Even when changed is false, reassociation could have e.g. removed
4420 some redundant operations, so unless we are just swapping the
4421 arguments or unless there is no change at all (then we just
4422 return lhs), force creation of a new SSA_NAME. */
4423 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4425 gimple *insert_point
4426 = find_insert_point (stmt, oe1->op, oe2->op);
4427 lhs = make_ssa_name (TREE_TYPE (lhs));
4428 stmt
4429 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4430 oe1->op, oe2->op);
4431 gimple_set_uid (stmt, uid);
4432 gimple_set_visited (stmt, true);
4433 if (insert_point == gsi_stmt (gsi))
4434 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4435 else
4436 insert_stmt_after (stmt, insert_point);
4438 else
4440 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4441 == stmt);
4442 gimple_assign_set_rhs1 (stmt, oe1->op);
4443 gimple_assign_set_rhs2 (stmt, oe2->op);
4444 update_stmt (stmt);
4447 if (rhs1 != oe1->op && rhs1 != oe2->op)
4448 remove_visited_stmt_chain (rhs1);
4450 if (dump_file && (dump_flags & TDF_DETAILS))
4452 fprintf (dump_file, " into ");
4453 print_gimple_stmt (dump_file, stmt, 0);
4456 return lhs;
4459 /* If we hit here, we should have 3 or more ops left. */
4460 gcc_assert (opindex + 2 < ops.length ());
4462 /* Rewrite the next operator. */
4463 oe = ops[opindex];
4465 /* If the stmt that defines operand has to be inserted, insert it
4466 before the use. */
4467 if (oe->stmt_to_insert)
4468 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4470 /* Recurse on the LHS of the binary operator, which is guaranteed to
4471 be the non-leaf side. */
4472 tree new_rhs1
4473 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4474 changed || oe->op != rhs2 || next_changed,
4475 false);
4477 if (oe->op != rhs2 || new_rhs1 != rhs1)
4479 if (dump_file && (dump_flags & TDF_DETAILS))
4481 fprintf (dump_file, "Transforming ");
4482 print_gimple_stmt (dump_file, stmt, 0);
4485 /* If changed is false, this is either opindex == 0
4486 or all outer rhs2's were equal to corresponding oe->op,
4487 and powi_result is NULL.
4488 That means lhs is equivalent before and after reassociation.
4489 Otherwise ensure the old lhs SSA_NAME is not reused and
4490 create a new stmt as well, so that any debug stmts will be
4491 properly adjusted. */
4492 if (changed)
4494 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4495 unsigned int uid = gimple_uid (stmt);
4496 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4498 lhs = make_ssa_name (TREE_TYPE (lhs));
4499 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4500 new_rhs1, oe->op);
4501 gimple_set_uid (stmt, uid);
4502 gimple_set_visited (stmt, true);
4503 if (insert_point == gsi_stmt (gsi))
4504 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4505 else
4506 insert_stmt_after (stmt, insert_point);
4508 else
4510 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4511 == stmt);
4512 gimple_assign_set_rhs1 (stmt, new_rhs1);
4513 gimple_assign_set_rhs2 (stmt, oe->op);
4514 update_stmt (stmt);
4517 if (dump_file && (dump_flags & TDF_DETAILS))
4519 fprintf (dump_file, " into ");
4520 print_gimple_stmt (dump_file, stmt, 0);
4523 return lhs;
4526 /* Find out how many cycles we need to compute statements chain.
4527 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
4528 maximum number of independent statements we may execute per cycle. */
4530 static int
4531 get_required_cycles (int ops_num, int cpu_width)
4533 int res;
4534 int elog;
4535 unsigned int rest;
4537 /* While we have more than 2 * cpu_width operands
4538 we may reduce number of operands by cpu_width
4539 per cycle. */
4540 res = ops_num / (2 * cpu_width);
4542 /* Remained operands count may be reduced twice per cycle
4543 until we have only one operand. */
4544 rest = (unsigned)(ops_num - res * cpu_width);
4545 elog = exact_log2 (rest);
4546 if (elog >= 0)
4547 res += elog;
4548 else
4549 res += floor_log2 (rest) + 1;
4551 return res;
4554 /* Returns an optimal number of registers to use for computation of
4555 given statements. */
4557 static int
4558 get_reassociation_width (int ops_num, enum tree_code opc,
4559 machine_mode mode)
4561 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4562 int width;
4563 int width_min;
4564 int cycles_best;
4566 if (param_width > 0)
4567 width = param_width;
4568 else
4569 width = targetm.sched.reassociation_width (opc, mode);
4571 if (width == 1)
4572 return width;
4574 /* Get the minimal time required for sequence computation. */
4575 cycles_best = get_required_cycles (ops_num, width);
4577 /* Check if we may use less width and still compute sequence for
4578 the same time. It will allow us to reduce registers usage.
4579 get_required_cycles is monotonically increasing with lower width
4580 so we can perform a binary search for the minimal width that still
4581 results in the optimal cycle count. */
4582 width_min = 1;
4583 while (width > width_min)
4585 int width_mid = (width + width_min) / 2;
4587 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4588 width = width_mid;
4589 else if (width_min < width_mid)
4590 width_min = width_mid;
4591 else
4592 break;
4595 return width;
4598 /* Recursively rewrite our linearized statements so that the operators
4599 match those in OPS[OPINDEX], putting the computation in rank
4600 order and trying to allow operations to be executed in
4601 parallel. */
4603 static void
4604 rewrite_expr_tree_parallel (gassign *stmt, int width,
4605 vec<operand_entry *> ops)
4607 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4608 int op_num = ops.length ();
4609 gcc_assert (op_num > 0);
4610 int stmt_num = op_num - 1;
4611 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4612 int op_index = op_num - 1;
4613 int stmt_index = 0;
4614 int ready_stmts_end = 0;
4615 int i = 0;
4616 gimple *stmt1 = NULL, *stmt2 = NULL;
4617 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4619 /* We start expression rewriting from the top statements.
4620 So, in this loop we create a full list of statements
4621 we will work with. */
4622 stmts[stmt_num - 1] = stmt;
4623 for (i = stmt_num - 2; i >= 0; i--)
4624 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4626 for (i = 0; i < stmt_num; i++)
4628 tree op1, op2;
4630 /* Determine whether we should use results of
4631 already handled statements or not. */
4632 if (ready_stmts_end == 0
4633 && (i - stmt_index >= width || op_index < 1))
4634 ready_stmts_end = i;
4636 /* Now we choose operands for the next statement. Non zero
4637 value in ready_stmts_end means here that we should use
4638 the result of already generated statements as new operand. */
4639 if (ready_stmts_end > 0)
4641 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4642 if (ready_stmts_end > stmt_index)
4643 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4644 else if (op_index >= 0)
4646 operand_entry *oe = ops[op_index--];
4647 stmt2 = oe->stmt_to_insert;
4648 op2 = oe->op;
4650 else
4652 gcc_assert (stmt_index < i);
4653 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4656 if (stmt_index >= ready_stmts_end)
4657 ready_stmts_end = 0;
4659 else
4661 if (op_index > 1)
4662 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4663 operand_entry *oe2 = ops[op_index--];
4664 operand_entry *oe1 = ops[op_index--];
4665 op2 = oe2->op;
4666 stmt2 = oe2->stmt_to_insert;
4667 op1 = oe1->op;
4668 stmt1 = oe1->stmt_to_insert;
4671 /* If we emit the last statement then we should put
4672 operands into the last statement. It will also
4673 break the loop. */
4674 if (op_index < 0 && stmt_index == i)
4675 i = stmt_num - 1;
4677 if (dump_file && (dump_flags & TDF_DETAILS))
4679 fprintf (dump_file, "Transforming ");
4680 print_gimple_stmt (dump_file, stmts[i], 0);
4683 /* If the stmt that defines operand has to be inserted, insert it
4684 before the use. */
4685 if (stmt1)
4686 insert_stmt_before_use (stmts[i], stmt1);
4687 if (stmt2)
4688 insert_stmt_before_use (stmts[i], stmt2);
4689 stmt1 = stmt2 = NULL;
4691 /* We keep original statement only for the last one. All
4692 others are recreated. */
4693 if (i == stmt_num - 1)
4695 gimple_assign_set_rhs1 (stmts[i], op1);
4696 gimple_assign_set_rhs2 (stmts[i], op2);
4697 update_stmt (stmts[i]);
4699 else
4701 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4703 if (dump_file && (dump_flags & TDF_DETAILS))
4705 fprintf (dump_file, " into ");
4706 print_gimple_stmt (dump_file, stmts[i], 0);
4710 remove_visited_stmt_chain (last_rhs1);
4713 /* Transform STMT, which is really (A +B) + (C + D) into the left
4714 linear form, ((A+B)+C)+D.
4715 Recurse on D if necessary. */
4717 static void
4718 linearize_expr (gimple *stmt)
4720 gimple_stmt_iterator gsi;
4721 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4722 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4723 gimple *oldbinrhs = binrhs;
4724 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4725 gimple *newbinrhs = NULL;
4726 struct loop *loop = loop_containing_stmt (stmt);
4727 tree lhs = gimple_assign_lhs (stmt);
4729 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4730 && is_reassociable_op (binrhs, rhscode, loop));
4732 gsi = gsi_for_stmt (stmt);
4734 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4735 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4736 gimple_assign_rhs_code (binrhs),
4737 gimple_assign_lhs (binlhs),
4738 gimple_assign_rhs2 (binrhs));
4739 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4740 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4741 gimple_set_uid (binrhs, gimple_uid (stmt));
4743 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4744 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4746 if (dump_file && (dump_flags & TDF_DETAILS))
4748 fprintf (dump_file, "Linearized: ");
4749 print_gimple_stmt (dump_file, stmt, 0);
4752 reassociate_stats.linearized++;
4753 update_stmt (stmt);
4755 gsi = gsi_for_stmt (oldbinrhs);
4756 reassoc_remove_stmt (&gsi);
4757 release_defs (oldbinrhs);
4759 gimple_set_visited (stmt, true);
4760 gimple_set_visited (binlhs, true);
4761 gimple_set_visited (binrhs, true);
4763 /* Tail recurse on the new rhs if it still needs reassociation. */
4764 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4765 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4766 want to change the algorithm while converting to tuples. */
4767 linearize_expr (stmt);
4770 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4771 it. Otherwise, return NULL. */
4773 static gimple *
4774 get_single_immediate_use (tree lhs)
4776 use_operand_p immuse;
4777 gimple *immusestmt;
4779 if (TREE_CODE (lhs) == SSA_NAME
4780 && single_imm_use (lhs, &immuse, &immusestmt)
4781 && is_gimple_assign (immusestmt))
4782 return immusestmt;
4784 return NULL;
4787 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4788 representing the negated value. Insertions of any necessary
4789 instructions go before GSI.
4790 This function is recursive in that, if you hand it "a_5" as the
4791 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4792 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4794 static tree
4795 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4797 gimple *negatedefstmt = NULL;
4798 tree resultofnegate;
4799 gimple_stmt_iterator gsi;
4800 unsigned int uid;
4802 /* If we are trying to negate a name, defined by an add, negate the
4803 add operands instead. */
4804 if (TREE_CODE (tonegate) == SSA_NAME)
4805 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4806 if (TREE_CODE (tonegate) == SSA_NAME
4807 && is_gimple_assign (negatedefstmt)
4808 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4809 && has_single_use (gimple_assign_lhs (negatedefstmt))
4810 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4812 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4813 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4814 tree lhs = gimple_assign_lhs (negatedefstmt);
4815 gimple *g;
4817 gsi = gsi_for_stmt (negatedefstmt);
4818 rhs1 = negate_value (rhs1, &gsi);
4820 gsi = gsi_for_stmt (negatedefstmt);
4821 rhs2 = negate_value (rhs2, &gsi);
4823 gsi = gsi_for_stmt (negatedefstmt);
4824 lhs = make_ssa_name (TREE_TYPE (lhs));
4825 gimple_set_visited (negatedefstmt, true);
4826 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4827 gimple_set_uid (g, gimple_uid (negatedefstmt));
4828 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4829 return lhs;
4832 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4833 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4834 NULL_TREE, true, GSI_SAME_STMT);
4835 gsi = *gsip;
4836 uid = gimple_uid (gsi_stmt (gsi));
4837 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4839 gimple *stmt = gsi_stmt (gsi);
4840 if (gimple_uid (stmt) != 0)
4841 break;
4842 gimple_set_uid (stmt, uid);
4844 return resultofnegate;
4847 /* Return true if we should break up the subtract in STMT into an add
4848 with negate. This is true when we the subtract operands are really
4849 adds, or the subtract itself is used in an add expression. In
4850 either case, breaking up the subtract into an add with negate
4851 exposes the adds to reassociation. */
4853 static bool
4854 should_break_up_subtract (gimple *stmt)
4856 tree lhs = gimple_assign_lhs (stmt);
4857 tree binlhs = gimple_assign_rhs1 (stmt);
4858 tree binrhs = gimple_assign_rhs2 (stmt);
4859 gimple *immusestmt;
4860 struct loop *loop = loop_containing_stmt (stmt);
4862 if (TREE_CODE (binlhs) == SSA_NAME
4863 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4864 return true;
4866 if (TREE_CODE (binrhs) == SSA_NAME
4867 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4868 return true;
4870 if (TREE_CODE (lhs) == SSA_NAME
4871 && (immusestmt = get_single_immediate_use (lhs))
4872 && is_gimple_assign (immusestmt)
4873 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4874 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4875 && gimple_assign_rhs1 (immusestmt) == lhs)
4876 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4877 return true;
4878 return false;
4881 /* Transform STMT from A - B into A + -B. */
4883 static void
4884 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4886 tree rhs1 = gimple_assign_rhs1 (stmt);
4887 tree rhs2 = gimple_assign_rhs2 (stmt);
4889 if (dump_file && (dump_flags & TDF_DETAILS))
4891 fprintf (dump_file, "Breaking up subtract ");
4892 print_gimple_stmt (dump_file, stmt, 0);
4895 rhs2 = negate_value (rhs2, gsip);
4896 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4897 update_stmt (stmt);
4900 /* Determine whether STMT is a builtin call that raises an SSA name
4901 to an integer power and has only one use. If so, and this is early
4902 reassociation and unsafe math optimizations are permitted, place
4903 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4904 If any of these conditions does not hold, return FALSE. */
4906 static bool
4907 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4909 tree arg1;
4910 REAL_VALUE_TYPE c, cint;
4912 switch (gimple_call_combined_fn (stmt))
4914 CASE_CFN_POW:
4915 if (flag_errno_math)
4916 return false;
4918 *base = gimple_call_arg (stmt, 0);
4919 arg1 = gimple_call_arg (stmt, 1);
4921 if (TREE_CODE (arg1) != REAL_CST)
4922 return false;
4924 c = TREE_REAL_CST (arg1);
4926 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4927 return false;
4929 *exponent = real_to_integer (&c);
4930 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4931 if (!real_identical (&c, &cint))
4932 return false;
4934 break;
4936 CASE_CFN_POWI:
4937 *base = gimple_call_arg (stmt, 0);
4938 arg1 = gimple_call_arg (stmt, 1);
4940 if (!tree_fits_shwi_p (arg1))
4941 return false;
4943 *exponent = tree_to_shwi (arg1);
4944 break;
4946 default:
4947 return false;
4950 /* Expanding negative exponents is generally unproductive, so we don't
4951 complicate matters with those. Exponents of zero and one should
4952 have been handled by expression folding. */
4953 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4954 return false;
4956 return true;
4959 /* Try to derive and add operand entry for OP to *OPS. Return false if
4960 unsuccessful. */
4962 static bool
4963 try_special_add_to_ops (vec<operand_entry *> *ops,
4964 enum tree_code code,
4965 tree op, gimple* def_stmt)
4967 tree base = NULL_TREE;
4968 HOST_WIDE_INT exponent = 0;
4970 if (TREE_CODE (op) != SSA_NAME
4971 || ! has_single_use (op))
4972 return false;
4974 if (code == MULT_EXPR
4975 && reassoc_insert_powi_p
4976 && flag_unsafe_math_optimizations
4977 && is_gimple_call (def_stmt)
4978 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4980 add_repeat_to_ops_vec (ops, base, exponent);
4981 gimple_set_visited (def_stmt, true);
4982 return true;
4984 else if (code == MULT_EXPR
4985 && is_gimple_assign (def_stmt)
4986 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4987 && !HONOR_SNANS (TREE_TYPE (op))
4988 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4989 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4991 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4992 tree cst = build_minus_one_cst (TREE_TYPE (op));
4993 add_to_ops_vec (ops, rhs1);
4994 add_to_ops_vec (ops, cst);
4995 gimple_set_visited (def_stmt, true);
4996 return true;
4999 return false;
5002 /* Recursively linearize a binary expression that is the RHS of STMT.
5003 Place the operands of the expression tree in the vector named OPS. */
5005 static void
5006 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5007 bool is_associative, bool set_visited)
5009 tree binlhs = gimple_assign_rhs1 (stmt);
5010 tree binrhs = gimple_assign_rhs2 (stmt);
5011 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5012 bool binlhsisreassoc = false;
5013 bool binrhsisreassoc = false;
5014 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5015 struct loop *loop = loop_containing_stmt (stmt);
5017 if (set_visited)
5018 gimple_set_visited (stmt, true);
5020 if (TREE_CODE (binlhs) == SSA_NAME)
5022 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5023 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5024 && !stmt_could_throw_p (binlhsdef));
5027 if (TREE_CODE (binrhs) == SSA_NAME)
5029 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5030 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5031 && !stmt_could_throw_p (binrhsdef));
5034 /* If the LHS is not reassociable, but the RHS is, we need to swap
5035 them. If neither is reassociable, there is nothing we can do, so
5036 just put them in the ops vector. If the LHS is reassociable,
5037 linearize it. If both are reassociable, then linearize the RHS
5038 and the LHS. */
5040 if (!binlhsisreassoc)
5042 /* If this is not a associative operation like division, give up. */
5043 if (!is_associative)
5045 add_to_ops_vec (ops, binrhs);
5046 return;
5049 if (!binrhsisreassoc)
5051 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5052 add_to_ops_vec (ops, binrhs);
5054 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5055 add_to_ops_vec (ops, binlhs);
5057 return;
5060 if (dump_file && (dump_flags & TDF_DETAILS))
5062 fprintf (dump_file, "swapping operands of ");
5063 print_gimple_stmt (dump_file, stmt, 0);
5066 swap_ssa_operands (stmt,
5067 gimple_assign_rhs1_ptr (stmt),
5068 gimple_assign_rhs2_ptr (stmt));
5069 update_stmt (stmt);
5071 if (dump_file && (dump_flags & TDF_DETAILS))
5073 fprintf (dump_file, " is now ");
5074 print_gimple_stmt (dump_file, stmt, 0);
5077 /* We want to make it so the lhs is always the reassociative op,
5078 so swap. */
5079 std::swap (binlhs, binrhs);
5081 else if (binrhsisreassoc)
5083 linearize_expr (stmt);
5084 binlhs = gimple_assign_rhs1 (stmt);
5085 binrhs = gimple_assign_rhs2 (stmt);
5088 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5089 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5090 rhscode, loop));
5091 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5092 is_associative, set_visited);
5094 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5095 add_to_ops_vec (ops, binrhs);
5098 /* Repropagate the negates back into subtracts, since no other pass
5099 currently does it. */
5101 static void
5102 repropagate_negates (void)
5104 unsigned int i = 0;
5105 tree negate;
5107 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5109 gimple *user = get_single_immediate_use (negate);
5111 if (!user || !is_gimple_assign (user))
5112 continue;
5114 /* The negate operand can be either operand of a PLUS_EXPR
5115 (it can be the LHS if the RHS is a constant for example).
5117 Force the negate operand to the RHS of the PLUS_EXPR, then
5118 transform the PLUS_EXPR into a MINUS_EXPR. */
5119 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5121 /* If the negated operand appears on the LHS of the
5122 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5123 to force the negated operand to the RHS of the PLUS_EXPR. */
5124 if (gimple_assign_rhs1 (user) == negate)
5126 swap_ssa_operands (user,
5127 gimple_assign_rhs1_ptr (user),
5128 gimple_assign_rhs2_ptr (user));
5131 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5132 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5133 if (gimple_assign_rhs2 (user) == negate)
5135 tree rhs1 = gimple_assign_rhs1 (user);
5136 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5137 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5138 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5139 update_stmt (user);
5142 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5144 if (gimple_assign_rhs1 (user) == negate)
5146 /* We have
5147 x = -a
5148 y = x - b
5149 which we transform into
5150 x = a + b
5151 y = -x .
5152 This pushes down the negate which we possibly can merge
5153 into some other operation, hence insert it into the
5154 plus_negates vector. */
5155 gimple *feed = SSA_NAME_DEF_STMT (negate);
5156 tree a = gimple_assign_rhs1 (feed);
5157 tree b = gimple_assign_rhs2 (user);
5158 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5159 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5160 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5161 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5162 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5163 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5164 user = gsi_stmt (gsi2);
5165 update_stmt (user);
5166 reassoc_remove_stmt (&gsi);
5167 release_defs (feed);
5168 plus_negates.safe_push (gimple_assign_lhs (user));
5170 else
5172 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5173 rid of one operation. */
5174 gimple *feed = SSA_NAME_DEF_STMT (negate);
5175 tree a = gimple_assign_rhs1 (feed);
5176 tree rhs1 = gimple_assign_rhs1 (user);
5177 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5178 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5179 update_stmt (gsi_stmt (gsi));
5185 /* Returns true if OP is of a type for which we can do reassociation.
5186 That is for integral or non-saturating fixed-point types, and for
5187 floating point type when associative-math is enabled. */
5189 static bool
5190 can_reassociate_p (tree op)
5192 tree type = TREE_TYPE (op);
5193 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5194 return false;
5195 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5196 || NON_SAT_FIXED_POINT_TYPE_P (type)
5197 || (flag_associative_math && FLOAT_TYPE_P (type)))
5198 return true;
5199 return false;
5202 /* Break up subtract operations in block BB.
5204 We do this top down because we don't know whether the subtract is
5205 part of a possible chain of reassociation except at the top.
5207 IE given
5208 d = f + g
5209 c = a + e
5210 b = c - d
5211 q = b - r
5212 k = t - q
5214 we want to break up k = t - q, but we won't until we've transformed q
5215 = b - r, which won't be broken up until we transform b = c - d.
5217 En passant, clear the GIMPLE visited flag on every statement
5218 and set UIDs within each basic block. */
5220 static void
5221 break_up_subtract_bb (basic_block bb)
5223 gimple_stmt_iterator gsi;
5224 basic_block son;
5225 unsigned int uid = 1;
5227 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5229 gimple *stmt = gsi_stmt (gsi);
5230 gimple_set_visited (stmt, false);
5231 gimple_set_uid (stmt, uid++);
5233 if (!is_gimple_assign (stmt)
5234 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5235 continue;
5237 /* Look for simple gimple subtract operations. */
5238 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5240 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5241 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5242 continue;
5244 /* Check for a subtract used only in an addition. If this
5245 is the case, transform it into add of a negate for better
5246 reassociation. IE transform C = A-B into C = A + -B if C
5247 is only used in an addition. */
5248 if (should_break_up_subtract (stmt))
5249 break_up_subtract (stmt, &gsi);
5251 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5252 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5253 plus_negates.safe_push (gimple_assign_lhs (stmt));
5255 for (son = first_dom_son (CDI_DOMINATORS, bb);
5256 son;
5257 son = next_dom_son (CDI_DOMINATORS, son))
5258 break_up_subtract_bb (son);
5261 /* Used for repeated factor analysis. */
5262 struct repeat_factor
5264 /* An SSA name that occurs in a multiply chain. */
5265 tree factor;
5267 /* Cached rank of the factor. */
5268 unsigned rank;
5270 /* Number of occurrences of the factor in the chain. */
5271 HOST_WIDE_INT count;
5273 /* An SSA name representing the product of this factor and
5274 all factors appearing later in the repeated factor vector. */
5275 tree repr;
5279 static vec<repeat_factor> repeat_factor_vec;
5281 /* Used for sorting the repeat factor vector. Sort primarily by
5282 ascending occurrence count, secondarily by descending rank. */
5284 static int
5285 compare_repeat_factors (const void *x1, const void *x2)
5287 const repeat_factor *rf1 = (const repeat_factor *) x1;
5288 const repeat_factor *rf2 = (const repeat_factor *) x2;
5290 if (rf1->count != rf2->count)
5291 return rf1->count - rf2->count;
5293 return rf2->rank - rf1->rank;
5296 /* Look for repeated operands in OPS in the multiply tree rooted at
5297 STMT. Replace them with an optimal sequence of multiplies and powi
5298 builtin calls, and remove the used operands from OPS. Return an
5299 SSA name representing the value of the replacement sequence. */
5301 static tree
5302 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5304 unsigned i, j, vec_len;
5305 int ii;
5306 operand_entry *oe;
5307 repeat_factor *rf1, *rf2;
5308 repeat_factor rfnew;
5309 tree result = NULL_TREE;
5310 tree target_ssa, iter_result;
5311 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5312 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5313 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5314 gimple *mul_stmt, *pow_stmt;
5316 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5317 target. */
5318 if (!powi_fndecl)
5319 return NULL_TREE;
5321 /* Allocate the repeated factor vector. */
5322 repeat_factor_vec.create (10);
5324 /* Scan the OPS vector for all SSA names in the product and build
5325 up a vector of occurrence counts for each factor. */
5326 FOR_EACH_VEC_ELT (*ops, i, oe)
5328 if (TREE_CODE (oe->op) == SSA_NAME)
5330 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5332 if (rf1->factor == oe->op)
5334 rf1->count += oe->count;
5335 break;
5339 if (j >= repeat_factor_vec.length ())
5341 rfnew.factor = oe->op;
5342 rfnew.rank = oe->rank;
5343 rfnew.count = oe->count;
5344 rfnew.repr = NULL_TREE;
5345 repeat_factor_vec.safe_push (rfnew);
5350 /* Sort the repeated factor vector by (a) increasing occurrence count,
5351 and (b) decreasing rank. */
5352 repeat_factor_vec.qsort (compare_repeat_factors);
5354 /* It is generally best to combine as many base factors as possible
5355 into a product before applying __builtin_powi to the result.
5356 However, the sort order chosen for the repeated factor vector
5357 allows us to cache partial results for the product of the base
5358 factors for subsequent use. When we already have a cached partial
5359 result from a previous iteration, it is best to make use of it
5360 before looking for another __builtin_pow opportunity.
5362 As an example, consider x * x * y * y * y * z * z * z * z.
5363 We want to first compose the product x * y * z, raise it to the
5364 second power, then multiply this by y * z, and finally multiply
5365 by z. This can be done in 5 multiplies provided we cache y * z
5366 for use in both expressions:
5368 t1 = y * z
5369 t2 = t1 * x
5370 t3 = t2 * t2
5371 t4 = t1 * t3
5372 result = t4 * z
5374 If we instead ignored the cached y * z and first multiplied by
5375 the __builtin_pow opportunity z * z, we would get the inferior:
5377 t1 = y * z
5378 t2 = t1 * x
5379 t3 = t2 * t2
5380 t4 = z * z
5381 t5 = t3 * t4
5382 result = t5 * y */
5384 vec_len = repeat_factor_vec.length ();
5386 /* Repeatedly look for opportunities to create a builtin_powi call. */
5387 while (true)
5389 HOST_WIDE_INT power;
5391 /* First look for the largest cached product of factors from
5392 preceding iterations. If found, create a builtin_powi for
5393 it if the minimum occurrence count for its factors is at
5394 least 2, or just use this cached product as our next
5395 multiplicand if the minimum occurrence count is 1. */
5396 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5398 if (rf1->repr && rf1->count > 0)
5399 break;
5402 if (j < vec_len)
5404 power = rf1->count;
5406 if (power == 1)
5408 iter_result = rf1->repr;
5410 if (dump_file && (dump_flags & TDF_DETAILS))
5412 unsigned elt;
5413 repeat_factor *rf;
5414 fputs ("Multiplying by cached product ", dump_file);
5415 for (elt = j; elt < vec_len; elt++)
5417 rf = &repeat_factor_vec[elt];
5418 print_generic_expr (dump_file, rf->factor);
5419 if (elt < vec_len - 1)
5420 fputs (" * ", dump_file);
5422 fputs ("\n", dump_file);
5425 else
5427 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5428 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5429 build_int_cst (integer_type_node,
5430 power));
5431 gimple_call_set_lhs (pow_stmt, iter_result);
5432 gimple_set_location (pow_stmt, gimple_location (stmt));
5433 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5434 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5436 if (dump_file && (dump_flags & TDF_DETAILS))
5438 unsigned elt;
5439 repeat_factor *rf;
5440 fputs ("Building __builtin_pow call for cached product (",
5441 dump_file);
5442 for (elt = j; elt < vec_len; elt++)
5444 rf = &repeat_factor_vec[elt];
5445 print_generic_expr (dump_file, rf->factor);
5446 if (elt < vec_len - 1)
5447 fputs (" * ", dump_file);
5449 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5450 power);
5454 else
5456 /* Otherwise, find the first factor in the repeated factor
5457 vector whose occurrence count is at least 2. If no such
5458 factor exists, there are no builtin_powi opportunities
5459 remaining. */
5460 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5462 if (rf1->count >= 2)
5463 break;
5466 if (j >= vec_len)
5467 break;
5469 power = rf1->count;
5471 if (dump_file && (dump_flags & TDF_DETAILS))
5473 unsigned elt;
5474 repeat_factor *rf;
5475 fputs ("Building __builtin_pow call for (", dump_file);
5476 for (elt = j; elt < vec_len; elt++)
5478 rf = &repeat_factor_vec[elt];
5479 print_generic_expr (dump_file, rf->factor);
5480 if (elt < vec_len - 1)
5481 fputs (" * ", dump_file);
5483 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5486 reassociate_stats.pows_created++;
5488 /* Visit each element of the vector in reverse order (so that
5489 high-occurrence elements are visited first, and within the
5490 same occurrence count, lower-ranked elements are visited
5491 first). Form a linear product of all elements in this order
5492 whose occurrencce count is at least that of element J.
5493 Record the SSA name representing the product of each element
5494 with all subsequent elements in the vector. */
5495 if (j == vec_len - 1)
5496 rf1->repr = rf1->factor;
5497 else
5499 for (ii = vec_len - 2; ii >= (int)j; ii--)
5501 tree op1, op2;
5503 rf1 = &repeat_factor_vec[ii];
5504 rf2 = &repeat_factor_vec[ii + 1];
5506 /* Init the last factor's representative to be itself. */
5507 if (!rf2->repr)
5508 rf2->repr = rf2->factor;
5510 op1 = rf1->factor;
5511 op2 = rf2->repr;
5513 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5514 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5515 op1, op2);
5516 gimple_set_location (mul_stmt, gimple_location (stmt));
5517 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5518 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5519 rf1->repr = target_ssa;
5521 /* Don't reprocess the multiply we just introduced. */
5522 gimple_set_visited (mul_stmt, true);
5526 /* Form a call to __builtin_powi for the maximum product
5527 just formed, raised to the power obtained earlier. */
5528 rf1 = &repeat_factor_vec[j];
5529 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5530 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5531 build_int_cst (integer_type_node,
5532 power));
5533 gimple_call_set_lhs (pow_stmt, iter_result);
5534 gimple_set_location (pow_stmt, gimple_location (stmt));
5535 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5536 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5539 /* If we previously formed at least one other builtin_powi call,
5540 form the product of this one and those others. */
5541 if (result)
5543 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5544 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5545 result, iter_result);
5546 gimple_set_location (mul_stmt, gimple_location (stmt));
5547 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5548 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5549 gimple_set_visited (mul_stmt, true);
5550 result = new_result;
5552 else
5553 result = iter_result;
5555 /* Decrement the occurrence count of each element in the product
5556 by the count found above, and remove this many copies of each
5557 factor from OPS. */
5558 for (i = j; i < vec_len; i++)
5560 unsigned k = power;
5561 unsigned n;
5563 rf1 = &repeat_factor_vec[i];
5564 rf1->count -= power;
5566 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5568 if (oe->op == rf1->factor)
5570 if (oe->count <= k)
5572 ops->ordered_remove (n);
5573 k -= oe->count;
5575 if (k == 0)
5576 break;
5578 else
5580 oe->count -= k;
5581 break;
5588 /* At this point all elements in the repeated factor vector have a
5589 remaining occurrence count of 0 or 1, and those with a count of 1
5590 don't have cached representatives. Re-sort the ops vector and
5591 clean up. */
5592 ops->qsort (sort_by_operand_rank);
5593 repeat_factor_vec.release ();
5595 /* Return the final product computed herein. Note that there may
5596 still be some elements with single occurrence count left in OPS;
5597 those will be handled by the normal reassociation logic. */
5598 return result;
5601 /* Attempt to optimize
5602 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5603 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5605 static void
5606 attempt_builtin_copysign (vec<operand_entry *> *ops)
5608 operand_entry *oe;
5609 unsigned int i;
5610 unsigned int length = ops->length ();
5611 tree cst = ops->last ()->op;
5613 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5614 return;
5616 FOR_EACH_VEC_ELT (*ops, i, oe)
5618 if (TREE_CODE (oe->op) == SSA_NAME
5619 && has_single_use (oe->op))
5621 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5622 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5624 tree arg0, arg1;
5625 switch (gimple_call_combined_fn (old_call))
5627 CASE_CFN_COPYSIGN:
5628 CASE_CFN_COPYSIGN_FN:
5629 arg0 = gimple_call_arg (old_call, 0);
5630 arg1 = gimple_call_arg (old_call, 1);
5631 /* The first argument of copysign must be a constant,
5632 otherwise there's nothing to do. */
5633 if (TREE_CODE (arg0) == REAL_CST)
5635 tree type = TREE_TYPE (arg0);
5636 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5637 /* If we couldn't fold to a single constant, skip it.
5638 That happens e.g. for inexact multiplication when
5639 -frounding-math. */
5640 if (mul == NULL_TREE)
5641 break;
5642 /* Instead of adjusting OLD_CALL, let's build a new
5643 call to not leak the LHS and prevent keeping bogus
5644 debug statements. DCE will clean up the old call. */
5645 gcall *new_call;
5646 if (gimple_call_internal_p (old_call))
5647 new_call = gimple_build_call_internal
5648 (IFN_COPYSIGN, 2, mul, arg1);
5649 else
5650 new_call = gimple_build_call
5651 (gimple_call_fndecl (old_call), 2, mul, arg1);
5652 tree lhs = make_ssa_name (type);
5653 gimple_call_set_lhs (new_call, lhs);
5654 gimple_set_location (new_call,
5655 gimple_location (old_call));
5656 insert_stmt_after (new_call, old_call);
5657 /* We've used the constant, get rid of it. */
5658 ops->pop ();
5659 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5660 /* Handle the CST1 < 0 case by negating the result. */
5661 if (cst1_neg)
5663 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5664 gimple *negate_stmt
5665 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5666 insert_stmt_after (negate_stmt, new_call);
5667 oe->op = negrhs;
5669 else
5670 oe->op = lhs;
5671 if (dump_file && (dump_flags & TDF_DETAILS))
5673 fprintf (dump_file, "Optimizing copysign: ");
5674 print_generic_expr (dump_file, cst);
5675 fprintf (dump_file, " * COPYSIGN (");
5676 print_generic_expr (dump_file, arg0);
5677 fprintf (dump_file, ", ");
5678 print_generic_expr (dump_file, arg1);
5679 fprintf (dump_file, ") into %sCOPYSIGN (",
5680 cst1_neg ? "-" : "");
5681 print_generic_expr (dump_file, mul);
5682 fprintf (dump_file, ", ");
5683 print_generic_expr (dump_file, arg1);
5684 fprintf (dump_file, "\n");
5686 return;
5688 break;
5689 default:
5690 break;
5697 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5699 static void
5700 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5702 tree rhs1;
5704 if (dump_file && (dump_flags & TDF_DETAILS))
5706 fprintf (dump_file, "Transforming ");
5707 print_gimple_stmt (dump_file, stmt, 0);
5710 rhs1 = gimple_assign_rhs1 (stmt);
5711 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5712 update_stmt (stmt);
5713 remove_visited_stmt_chain (rhs1);
5715 if (dump_file && (dump_flags & TDF_DETAILS))
5717 fprintf (dump_file, " into ");
5718 print_gimple_stmt (dump_file, stmt, 0);
5722 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5724 static void
5725 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5726 tree rhs1, tree rhs2)
5728 if (dump_file && (dump_flags & TDF_DETAILS))
5730 fprintf (dump_file, "Transforming ");
5731 print_gimple_stmt (dump_file, stmt, 0);
5734 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5735 update_stmt (gsi_stmt (*gsi));
5736 remove_visited_stmt_chain (rhs1);
5738 if (dump_file && (dump_flags & TDF_DETAILS))
5740 fprintf (dump_file, " into ");
5741 print_gimple_stmt (dump_file, stmt, 0);
5745 /* Reassociate expressions in basic block BB and its post-dominator as
5746 children.
5748 Bubble up return status from maybe_optimize_range_tests. */
5750 static bool
5751 reassociate_bb (basic_block bb)
5753 gimple_stmt_iterator gsi;
5754 basic_block son;
5755 gimple *stmt = last_stmt (bb);
5756 bool cfg_cleanup_needed = false;
5758 if (stmt && !gimple_visited_p (stmt))
5759 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5761 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5763 stmt = gsi_stmt (gsi);
5765 if (is_gimple_assign (stmt)
5766 && !stmt_could_throw_p (stmt))
5768 tree lhs, rhs1, rhs2;
5769 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5771 /* If this is not a gimple binary expression, there is
5772 nothing for us to do with it. */
5773 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5774 continue;
5776 /* If this was part of an already processed statement,
5777 we don't need to touch it again. */
5778 if (gimple_visited_p (stmt))
5780 /* This statement might have become dead because of previous
5781 reassociations. */
5782 if (has_zero_uses (gimple_get_lhs (stmt)))
5784 reassoc_remove_stmt (&gsi);
5785 release_defs (stmt);
5786 /* We might end up removing the last stmt above which
5787 places the iterator to the end of the sequence.
5788 Reset it to the last stmt in this case which might
5789 be the end of the sequence as well if we removed
5790 the last statement of the sequence. In which case
5791 we need to bail out. */
5792 if (gsi_end_p (gsi))
5794 gsi = gsi_last_bb (bb);
5795 if (gsi_end_p (gsi))
5796 break;
5799 continue;
5802 lhs = gimple_assign_lhs (stmt);
5803 rhs1 = gimple_assign_rhs1 (stmt);
5804 rhs2 = gimple_assign_rhs2 (stmt);
5806 /* For non-bit or min/max operations we can't associate
5807 all types. Verify that here. */
5808 if (rhs_code != BIT_IOR_EXPR
5809 && rhs_code != BIT_AND_EXPR
5810 && rhs_code != BIT_XOR_EXPR
5811 && rhs_code != MIN_EXPR
5812 && rhs_code != MAX_EXPR
5813 && (!can_reassociate_p (lhs)
5814 || !can_reassociate_p (rhs1)
5815 || !can_reassociate_p (rhs2)))
5816 continue;
5818 if (associative_tree_code (rhs_code))
5820 auto_vec<operand_entry *> ops;
5821 tree powi_result = NULL_TREE;
5822 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5824 /* There may be no immediate uses left by the time we
5825 get here because we may have eliminated them all. */
5826 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5827 continue;
5829 gimple_set_visited (stmt, true);
5830 linearize_expr_tree (&ops, stmt, true, true);
5831 ops.qsort (sort_by_operand_rank);
5832 int orig_len = ops.length ();
5833 optimize_ops_list (rhs_code, &ops);
5834 if (undistribute_ops_list (rhs_code, &ops,
5835 loop_containing_stmt (stmt)))
5837 ops.qsort (sort_by_operand_rank);
5838 optimize_ops_list (rhs_code, &ops);
5841 if (rhs_code == PLUS_EXPR
5842 && transform_add_to_multiply (&ops))
5843 ops.qsort (sort_by_operand_rank);
5845 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5847 if (is_vector)
5848 optimize_vec_cond_expr (rhs_code, &ops);
5849 else
5850 optimize_range_tests (rhs_code, &ops);
5853 if (rhs_code == MULT_EXPR && !is_vector)
5855 attempt_builtin_copysign (&ops);
5857 if (reassoc_insert_powi_p
5858 && flag_unsafe_math_optimizations)
5859 powi_result = attempt_builtin_powi (stmt, &ops);
5862 operand_entry *last;
5863 bool negate_result = false;
5864 if (ops.length () > 1
5865 && rhs_code == MULT_EXPR)
5867 last = ops.last ();
5868 if ((integer_minus_onep (last->op)
5869 || real_minus_onep (last->op))
5870 && !HONOR_SNANS (TREE_TYPE (lhs))
5871 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5872 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5874 ops.pop ();
5875 negate_result = true;
5879 tree new_lhs = lhs;
5880 /* If the operand vector is now empty, all operands were
5881 consumed by the __builtin_powi optimization. */
5882 if (ops.length () == 0)
5883 transform_stmt_to_copy (&gsi, stmt, powi_result);
5884 else if (ops.length () == 1)
5886 tree last_op = ops.last ()->op;
5888 /* If the stmt that defines operand has to be inserted, insert it
5889 before the use. */
5890 if (ops.last ()->stmt_to_insert)
5891 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5892 if (powi_result)
5893 transform_stmt_to_multiply (&gsi, stmt, last_op,
5894 powi_result);
5895 else
5896 transform_stmt_to_copy (&gsi, stmt, last_op);
5898 else
5900 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5901 int ops_num = ops.length ();
5902 int width = get_reassociation_width (ops_num, rhs_code, mode);
5904 if (dump_file && (dump_flags & TDF_DETAILS))
5905 fprintf (dump_file,
5906 "Width = %d was chosen for reassociation\n", width);
5909 /* For binary bit operations, if there are at least 3
5910 operands and the last last operand in OPS is a constant,
5911 move it to the front. This helps ensure that we generate
5912 (X & Y) & C rather than (X & C) & Y. The former will
5913 often match a canonical bit test when we get to RTL. */
5914 if (ops.length () > 2
5915 && (rhs_code == BIT_AND_EXPR
5916 || rhs_code == BIT_IOR_EXPR
5917 || rhs_code == BIT_XOR_EXPR)
5918 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
5919 std::swap (*ops[0], *ops[ops_num - 1]);
5921 if (width > 1
5922 && ops.length () > 3)
5923 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5924 width, ops);
5925 else
5927 /* When there are three operands left, we want
5928 to make sure the ones that get the double
5929 binary op are chosen wisely. */
5930 int len = ops.length ();
5931 if (len >= 3)
5932 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5934 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5935 powi_result != NULL
5936 || negate_result,
5937 len != orig_len);
5940 /* If we combined some repeated factors into a
5941 __builtin_powi call, multiply that result by the
5942 reassociated operands. */
5943 if (powi_result)
5945 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5946 tree type = TREE_TYPE (lhs);
5947 tree target_ssa = make_temp_ssa_name (type, NULL,
5948 "reassocpow");
5949 gimple_set_lhs (lhs_stmt, target_ssa);
5950 update_stmt (lhs_stmt);
5951 if (lhs != new_lhs)
5953 target_ssa = new_lhs;
5954 new_lhs = lhs;
5956 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5957 powi_result, target_ssa);
5958 gimple_set_location (mul_stmt, gimple_location (stmt));
5959 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5960 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5964 if (negate_result)
5966 stmt = SSA_NAME_DEF_STMT (lhs);
5967 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5968 gimple_set_lhs (stmt, tmp);
5969 if (lhs != new_lhs)
5970 tmp = new_lhs;
5971 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5972 tmp);
5973 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5974 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5975 update_stmt (stmt);
5980 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5981 son;
5982 son = next_dom_son (CDI_POST_DOMINATORS, son))
5983 cfg_cleanup_needed |= reassociate_bb (son);
5985 return cfg_cleanup_needed;
5988 /* Add jumps around shifts for range tests turned into bit tests.
5989 For each SSA_NAME VAR we have code like:
5990 VAR = ...; // final stmt of range comparison
5991 // bit test here...;
5992 OTHERVAR = ...; // final stmt of the bit test sequence
5993 RES = VAR | OTHERVAR;
5994 Turn the above into:
5995 VAR = ...;
5996 if (VAR != 0)
5997 goto <l3>;
5998 else
5999 goto <l2>;
6000 <l2>:
6001 // bit test here...;
6002 OTHERVAR = ...;
6003 <l3>:
6004 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6006 static void
6007 branch_fixup (void)
6009 tree var;
6010 unsigned int i;
6012 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6014 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6015 gimple *use_stmt;
6016 use_operand_p use;
6017 bool ok = single_imm_use (var, &use, &use_stmt);
6018 gcc_assert (ok
6019 && is_gimple_assign (use_stmt)
6020 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6021 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6023 basic_block cond_bb = gimple_bb (def_stmt);
6024 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6025 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6027 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6028 gimple *g = gimple_build_cond (NE_EXPR, var,
6029 build_zero_cst (TREE_TYPE (var)),
6030 NULL_TREE, NULL_TREE);
6031 location_t loc = gimple_location (use_stmt);
6032 gimple_set_location (g, loc);
6033 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6035 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6036 etrue->probability = profile_probability::even ();
6037 edge efalse = find_edge (cond_bb, then_bb);
6038 efalse->flags = EDGE_FALSE_VALUE;
6039 efalse->probability -= etrue->probability;
6040 then_bb->count -= etrue->count ();
6042 tree othervar = NULL_TREE;
6043 if (gimple_assign_rhs1 (use_stmt) == var)
6044 othervar = gimple_assign_rhs2 (use_stmt);
6045 else if (gimple_assign_rhs2 (use_stmt) == var)
6046 othervar = gimple_assign_rhs1 (use_stmt);
6047 else
6048 gcc_unreachable ();
6049 tree lhs = gimple_assign_lhs (use_stmt);
6050 gphi *phi = create_phi_node (lhs, merge_bb);
6051 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6052 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6053 gsi = gsi_for_stmt (use_stmt);
6054 gsi_remove (&gsi, true);
6056 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6057 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6059 reassoc_branch_fixups.release ();
6062 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6063 void debug_ops_vector (vec<operand_entry *> ops);
6065 /* Dump the operand entry vector OPS to FILE. */
6067 void
6068 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6070 operand_entry *oe;
6071 unsigned int i;
6073 FOR_EACH_VEC_ELT (ops, i, oe)
6075 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6076 print_generic_expr (file, oe->op);
6077 fprintf (file, "\n");
6081 /* Dump the operand entry vector OPS to STDERR. */
6083 DEBUG_FUNCTION void
6084 debug_ops_vector (vec<operand_entry *> ops)
6086 dump_ops_vector (stderr, ops);
6089 /* Bubble up return status from reassociate_bb. */
6091 static bool
6092 do_reassoc (void)
6094 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6095 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6098 /* Initialize the reassociation pass. */
6100 static void
6101 init_reassoc (void)
6103 int i;
6104 long rank = 2;
6105 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6107 /* Find the loops, so that we can prevent moving calculations in
6108 them. */
6109 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6111 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6113 next_operand_entry_id = 0;
6115 /* Reverse RPO (Reverse Post Order) will give us something where
6116 deeper loops come later. */
6117 pre_and_rev_post_order_compute (NULL, bbs, false);
6118 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6119 operand_rank = new hash_map<tree, long>;
6121 /* Give each default definition a distinct rank. This includes
6122 parameters and the static chain. Walk backwards over all
6123 SSA names so that we get proper rank ordering according
6124 to tree_swap_operands_p. */
6125 for (i = num_ssa_names - 1; i > 0; --i)
6127 tree name = ssa_name (i);
6128 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6129 insert_operand_rank (name, ++rank);
6132 /* Set up rank for each BB */
6133 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6134 bb_rank[bbs[i]] = ++rank << 16;
6136 free (bbs);
6137 calculate_dominance_info (CDI_POST_DOMINATORS);
6138 plus_negates = vNULL;
6141 /* Cleanup after the reassociation pass, and print stats if
6142 requested. */
6144 static void
6145 fini_reassoc (void)
6147 statistics_counter_event (cfun, "Linearized",
6148 reassociate_stats.linearized);
6149 statistics_counter_event (cfun, "Constants eliminated",
6150 reassociate_stats.constants_eliminated);
6151 statistics_counter_event (cfun, "Ops eliminated",
6152 reassociate_stats.ops_eliminated);
6153 statistics_counter_event (cfun, "Statements rewritten",
6154 reassociate_stats.rewritten);
6155 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6156 reassociate_stats.pows_encountered);
6157 statistics_counter_event (cfun, "Built-in powi calls created",
6158 reassociate_stats.pows_created);
6160 delete operand_rank;
6161 operand_entry_pool.release ();
6162 free (bb_rank);
6163 plus_negates.release ();
6164 free_dominance_info (CDI_POST_DOMINATORS);
6165 loop_optimizer_finalize ();
6168 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6169 insertion of __builtin_powi calls.
6171 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6172 optimization of a gimple conditional. Otherwise returns zero. */
6174 static unsigned int
6175 execute_reassoc (bool insert_powi_p)
6177 reassoc_insert_powi_p = insert_powi_p;
6179 init_reassoc ();
6181 bool cfg_cleanup_needed;
6182 cfg_cleanup_needed = do_reassoc ();
6183 repropagate_negates ();
6184 branch_fixup ();
6186 fini_reassoc ();
6187 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6190 namespace {
6192 const pass_data pass_data_reassoc =
6194 GIMPLE_PASS, /* type */
6195 "reassoc", /* name */
6196 OPTGROUP_NONE, /* optinfo_flags */
6197 TV_TREE_REASSOC, /* tv_id */
6198 ( PROP_cfg | PROP_ssa ), /* properties_required */
6199 0, /* properties_provided */
6200 0, /* properties_destroyed */
6201 0, /* todo_flags_start */
6202 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6205 class pass_reassoc : public gimple_opt_pass
6207 public:
6208 pass_reassoc (gcc::context *ctxt)
6209 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6212 /* opt_pass methods: */
6213 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6214 void set_pass_param (unsigned int n, bool param)
6216 gcc_assert (n == 0);
6217 insert_powi_p = param;
6219 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6220 virtual unsigned int execute (function *)
6221 { return execute_reassoc (insert_powi_p); }
6223 private:
6224 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6225 point 3a in the pass header comment. */
6226 bool insert_powi_p;
6227 }; // class pass_reassoc
6229 } // anon namespace
6231 gimple_opt_pass *
6232 make_pass_reassoc (gcc::context *ctxt)
6234 return new pass_reassoc (ctxt);