[official-gcc.git] / gcc / tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
54 /* This is a simple global reassociation pass. It is, in part, based
55 on the LLVM pass of the same name (They do some things more/less
56 than we do, in different orders, etc).
58 It consists of five steps:
60 1. Breaking up subtract operations into addition + negate, where
61 it would promote the reassociation of adds.
63 2. Left linearization of the expression trees, so that (A+B)+(C+D)
64 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
65 During linearization, we place the operands of the binary
66 expressions into a vector of operand_entry_*
68 3. Optimization of the operand lists, eliminating things like a +
69 -a, a & a, etc.
71 3a. Combine repeated factors with the same occurrence counts
72 into a __builtin_powi call that will later be optimized into
73 an optimal number of multiplies.
75 4. Rewrite the expression trees we linearized and optimized so
76 they are in proper rank order.
78 5. Repropagate negates, as nothing else will clean it up ATM.
80 A bit of theory on #4, since nobody seems to write anything down
81 about why it makes sense to do it the way they do it:
83 We could do this much nicer theoretically, but don't (for reasons
84 explained after how to do it theoretically nice :P).
86 In order to promote the most redundancy elimination, you want
87 binary expressions whose operands are the same rank (or
88 preferably, the same value) exposed to the redundancy eliminator,
89 for possible elimination.
91 So the way to do this if we really cared, is to build the new op
92 tree from the leaves to the roots, merging as you go, and putting the
93 new op on the end of the worklist, until you are left with one
94 thing on the worklist.
96 IE if you have to rewrite the following set of operands (listed with
97 rank in parentheses), with opcode PLUS_EXPR:
99 a (1), b (1), c (1), d (2), e (2)
102 We start with our merge worklist empty, and the ops list with all of
103 those on it.
105 You want to first merge all leaves of the same rank, as much as
106 possible.
108 So first build a binary op of
110 mergetmp = a + b, and put "mergetmp" on the merge worklist.
112 Because there is no three operand form of PLUS_EXPR, c is not going to
113 be exposed to redundancy elimination as a rank 1 operand.
115 So you might as well throw it on the merge worklist (you could also
116 consider it to now be a rank two operand, and merge it with d and e,
117 but in this case, you then have evicted e from a binary op. So at
118 least in this situation, you can't win.)
120 Then build a binary op of d + e
121 mergetmp2 = d + e
123 and put mergetmp2 on the merge worklist.
125 so merge worklist = {mergetmp, c, mergetmp2}
127 Continue building binary ops of these operations until you have only
128 one operation left on the worklist.
130 So we have
132 build binary op
133 mergetmp3 = mergetmp + c
135 worklist = {mergetmp2, mergetmp3}
137 mergetmp4 = mergetmp2 + mergetmp3
139 worklist = {mergetmp4}
141 because we have one operation left, we can now just set the original
142 statement equal to the result of that operation.
144 This will at least expose a + b and d + e to redundancy elimination
145 as binary operations.
147 For extra points, you can reuse the old statements to build the
148 mergetmps, since you shouldn't run out.
150 So why don't we do this?
152 Because it's expensive, and rarely will help. Most trees we are
153 reassociating have 3 or fewer ops. If they have 2 ops, they already
154 will be written into a nice single binary op. If you have 3 ops, a
155 single simple check suffices to tell you whether the first two are of the
156 same rank. If so, you know to order it
158 mergetmp = op1 + op2
159 newstmt = mergetmp + op3
161 instead of
162 mergetmp = op2 + op3
163 newstmt = mergetmp + op1
165 If all three are of the same rank, you can't expose them all in a
166 single binary operator anyway, so the above is *still* the best you
167 can do.
169 Thus, this is what we do. When we have three ops left, we check to see
170 what order to put them in, and call it a day. As a nod to vector sum
171 reduction, we check if any of the ops are really a phi node that is a
172 destructive update for the associating op, and keep the destructive
173 update together for vector sum reduction recognition. */
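/* A rough source-level sketch of the overall effect (illustrative
   only; the pass works on GIMPLE, and the exact result depends on
   ranks and on later cleanup passes):

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 - a;

   Step 1 turns the subtracts into additions of negates, steps 2 and 4
   linearize and re-order the chain by rank, step 3 then sees the
   a + -a pair and cancels it, and step 5 repropagates the remaining
   negate, leaving roughly t3 = c - b.  */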
176 /* Statistics */
177 static struct
179 int linearized;
180 int constants_eliminated;
181 int ops_eliminated;
182 int rewritten;
183 int pows_encountered;
184 int pows_created;
185 } reassociate_stats;
187 /* Operator, rank pair. */
188 struct operand_entry
190 unsigned int rank;
191 int id;
192 tree op;
193 unsigned int count;
196 static object_allocator<operand_entry> operand_entry_pool
197 ("operand entry pool");
199 /* This is used to assign a unique ID to each struct operand_entry
200 so that qsort results are identical on different hosts. */
201 static int next_operand_entry_id;
203 /* Starting rank number for a given basic block, so that we can rank
204 operations using unmovable instructions in that BB based on the bb
205 depth. */
206 static long *bb_rank;
208 /* Operand->rank hashtable. */
209 static hash_map<tree, long> *operand_rank;
211 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
212 all basic blocks, the CFG should be adjusted - the basic block is
213 split right after that SSA_NAME's definition statement and before
214 its only use, which must be a bit ior. */
215 static vec<tree> reassoc_branch_fixups;
217 /* Forward decls. */
218 static long get_rank (tree);
219 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
221 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
222 possibly added by gsi_remove. */
224 bool
225 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
227 gimple *stmt = gsi_stmt (*gsi);
229 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
230 return gsi_remove (gsi, true);
232 gimple_stmt_iterator prev = *gsi;
233 gsi_prev (&prev);
234 unsigned uid = gimple_uid (stmt);
235 basic_block bb = gimple_bb (stmt);
236 bool ret = gsi_remove (gsi, true);
237 if (!gsi_end_p (prev))
238 gsi_next (&prev);
239 else
240 prev = gsi_start_bb (bb);
241 gimple *end_stmt = gsi_stmt (*gsi);
242 while ((stmt = gsi_stmt (prev)) != end_stmt)
244 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
245 gimple_set_uid (stmt, uid);
246 gsi_next (&prev);
248 return ret;
251 /* Bias amount for loop-carried phis. We want this to be larger than
252 the depth of any reassociation tree we can see, but not larger than
253 the rank difference between two blocks. */
254 #define PHI_LOOP_BIAS (1 << 15)
256 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
257 an innermost loop, and the phi has only a single use which is inside
258 the loop, then the rank is the block rank of the loop latch plus an
259 extra bias for the loop-carried dependence. This causes expressions
260 calculated into an accumulator variable to be independent for each
261 iteration of the loop. If STMT is some other phi, the rank is the
262 block rank of its containing block. */
263 static long
264 phi_rank (gimple *stmt)
266 basic_block bb = gimple_bb (stmt);
267 struct loop *father = bb->loop_father;
268 tree res;
269 unsigned i;
270 use_operand_p use;
271 gimple *use_stmt;
273 /* We only care about real loops (those with a latch). */
274 if (!father->latch)
275 return bb_rank[bb->index];
277 /* Interesting phis must be in headers of innermost loops. */
278 if (bb != father->header
279 || father->inner)
280 return bb_rank[bb->index];
282 /* Ignore virtual SSA_NAMEs. */
283 res = gimple_phi_result (stmt);
284 if (virtual_operand_p (res))
285 return bb_rank[bb->index];
287 /* The phi definition must have a single use, and that use must be
288 within the loop. Otherwise this isn't an accumulator pattern. */
289 if (!single_imm_use (res, &use, &use_stmt)
290 || gimple_bb (use_stmt)->loop_father != father)
291 return bb_rank[bb->index];
293 /* Look for phi arguments from within the loop. If found, bias this phi. */
294 for (i = 0; i < gimple_phi_num_args (stmt); i++)
296 tree arg = gimple_phi_arg_def (stmt, i);
297 if (TREE_CODE (arg) == SSA_NAME
298 && !SSA_NAME_IS_DEFAULT_DEF (arg))
300 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
301 if (gimple_bb (def_stmt)->loop_father == father)
302 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
306 /* Must be an uninteresting phi. */
307 return bb_rank[bb->index];
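/* A minimal sketch of the accumulator pattern this bias is aimed at
   (illustrative only; the names are made up):

     double acc = 0.0;
     for (int i = 0; i < n; i++)
       acc = acc + a[i] * b[i];

   When the header phi for ACC in this innermost loop has its single
   use in the addition inside the loop and one of its arguments is
   defined inside the loop, the checks above give it the latch's block
   rank plus PHI_LOOP_BIAS instead of its own block rank.  */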
310 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
311 loop-carried dependence of an innermost loop, return TRUE; else
312 return FALSE. */
313 static bool
314 loop_carried_phi (tree exp)
316 gimple *phi_stmt;
317 long block_rank;
319 if (TREE_CODE (exp) != SSA_NAME
320 || SSA_NAME_IS_DEFAULT_DEF (exp))
321 return false;
323 phi_stmt = SSA_NAME_DEF_STMT (exp);
325 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
326 return false;
328 /* Non-loop-carried phis have block rank. Loop-carried phis have
329 an additional bias added in. If this phi doesn't have block rank,
330 it's biased and should not be propagated. */
331 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
333 if (phi_rank (phi_stmt) != block_rank)
334 return true;
336 return false;
339 /* Return the maximum of RANK and the rank that should be propagated
340 from expression OP. For most operands, this is just the rank of OP.
341 For loop-carried phis, the value is zero to avoid undoing the bias
342 in favor of the phi. */
343 static long
344 propagate_rank (long rank, tree op)
346 long op_rank;
348 if (loop_carried_phi (op))
349 return rank;
351 op_rank = get_rank (op);
353 return MAX (rank, op_rank);
356 /* Look up the operand rank structure for expression E. */
358 static inline long
359 find_operand_rank (tree e)
361 long *slot = operand_rank->get (e);
362 return slot ? *slot : -1;
365 /* Insert {E,RANK} into the operand rank hashtable. */
367 static inline void
368 insert_operand_rank (tree e, long rank)
370 gcc_assert (rank > 0);
371 gcc_assert (!operand_rank->put (e, rank));
374 /* Given an expression E, return the rank of the expression. */
376 static long
377 get_rank (tree e)
379 /* SSA_NAME's have the rank of the expression they are the result of.
381 For globals and uninitialized values, the rank is 0.
382 For function arguments, use the pre-setup rank.
383 For PHI nodes, stores, asm statements, etc, we use the rank of
384 the BB.
385 For simple operations, the rank is the maximum rank of any of
386 its operands, or the bb_rank, whichever is less.
387 I make no claims that this is optimal; however, it gives good
388 results. */
390 /* We make an exception to the normal ranking system to break
391 dependences of accumulator variables in loops. Suppose we
392 have a simple one-block loop containing:
394 x_1 = phi(x_0, x_2)
395 b = a + x_1
396 c = b + d
397 x_2 = c + e
399 As shown, each iteration of the calculation into x is fully
400 dependent upon the iteration before it. We would prefer to
401 see this in the form:
403 x_1 = phi(x_0, x_2)
404 b = a + d
405 c = b + e
406 x_2 = c + x_1
408 If the loop is unrolled, the calculations of b and c from
409 different iterations can be interleaved.
411 To obtain this result during reassociation, we bias the rank
412 of the phi definition x_1 upward, when it is recognized as an
413 accumulator pattern. The artificial rank causes it to be
414 added last, providing the desired independence. */
416 if (TREE_CODE (e) == SSA_NAME)
418 ssa_op_iter iter;
419 gimple *stmt;
420 long rank;
421 tree op;
423 if (SSA_NAME_IS_DEFAULT_DEF (e))
424 return find_operand_rank (e);
426 stmt = SSA_NAME_DEF_STMT (e);
427 if (gimple_code (stmt) == GIMPLE_PHI)
428 return phi_rank (stmt);
430 if (!is_gimple_assign (stmt))
431 return bb_rank[gimple_bb (stmt)->index];
433 /* If we already have a rank for this expression, use that. */
434 rank = find_operand_rank (e);
435 if (rank != -1)
436 return rank;
438 /* Otherwise, find the maximum rank for the operands. As an
439 exception, remove the bias from loop-carried phis when propagating
440 the rank so that dependent operations are not also biased. */
441 /* Simply walk over all SSA uses - this takes advantage of the
442 fact that non-SSA operands are is_gimple_min_invariant and
443 thus have rank 0. */
444 rank = 0;
445 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
446 rank = propagate_rank (rank, op);
448 if (dump_file && (dump_flags & TDF_DETAILS))
450 fprintf (dump_file, "Rank for ");
451 print_generic_expr (dump_file, e, 0);
452 fprintf (dump_file, " is %ld\n", (rank + 1));
455 /* Note the rank in the hashtable so we don't recompute it. */
456 insert_operand_rank (e, (rank + 1));
457 return (rank + 1);
460 /* Constants, globals, etc., are rank 0 */
461 return 0;
465 /* We want integer ones to end up last no matter what, since they are
466 the ones we can do the most with. */
467 #define INTEGER_CONST_TYPE 1 << 3
468 #define FLOAT_CONST_TYPE 1 << 2
469 #define OTHER_CONST_TYPE 1 << 1
471 /* Classify an invariant tree into integer, float, or other, so that
472 we can sort them to be near other constants of the same type. */
473 static inline int
474 constant_type (tree t)
476 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
477 return INTEGER_CONST_TYPE;
478 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
479 return FLOAT_CONST_TYPE;
480 else
481 return OTHER_CONST_TYPE;
484 /* qsort comparison function to sort operand entries PA and PB by rank
485 so that the sorted array is ordered by rank in decreasing order. */
486 static int
487 sort_by_operand_rank (const void *pa, const void *pb)
489 const operand_entry *oea = *(const operand_entry *const *)pa;
490 const operand_entry *oeb = *(const operand_entry *const *)pb;
492 /* It's nicer for optimize_expression if constants that are likely
493 to fold when added/multiplied/whatever are put next to each
494 other. Since all constants have rank 0, order them by type. */
495 if (oeb->rank == 0 && oea->rank == 0)
497 if (constant_type (oeb->op) != constant_type (oea->op))
498 return constant_type (oeb->op) - constant_type (oea->op);
499 else
500 /* To make sorting result stable, we use unique IDs to determine
501 order. */
502 return oeb->id - oea->id;
505 /* Lastly, make sure the versions that are the same go next to each
506 other. */
507 if ((oeb->rank - oea->rank == 0)
508 && TREE_CODE (oea->op) == SSA_NAME
509 && TREE_CODE (oeb->op) == SSA_NAME)
511 /* As SSA_NAME_VERSION is assigned pretty randomly (because we reuse
512 versions of removed SSA_NAMEs), prefer, if possible, to sort
513 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
514 See PR60418. */
515 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
516 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
517 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
519 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
520 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
521 basic_block bba = gimple_bb (stmta);
522 basic_block bbb = gimple_bb (stmtb);
523 if (bbb != bba)
525 if (bb_rank[bbb->index] != bb_rank[bba->index])
526 return bb_rank[bbb->index] - bb_rank[bba->index];
528 else
530 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
531 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
532 if (da != db)
533 return da ? 1 : -1;
537 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
538 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
539 else
540 return oeb->id - oea->id;
543 if (oeb->rank != oea->rank)
544 return oeb->rank - oea->rank;
545 else
546 return oeb->id - oea->id;
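/* A small sketch of the resulting order (illustrative, assuming a
   PLUS_EXPR chain over an integer type): for operands
   a_5 (rank 3), b_6 (rank 3), 4 and 8, the two rank-0 constants sort
   after the SSA names and next to each other, which later lets
   optimize_ops_list fold them into the single constant 12, while a_5
   and b_6 are ordered deterministically by the SSA-based tie-breakers
   above.  */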
549 /* Add an operand entry to *OPS for the tree operand OP. */
551 static void
552 add_to_ops_vec (vec<operand_entry *> *ops, tree op)
554 operand_entry *oe = operand_entry_pool.allocate ();
556 oe->op = op;
557 oe->rank = get_rank (op);
558 oe->id = next_operand_entry_id++;
559 oe->count = 1;
560 ops->safe_push (oe);
563 /* Add an operand entry to *OPS for the tree operand OP with repeat
564 count REPEAT. */
566 static void
567 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
568 HOST_WIDE_INT repeat)
570 operand_entry *oe = operand_entry_pool.allocate ();
572 oe->op = op;
573 oe->rank = get_rank (op);
574 oe->id = next_operand_entry_id++;
575 oe->count = repeat;
576 ops->safe_push (oe);
578 reassociate_stats.pows_encountered++;
581 /* Return true if STMT is a reassociable operation containing a binary
582 operation with tree code CODE, and is inside LOOP. */
584 static bool
585 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
587 basic_block bb = gimple_bb (stmt);
589 if (gimple_bb (stmt) == NULL)
590 return false;
592 if (!flow_bb_inside_loop_p (loop, bb))
593 return false;
595 if (is_gimple_assign (stmt)
596 && gimple_assign_rhs_code (stmt) == code
597 && has_single_use (gimple_assign_lhs (stmt)))
598 return true;
600 return false;
604 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
605 operand of that operation. Otherwise, return NULL. */
607 static tree
608 get_unary_op (tree name, enum tree_code opcode)
610 gimple *stmt = SSA_NAME_DEF_STMT (name);
612 if (!is_gimple_assign (stmt))
613 return NULL_TREE;
615 if (gimple_assign_rhs_code (stmt) == opcode)
616 return gimple_assign_rhs1 (stmt);
617 return NULL_TREE;
620 /* If CURR and LAST are a pair of ops that OPCODE allows us to
621 eliminate through equivalences, do so, remove them from OPS, and
622 return true. Otherwise, return false. */
624 static bool
625 eliminate_duplicate_pair (enum tree_code opcode,
626 vec<operand_entry *> *ops,
627 bool *all_done,
628 unsigned int i,
629 operand_entry *curr,
630 operand_entry *last)
633 /* If we have two of the same op, and the opcode is &, |, min, or max,
634 we can eliminate one of them.
635 If we have two of the same op, and the opcode is ^, we can
636 eliminate both of them. */
638 if (last && last->op == curr->op)
640 switch (opcode)
642 case MAX_EXPR:
643 case MIN_EXPR:
644 case BIT_IOR_EXPR:
645 case BIT_AND_EXPR:
646 if (dump_file && (dump_flags & TDF_DETAILS))
648 fprintf (dump_file, "Equivalence: ");
649 print_generic_expr (dump_file, curr->op, 0);
650 fprintf (dump_file, " [&|minmax] ");
651 print_generic_expr (dump_file, last->op, 0);
652 fprintf (dump_file, " -> ");
653 print_generic_stmt (dump_file, last->op, 0);
656 ops->ordered_remove (i);
657 reassociate_stats.ops_eliminated ++;
659 return true;
661 case BIT_XOR_EXPR:
662 if (dump_file && (dump_flags & TDF_DETAILS))
664 fprintf (dump_file, "Equivalence: ");
665 print_generic_expr (dump_file, curr->op, 0);
666 fprintf (dump_file, " ^ ");
667 print_generic_expr (dump_file, last->op, 0);
668 fprintf (dump_file, " -> nothing\n");
671 reassociate_stats.ops_eliminated += 2;
673 if (ops->length () == 2)
675 ops->create (0);
676 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
677 *all_done = true;
679 else
681 ops->ordered_remove (i-1);
682 ops->ordered_remove (i-1);
685 return true;
687 default:
688 break;
691 return false;
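/* Source-level sketches of the equivalences handled above
   (illustrative only):

     x & x -> x        x | x -> x
     MIN (x, x) -> x   MAX (x, x) -> x
     x ^ x -> 0

   In the XOR case both entries are removed; if they were the only two
   operands left, the whole list is replaced by a zero constant.  */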
694 static vec<tree> plus_negates;
696 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
697 expression, look in OPS for a corresponding positive operation to cancel
698 it out. If we find one, remove the other from OPS, replace
699 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
700 return false. */
702 static bool
703 eliminate_plus_minus_pair (enum tree_code opcode,
704 vec<operand_entry *> *ops,
705 unsigned int currindex,
706 operand_entry *curr)
708 tree negateop;
709 tree notop;
710 unsigned int i;
711 operand_entry *oe;
713 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
714 return false;
716 negateop = get_unary_op (curr->op, NEGATE_EXPR);
717 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
718 if (negateop == NULL_TREE && notop == NULL_TREE)
719 return false;
721 /* Any non-negated version will have a rank that is one less than
722 the current rank. So once we hit those ranks, if we don't find
723 one, we can stop. */
725 for (i = currindex + 1;
726 ops->iterate (i, &oe)
727 && oe->rank >= curr->rank - 1 ;
728 i++)
730 if (oe->op == negateop)
733 if (dump_file && (dump_flags & TDF_DETAILS))
735 fprintf (dump_file, "Equivalence: ");
736 print_generic_expr (dump_file, negateop, 0);
737 fprintf (dump_file, " + -");
738 print_generic_expr (dump_file, oe->op, 0);
739 fprintf (dump_file, " -> 0\n");
742 ops->ordered_remove (i);
743 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
744 ops->ordered_remove (currindex);
745 reassociate_stats.ops_eliminated ++;
747 return true;
749 else if (oe->op == notop)
751 tree op_type = TREE_TYPE (oe->op);
753 if (dump_file && (dump_flags & TDF_DETAILS))
755 fprintf (dump_file, "Equivalence: ");
756 print_generic_expr (dump_file, notop, 0);
757 fprintf (dump_file, " + ~");
758 print_generic_expr (dump_file, oe->op, 0);
759 fprintf (dump_file, " -> -1\n");
762 ops->ordered_remove (i);
763 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
764 ops->ordered_remove (currindex);
765 reassociate_stats.ops_eliminated ++;
767 return true;
771 /* CURR->OP is a negate expr in a plus expr: save it for later
772 inspection in repropagate_negates(). */
773 if (negateop != NULL_TREE)
774 plus_negates.safe_push (curr->op);
776 return false;
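/* Illustrative examples of the cancellations above:

     a + (-a) + b -> 0 + b
     a + (~a) + b -> -1 + b

   The second identity follows from ~a == -a - 1 on two's complement
   integers.  A negate that does not find a matching positive operand
   is queued in PLUS_NEGATES for later use by repropagate_negates.  */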
779 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
780 bitwise not expression, look in OPS for a corresponding operand to
781 cancel it out. If we find one, remove the other from OPS, replace
782 OPS[CURRINDEX] with 0, and return true. Otherwise, return
783 false. */
785 static bool
786 eliminate_not_pairs (enum tree_code opcode,
787 vec<operand_entry *> *ops,
788 unsigned int currindex,
789 operand_entry *curr)
791 tree notop;
792 unsigned int i;
793 operand_entry *oe;
795 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
796 || TREE_CODE (curr->op) != SSA_NAME)
797 return false;
799 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
800 if (notop == NULL_TREE)
801 return false;
803 /* Any non-not version will have a rank that is one less than
804 the current rank. So once we hit those ranks, if we don't find
805 one, we can stop. */
807 for (i = currindex + 1;
808 ops->iterate (i, &oe)
809 && oe->rank >= curr->rank - 1;
810 i++)
812 if (oe->op == notop)
814 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "Equivalence: ");
817 print_generic_expr (dump_file, notop, 0);
818 if (opcode == BIT_AND_EXPR)
819 fprintf (dump_file, " & ~");
820 else if (opcode == BIT_IOR_EXPR)
821 fprintf (dump_file, " | ~");
822 print_generic_expr (dump_file, oe->op, 0);
823 if (opcode == BIT_AND_EXPR)
824 fprintf (dump_file, " -> 0\n");
825 else if (opcode == BIT_IOR_EXPR)
826 fprintf (dump_file, " -> -1\n");
829 if (opcode == BIT_AND_EXPR)
830 oe->op = build_zero_cst (TREE_TYPE (oe->op));
831 else if (opcode == BIT_IOR_EXPR)
832 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
834 reassociate_stats.ops_eliminated += ops->length () - 1;
835 ops->truncate (0);
836 ops->quick_push (oe);
837 return true;
841 return false;
844 /* Use a constant value that may be present in OPS to try to eliminate
845 operands. Note that this function is only really used when we've
846 eliminated ops for other reasons, or merged constants. Across
847 single statements, fold already does all of this, plus more. There
848 is little point in duplicating logic, so I've only included the
849 identities that I could ever construct testcases to trigger. */
851 static void
852 eliminate_using_constants (enum tree_code opcode,
853 vec<operand_entry *> *ops)
855 operand_entry *oelast = ops->last ();
856 tree type = TREE_TYPE (oelast->op);
858 if (oelast->rank == 0
859 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
861 switch (opcode)
863 case BIT_AND_EXPR:
864 if (integer_zerop (oelast->op))
866 if (ops->length () != 1)
868 if (dump_file && (dump_flags & TDF_DETAILS))
869 fprintf (dump_file, "Found & 0, removing all other ops\n");
871 reassociate_stats.ops_eliminated += ops->length () - 1;
873 ops->truncate (0);
874 ops->quick_push (oelast);
875 return;
878 else if (integer_all_onesp (oelast->op))
880 if (ops->length () != 1)
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found & -1, removing\n");
884 ops->pop ();
885 reassociate_stats.ops_eliminated++;
888 break;
889 case BIT_IOR_EXPR:
890 if (integer_all_onesp (oelast->op))
892 if (ops->length () != 1)
894 if (dump_file && (dump_flags & TDF_DETAILS))
895 fprintf (dump_file, "Found | -1, removing all other ops\n");
897 reassociate_stats.ops_eliminated += ops->length () - 1;
899 ops->truncate (0);
900 ops->quick_push (oelast);
901 return;
904 else if (integer_zerop (oelast->op))
906 if (ops->length () != 1)
908 if (dump_file && (dump_flags & TDF_DETAILS))
909 fprintf (dump_file, "Found | 0, removing\n");
910 ops->pop ();
911 reassociate_stats.ops_eliminated++;
914 break;
915 case MULT_EXPR:
916 if (integer_zerop (oelast->op)
917 || (FLOAT_TYPE_P (type)
918 && !HONOR_NANS (type)
919 && !HONOR_SIGNED_ZEROS (type)
920 && real_zerop (oelast->op)))
922 if (ops->length () != 1)
924 if (dump_file && (dump_flags & TDF_DETAILS))
925 fprintf (dump_file, "Found * 0, removing all other ops\n");
927 reassociate_stats.ops_eliminated += ops->length () - 1;
928 ops->truncate (1);
929 ops->quick_push (oelast);
930 return;
933 else if (integer_onep (oelast->op)
934 || (FLOAT_TYPE_P (type)
935 && !HONOR_SNANS (type)
936 && real_onep (oelast->op)))
938 if (ops->length () != 1)
940 if (dump_file && (dump_flags & TDF_DETAILS))
941 fprintf (dump_file, "Found * 1, removing\n");
942 ops->pop ();
943 reassociate_stats.ops_eliminated++;
944 return;
947 break;
948 case BIT_XOR_EXPR:
949 case PLUS_EXPR:
950 case MINUS_EXPR:
951 if (integer_zerop (oelast->op)
952 || (FLOAT_TYPE_P (type)
953 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
954 && fold_real_zero_addition_p (type, oelast->op,
955 opcode == MINUS_EXPR)))
957 if (ops->length () != 1)
959 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "Found [|^+] 0, removing\n");
961 ops->pop ();
962 reassociate_stats.ops_eliminated++;
963 return;
966 break;
967 default:
968 break;
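/* The identities handled above, as source-level sketches (illustrative
   only; the floating-point cases are additionally guarded by the
   NaN / signed-zero / signaling-NaN checks):

     x & 0 -> 0      x & -1 -> x
     x | -1 -> -1    x | 0 -> x
     x * 0 -> 0      x * 1 -> x
     x + 0 -> x      x ^ 0 -> x  */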
974 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
975 bool, bool);
977 /* Structure for tracking and counting operands. */
978 struct oecount {
979 int cnt;
980 int id;
981 enum tree_code oecode;
982 tree op;
986 /* The heap for the oecount hashtable and the sorted list of operands. */
987 static vec<oecount> cvec;
990 /* Oecount hashtable helpers. */
992 struct oecount_hasher : int_hash <int, 0, 1>
994 static inline hashval_t hash (int);
995 static inline bool equal (int, int);
998 /* Hash function for oecount. */
1000 inline hashval_t
1001 oecount_hasher::hash (int p)
1003 const oecount *c = &cvec[p - 42];
1004 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1007 /* Comparison function for oecount. */
1009 inline bool
1010 oecount_hasher::equal (int p1, int p2)
1012 const oecount *c1 = &cvec[p1 - 42];
1013 const oecount *c2 = &cvec[p2 - 42];
1014 return (c1->oecode == c2->oecode
1015 && c1->op == c2->op);
1018 /* Comparison function for qsort sorting oecount elements by count. */
1020 static int
1021 oecount_cmp (const void *p1, const void *p2)
1023 const oecount *c1 = (const oecount *)p1;
1024 const oecount *c2 = (const oecount *)p2;
1025 if (c1->cnt != c2->cnt)
1026 return c1->cnt - c2->cnt;
1027 else
1028 /* If counts are identical, use unique IDs to stabilize qsort. */
1029 return c1->id - c2->id;
1032 /* Return TRUE iff STMT represents a builtin call that raises OP
1033 to some exponent. */
1035 static bool
1036 stmt_is_power_of_op (gimple *stmt, tree op)
1038 tree fndecl;
1040 if (!is_gimple_call (stmt))
1041 return false;
1043 fndecl = gimple_call_fndecl (stmt);
1045 if (!fndecl
1046 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1047 return false;
1049 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1051 CASE_FLT_FN (BUILT_IN_POW):
1052 CASE_FLT_FN (BUILT_IN_POWI):
1053 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1055 default:
1056 return false;
1060 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1061 in place and return the result. Assumes that stmt_is_power_of_op
1062 was previously called for STMT and returned TRUE. */
1064 static HOST_WIDE_INT
1065 decrement_power (gimple *stmt)
1067 REAL_VALUE_TYPE c, cint;
1068 HOST_WIDE_INT power;
1069 tree arg1;
1071 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1073 CASE_FLT_FN (BUILT_IN_POW):
1074 arg1 = gimple_call_arg (stmt, 1);
1075 c = TREE_REAL_CST (arg1);
1076 power = real_to_integer (&c) - 1;
1077 real_from_integer (&cint, VOIDmode, power, SIGNED);
1078 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1079 return power;
1081 CASE_FLT_FN (BUILT_IN_POWI):
1082 arg1 = gimple_call_arg (stmt, 1);
1083 power = TREE_INT_CST_LOW (arg1) - 1;
1084 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1085 return power;
1087 default:
1088 gcc_unreachable ();
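/* A small illustration (not taken from this file): for a call
   t = __builtin_pow (x, 3.0) this rewrites the argument to 2.0 in
   place and returns 2, while for t = __builtin_powi (x, 2) it
   rewrites the exponent to 1 and returns 1, in which case the caller
   (zero_one_operation) simply propagates X instead.  */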
1092 /* Find the single immediate use of STMT's LHS, and replace it
1093 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1094 replace *DEF with OP as well. */
1096 static void
1097 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1099 tree lhs;
1100 gimple *use_stmt;
1101 use_operand_p use;
1102 gimple_stmt_iterator gsi;
1104 if (is_gimple_call (stmt))
1105 lhs = gimple_call_lhs (stmt);
1106 else
1107 lhs = gimple_assign_lhs (stmt);
1109 gcc_assert (has_single_use (lhs));
1110 single_imm_use (lhs, &use, &use_stmt);
1111 if (lhs == *def)
1112 *def = op;
1113 SET_USE (use, op);
1114 if (TREE_CODE (op) != SSA_NAME)
1115 update_stmt (use_stmt);
1116 gsi = gsi_for_stmt (stmt);
1117 unlink_stmt_vdef (stmt);
1118 reassoc_remove_stmt (&gsi);
1119 release_defs (stmt);
1122 /* Walks the linear chain with result *DEF searching for an operation
1123 with operand OP and code OPCODE, removing that from the chain. *DEF
1124 is updated if there is only one operand but no operation left. */
1126 static void
1127 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1129 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1133 tree name;
1135 if (opcode == MULT_EXPR
1136 && stmt_is_power_of_op (stmt, op))
1138 if (decrement_power (stmt) == 1)
1139 propagate_op_to_single_use (op, stmt, def);
1140 return;
1143 name = gimple_assign_rhs1 (stmt);
1145 /* If this is the operation we look for and one of the operands
1146 is ours simply propagate the other operand into the stmts
1147 single use. */
1148 if (gimple_assign_rhs_code (stmt) == opcode
1149 && (name == op
1150 || gimple_assign_rhs2 (stmt) == op))
1152 if (name == op)
1153 name = gimple_assign_rhs2 (stmt);
1154 propagate_op_to_single_use (name, stmt, def);
1155 return;
1158 /* We might have a multiply of two __builtin_pow* calls, and
1159 the operand might be hiding in the rightmost one. */
1160 if (opcode == MULT_EXPR
1161 && gimple_assign_rhs_code (stmt) == opcode
1162 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1163 && has_single_use (gimple_assign_rhs2 (stmt)))
1165 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1166 if (stmt_is_power_of_op (stmt2, op))
1168 if (decrement_power (stmt2) == 1)
1169 propagate_op_to_single_use (op, stmt2, def);
1170 return;
1174 /* Continue walking the chain. */
1175 gcc_assert (name != op
1176 && TREE_CODE (name) == SSA_NAME);
1177 stmt = SSA_NAME_DEF_STMT (name);
1179 while (1);
1182 /* Returns true if statement S1 dominates statement S2. Like
1183 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1185 static bool
1186 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1188 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1190 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1191 SSA_NAME. Assume it lives at the beginning of the function and
1192 thus dominates everything. */
1193 if (!bb1 || s1 == s2)
1194 return true;
1196 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1197 if (!bb2)
1198 return false;
1200 if (bb1 == bb2)
1202 /* PHIs in the same basic block are assumed to be
1203 executed all in parallel, if only one stmt is a PHI,
1204 it dominates the other stmt in the same basic block. */
1205 if (gimple_code (s1) == GIMPLE_PHI)
1206 return true;
1208 if (gimple_code (s2) == GIMPLE_PHI)
1209 return false;
1211 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1213 if (gimple_uid (s1) < gimple_uid (s2))
1214 return true;
1216 if (gimple_uid (s1) > gimple_uid (s2))
1217 return false;
1219 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1220 unsigned int uid = gimple_uid (s1);
1221 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1223 gimple *s = gsi_stmt (gsi);
1224 if (gimple_uid (s) != uid)
1225 break;
1226 if (s == s2)
1227 return true;
1230 return false;
1233 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1236 /* Insert STMT after INSERT_POINT. */
1238 static void
1239 insert_stmt_after (gimple *stmt, gimple *insert_point)
1241 gimple_stmt_iterator gsi;
1242 basic_block bb;
1244 if (gimple_code (insert_point) == GIMPLE_PHI)
1245 bb = gimple_bb (insert_point);
1246 else if (!stmt_ends_bb_p (insert_point))
1248 gsi = gsi_for_stmt (insert_point);
1249 gimple_set_uid (stmt, gimple_uid (insert_point));
1250 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1251 return;
1253 else
1254 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1255 thus if it must end a basic block, it should be a call that can
1256 throw, or some assignment that can throw. If it throws, the LHS
1257 of it will not be initialized though, so only valid places using
1258 the SSA_NAME should be dominated by the fallthru edge. */
1259 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1260 gsi = gsi_after_labels (bb);
1261 if (gsi_end_p (gsi))
1263 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1264 gimple_set_uid (stmt,
1265 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1267 else
1268 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1269 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1272 /* Builds one statement performing OP1 OPCODE OP2, creating a new
1273 SSA name of type TYPE for the result. Places the statement after
1274 the definition of either OP1 or OP2. Returns the new statement. */
1276 static gimple *
1277 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1279 gimple *op1def = NULL, *op2def = NULL;
1280 gimple_stmt_iterator gsi;
1281 tree op;
1282 gassign *sum;
1284 /* Create the addition statement. */
1285 op = make_ssa_name (type);
1286 sum = gimple_build_assign (op, opcode, op1, op2);
1288 /* Find an insertion place and insert. */
1289 if (TREE_CODE (op1) == SSA_NAME)
1290 op1def = SSA_NAME_DEF_STMT (op1);
1291 if (TREE_CODE (op2) == SSA_NAME)
1292 op2def = SSA_NAME_DEF_STMT (op2);
1293 if ((!op1def || gimple_nop_p (op1def))
1294 && (!op2def || gimple_nop_p (op2def)))
1296 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1297 if (gsi_end_p (gsi))
1299 gimple_stmt_iterator gsi2
1300 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1301 gimple_set_uid (sum,
1302 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1304 else
1305 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1306 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1308 else
1310 gimple *insert_point;
1311 if ((!op1def || gimple_nop_p (op1def))
1312 || (op2def && !gimple_nop_p (op2def)
1313 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1314 insert_point = op2def;
1315 else
1316 insert_point = op1def;
1317 insert_stmt_after (sum, insert_point);
1319 update_stmt (sum);
1321 return sum;
1324 /* Perform un-distribution of divisions and multiplications.
1325 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1326 to (A + B) / X for real X.
1328 The algorithm is organized as follows.
1330 - First we walk the addition chain *OPS looking for summands that
1331 are defined by a multiplication or a real division. This results
1332 in the candidates bitmap with relevant indices into *OPS.
1334 - Second we build the chains of multiplications or divisions for
1335 these candidates, counting the number of occurrences of (operand, code)
1336 pairs in all of the candidates chains.
1338 - Third we sort the (operand, code) pairs by number of occurrence and
1339 process them starting with the pair with the most uses.
1341 * For each such pair we walk the candidates again to build a
1342 second candidate bitmap noting all multiplication/division chains
1343 that have at least one occurrence of (operand, code).
1345 * We build an alternate addition chain only covering these
1346 candidates with one (operand, code) operation removed from their
1347 multiplication/division chain.
1349 * The first candidate gets replaced by the alternate addition chain
1350 multiplied/divided by the operand.
1352 * All candidate chains get disabled for further processing and
1353 processing of (operand, code) pairs continues.
1355 The alternate addition chains built are re-processed by the main
1356 reassociation algorithm which allows optimizing a * x * y + b * y * x
1357 to (a + b ) * x * y in one invocation of the reassociation pass. */
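/* A compact illustration of the steps above (illustrative only): for
   the addition chain a*x + b*x + c the two multiplications become
   candidates, the (operand, code) counting finds (x, *) with count 2,
   one factor of x is stripped from each candidate and an alternate
   chain a + b is built, and the first candidate is replaced by
   (a + b) * x, leaving (a + b) * x + 0 + c for the main reassociation
   to finish.  */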
1359 static bool
1360 undistribute_ops_list (enum tree_code opcode,
1361 vec<operand_entry *> *ops, struct loop *loop)
1363 unsigned int length = ops->length ();
1364 operand_entry *oe1;
1365 unsigned i, j;
1366 sbitmap candidates, candidates2;
1367 unsigned nr_candidates, nr_candidates2;
1368 sbitmap_iterator sbi0;
1369 vec<operand_entry *> *subops;
1370 bool changed = false;
1371 int next_oecount_id = 0;
1373 if (length <= 1
1374 || opcode != PLUS_EXPR)
1375 return false;
1377 /* Build a list of candidates to process. */
1378 candidates = sbitmap_alloc (length);
1379 bitmap_clear (candidates);
1380 nr_candidates = 0;
1381 FOR_EACH_VEC_ELT (*ops, i, oe1)
1383 enum tree_code dcode;
1384 gimple *oe1def;
1386 if (TREE_CODE (oe1->op) != SSA_NAME)
1387 continue;
1388 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1389 if (!is_gimple_assign (oe1def))
1390 continue;
1391 dcode = gimple_assign_rhs_code (oe1def);
1392 if ((dcode != MULT_EXPR
1393 && dcode != RDIV_EXPR)
1394 || !is_reassociable_op (oe1def, dcode, loop))
1395 continue;
1397 bitmap_set_bit (candidates, i);
1398 nr_candidates++;
1401 if (nr_candidates < 2)
1403 sbitmap_free (candidates);
1404 return false;
1407 if (dump_file && (dump_flags & TDF_DETAILS))
1409 fprintf (dump_file, "searching for un-distribute opportunities ");
1410 print_generic_expr (dump_file,
1411 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1412 fprintf (dump_file, " %d\n", nr_candidates);
1415 /* Build linearized sub-operand lists and the counting table. */
1416 cvec.create (0);
1418 hash_table<oecount_hasher> ctable (15);
1420 /* ??? Macro arguments cannot have multi-argument template types in
1421 them. This typedef is needed to work around that limitation. */
1422 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1423 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1424 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1426 gimple *oedef;
1427 enum tree_code oecode;
1428 unsigned j;
1430 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1431 oecode = gimple_assign_rhs_code (oedef);
1432 linearize_expr_tree (&subops[i], oedef,
1433 associative_tree_code (oecode), false);
1435 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1437 oecount c;
1438 int *slot;
1439 int idx;
1440 c.oecode = oecode;
1441 c.cnt = 1;
1442 c.id = next_oecount_id++;
1443 c.op = oe1->op;
1444 cvec.safe_push (c);
1445 idx = cvec.length () + 41;
1446 slot = ctable.find_slot (idx, INSERT);
1447 if (!*slot)
1449 *slot = idx;
1451 else
1453 cvec.pop ();
1454 cvec[*slot - 42].cnt++;
1459 /* Sort the counting table. */
1460 cvec.qsort (oecount_cmp);
1462 if (dump_file && (dump_flags & TDF_DETAILS))
1464 oecount *c;
1465 fprintf (dump_file, "Candidates:\n");
1466 FOR_EACH_VEC_ELT (cvec, j, c)
1468 fprintf (dump_file, " %u %s: ", c->cnt,
1469 c->oecode == MULT_EXPR
1470 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1471 print_generic_expr (dump_file, c->op, 0);
1472 fprintf (dump_file, "\n");
1476 /* Process the (operand, code) pairs in order of most occurrence. */
1477 candidates2 = sbitmap_alloc (length);
1478 while (!cvec.is_empty ())
1480 oecount *c = &cvec.last ();
1481 if (c->cnt < 2)
1482 break;
1484 /* Now collect the operands in the outer chain that contain
1485 the common operand in their inner chain. */
1486 bitmap_clear (candidates2);
1487 nr_candidates2 = 0;
1488 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1490 gimple *oedef;
1491 enum tree_code oecode;
1492 unsigned j;
1493 tree op = (*ops)[i]->op;
1495 /* If we undistributed in this chain already, this may be
1496 a constant. */
1497 if (TREE_CODE (op) != SSA_NAME)
1498 continue;
1500 oedef = SSA_NAME_DEF_STMT (op);
1501 oecode = gimple_assign_rhs_code (oedef);
1502 if (oecode != c->oecode)
1503 continue;
1505 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1507 if (oe1->op == c->op)
1509 bitmap_set_bit (candidates2, i);
1510 ++nr_candidates2;
1511 break;
1516 if (nr_candidates2 >= 2)
1518 operand_entry *oe1, *oe2;
1519 gimple *prod;
1520 int first = bitmap_first_set_bit (candidates2);
1522 /* Build the new addition chain. */
1523 oe1 = (*ops)[first];
1524 if (dump_file && (dump_flags & TDF_DETAILS))
1526 fprintf (dump_file, "Building (");
1527 print_generic_expr (dump_file, oe1->op, 0);
1529 zero_one_operation (&oe1->op, c->oecode, c->op);
1530 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1532 gimple *sum;
1533 oe2 = (*ops)[i];
1534 if (dump_file && (dump_flags & TDF_DETAILS))
1536 fprintf (dump_file, " + ");
1537 print_generic_expr (dump_file, oe2->op, 0);
1539 zero_one_operation (&oe2->op, c->oecode, c->op);
1540 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1541 oe1->op, oe2->op, opcode);
1542 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1543 oe2->rank = 0;
1544 oe1->op = gimple_get_lhs (sum);
1547 /* Apply the multiplication/division. */
1548 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1549 oe1->op, c->op, c->oecode);
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1552 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1553 print_generic_expr (dump_file, c->op, 0);
1554 fprintf (dump_file, "\n");
1557 /* Record it in the addition chain and disable further
1558 undistribution with this op. */
1559 oe1->op = gimple_assign_lhs (prod);
1560 oe1->rank = get_rank (oe1->op);
1561 subops[first].release ();
1563 changed = true;
1566 cvec.pop ();
1569 for (i = 0; i < ops->length (); ++i)
1570 subops[i].release ();
1571 free (subops);
1572 cvec.release ();
1573 sbitmap_free (candidates);
1574 sbitmap_free (candidates2);
1576 return changed;
1579 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1580 expression, examine the other OPS to see if any of them are comparisons
1581 of the same values, which we may be able to combine or eliminate.
1582 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1584 static bool
1585 eliminate_redundant_comparison (enum tree_code opcode,
1586 vec<operand_entry *> *ops,
1587 unsigned int currindex,
1588 operand_entry *curr)
1590 tree op1, op2;
1591 enum tree_code lcode, rcode;
1592 gimple *def1, *def2;
1593 int i;
1594 operand_entry *oe;
1596 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1597 return false;
1599 /* Check that CURR is a comparison. */
1600 if (TREE_CODE (curr->op) != SSA_NAME)
1601 return false;
1602 def1 = SSA_NAME_DEF_STMT (curr->op);
1603 if (!is_gimple_assign (def1))
1604 return false;
1605 lcode = gimple_assign_rhs_code (def1);
1606 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1607 return false;
1608 op1 = gimple_assign_rhs1 (def1);
1609 op2 = gimple_assign_rhs2 (def1);
1611 /* Now look for a similar comparison in the remaining OPS. */
1612 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1614 tree t;
1616 if (TREE_CODE (oe->op) != SSA_NAME)
1617 continue;
1618 def2 = SSA_NAME_DEF_STMT (oe->op);
1619 if (!is_gimple_assign (def2))
1620 continue;
1621 rcode = gimple_assign_rhs_code (def2);
1622 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1623 continue;
1625 /* If we got here, we have a match. See if we can combine the
1626 two comparisons. */
1627 if (opcode == BIT_IOR_EXPR)
1628 t = maybe_fold_or_comparisons (lcode, op1, op2,
1629 rcode, gimple_assign_rhs1 (def2),
1630 gimple_assign_rhs2 (def2));
1631 else
1632 t = maybe_fold_and_comparisons (lcode, op1, op2,
1633 rcode, gimple_assign_rhs1 (def2),
1634 gimple_assign_rhs2 (def2));
1635 if (!t)
1636 continue;
1638 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1639 always give us a boolean_type_node value back. If the original
1640 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1641 we need to convert. */
1642 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1643 t = fold_convert (TREE_TYPE (curr->op), t);
1645 if (TREE_CODE (t) != INTEGER_CST
1646 && !operand_equal_p (t, curr->op, 0))
1648 enum tree_code subcode;
1649 tree newop1, newop2;
1650 if (!COMPARISON_CLASS_P (t))
1651 continue;
1652 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1653 STRIP_USELESS_TYPE_CONVERSION (newop1);
1654 STRIP_USELESS_TYPE_CONVERSION (newop2);
1655 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1656 continue;
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1661 fprintf (dump_file, "Equivalence: ");
1662 print_generic_expr (dump_file, curr->op, 0);
1663 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1664 print_generic_expr (dump_file, oe->op, 0);
1665 fprintf (dump_file, " -> ");
1666 print_generic_expr (dump_file, t, 0);
1667 fprintf (dump_file, "\n");
1670 /* Now we can delete oe, as it has been subsumed by the new combined
1671 expression t. */
1672 ops->ordered_remove (i);
1673 reassociate_stats.ops_eliminated ++;
1675 /* If t is the same as curr->op, we're done. Otherwise we must
1676 replace curr->op with t. Special case is if we got a constant
1677 back, in which case we add it to the end instead of in place of
1678 the current entry. */
1679 if (TREE_CODE (t) == INTEGER_CST)
1681 ops->ordered_remove (currindex);
1682 add_to_ops_vec (ops, t);
1684 else if (!operand_equal_p (t, curr->op, 0))
1686 gimple *sum;
1687 enum tree_code subcode;
1688 tree newop1;
1689 tree newop2;
1690 gcc_assert (COMPARISON_CLASS_P (t));
1691 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1692 STRIP_USELESS_TYPE_CONVERSION (newop1);
1693 STRIP_USELESS_TYPE_CONVERSION (newop2);
1694 gcc_checking_assert (is_gimple_val (newop1)
1695 && is_gimple_val (newop2));
1696 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1697 curr->op = gimple_get_lhs (sum);
1699 return true;
1702 return false;
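/* Source-level sketches of what this catches for integer A and B
   (illustrative only):

     (a < b) | (a == b) -> a <= b
     (a < b) & (a > b)  -> 0

   A constant result is appended to the operand list; otherwise a new
   comparison statement is built via build_and_add_sum and replaces
   CURR.  */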
1705 /* Perform various identities and other optimizations on the list of
1706 operand entries, stored in OPS. The tree code for the binary
1707 operation between all the operands is OPCODE. */
1709 static void
1710 optimize_ops_list (enum tree_code opcode,
1711 vec<operand_entry *> *ops)
1713 unsigned int length = ops->length ();
1714 unsigned int i;
1715 operand_entry *oe;
1716 operand_entry *oelast = NULL;
1717 bool iterate = false;
1719 if (length == 1)
1720 return;
1722 oelast = ops->last ();
1724 /* If the last two are constants, pop the constants off, merge them
1725 and try the next two. */
1726 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1728 operand_entry *oelm1 = (*ops)[length - 2];
1730 if (oelm1->rank == 0
1731 && is_gimple_min_invariant (oelm1->op)
1732 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1733 TREE_TYPE (oelast->op)))
1735 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1736 oelm1->op, oelast->op);
1738 if (folded && is_gimple_min_invariant (folded))
1740 if (dump_file && (dump_flags & TDF_DETAILS))
1741 fprintf (dump_file, "Merging constants\n");
1743 ops->pop ();
1744 ops->pop ();
1746 add_to_ops_vec (ops, folded);
1747 reassociate_stats.constants_eliminated++;
1749 optimize_ops_list (opcode, ops);
1750 return;
1755 eliminate_using_constants (opcode, ops);
1756 oelast = NULL;
1758 for (i = 0; ops->iterate (i, &oe);)
1760 bool done = false;
1762 if (eliminate_not_pairs (opcode, ops, i, oe))
1763 return;
1764 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1765 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1766 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1768 if (done)
1769 return;
1770 iterate = true;
1771 oelast = NULL;
1772 continue;
1774 oelast = oe;
1775 i++;
1778 length = ops->length ();
1779 oelast = ops->last ();
1781 if (iterate)
1782 optimize_ops_list (opcode, ops);
1785 /* The following functions are subroutines to optimize_range_tests and allow
1786 it to try to change a logical combination of comparisons into a range
1787 test.
1789 For example, both
1790 X == 2 || X == 5 || X == 3 || X == 4
1791 and
1792 X >= 2 && X <= 5
1793 are converted to
1794 (unsigned) (X - 2) <= 3
1796 For more information see comments above fold_test_range in fold-const.c,
1797 this implementation is for GIMPLE. */
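/* The arithmetic behind that canonical form, as a sketch: for integer
   X, X >= 2 && X <= 5 is equivalent to X - 2 >= 0 && X - 2 <= 3, and
   performing the subtraction in an unsigned type collapses the two
   comparisons into the single test (unsigned) (X - 2) <= 3, because
   values of X below 2 wrap around to large unsigned numbers.  */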
1799 struct range_entry
1801 tree exp;
1802 tree low;
1803 tree high;
1804 bool in_p;
1805 bool strict_overflow_p;
1806 unsigned int idx, next;
1809 /* This is similar to make_range in fold-const.c, but on top of
1810 GIMPLE instead of trees. If EXP is non-NULL, it should be
1811 an SSA_NAME and STMT argument is ignored, otherwise STMT
1812 argument should be a GIMPLE_COND. */
1814 static void
1815 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1817 int in_p;
1818 tree low, high;
1819 bool is_bool, strict_overflow_p;
1821 r->exp = NULL_TREE;
1822 r->in_p = false;
1823 r->strict_overflow_p = false;
1824 r->low = NULL_TREE;
1825 r->high = NULL_TREE;
1826 if (exp != NULL_TREE
1827 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1828 return;
1830 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1831 and see if we can refine the range. Some of the cases below may not
1832 happen, but it doesn't seem worth worrying about this. We "continue"
1833 the outer loop when we've changed something; otherwise we "break"
1834 the switch, which will "break" the while. */
1835 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1836 high = low;
1837 in_p = 0;
1838 strict_overflow_p = false;
1839 is_bool = false;
1840 if (exp == NULL_TREE)
1841 is_bool = true;
1842 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1844 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1845 is_bool = true;
1846 else
1847 return;
1849 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1850 is_bool = true;
1852 while (1)
1854 enum tree_code code;
1855 tree arg0, arg1, exp_type;
1856 tree nexp;
1857 location_t loc;
1859 if (exp != NULL_TREE)
1861 if (TREE_CODE (exp) != SSA_NAME
1862 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1863 break;
1865 stmt = SSA_NAME_DEF_STMT (exp);
1866 if (!is_gimple_assign (stmt))
1867 break;
1869 code = gimple_assign_rhs_code (stmt);
1870 arg0 = gimple_assign_rhs1 (stmt);
1871 arg1 = gimple_assign_rhs2 (stmt);
1872 exp_type = TREE_TYPE (exp);
1874 else
1876 code = gimple_cond_code (stmt);
1877 arg0 = gimple_cond_lhs (stmt);
1878 arg1 = gimple_cond_rhs (stmt);
1879 exp_type = boolean_type_node;
1882 if (TREE_CODE (arg0) != SSA_NAME)
1883 break;
1884 loc = gimple_location (stmt);
1885 switch (code)
1887 case BIT_NOT_EXPR:
1888 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1889 /* Ensure the range is either +[-,0], +[0,0],
1890 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1891 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1892 or similar expression of unconditional true or
1893 false, it should not be negated. */
1894 && ((high && integer_zerop (high))
1895 || (low && integer_onep (low))))
1897 in_p = !in_p;
1898 exp = arg0;
1899 continue;
1901 break;
1902 case SSA_NAME:
1903 exp = arg0;
1904 continue;
1905 CASE_CONVERT:
1906 if (is_bool)
1907 goto do_default;
1908 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1910 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1911 is_bool = true;
1912 else
1913 return;
1915 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1916 is_bool = true;
1917 goto do_default;
1918 case EQ_EXPR:
1919 case NE_EXPR:
1920 case LT_EXPR:
1921 case LE_EXPR:
1922 case GE_EXPR:
1923 case GT_EXPR:
1924 is_bool = true;
1925 /* FALLTHRU */
1926 default:
1927 if (!is_bool)
1928 return;
1929 do_default:
1930 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1931 &low, &high, &in_p,
1932 &strict_overflow_p);
1933 if (nexp != NULL_TREE)
1935 exp = nexp;
1936 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1937 continue;
1939 break;
1941 break;
1943 if (is_bool)
1945 r->exp = exp;
1946 r->in_p = in_p;
1947 r->low = low;
1948 r->high = high;
1949 r->strict_overflow_p = strict_overflow_p;
1953 /* Comparison function for qsort. Sort entries
1954 without SSA_NAME exp first, then with SSA_NAMEs sorted
1955 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1956 by increasing ->low and if ->low is the same, by increasing
1957 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1958 maximum. */
1960 static int
1961 range_entry_cmp (const void *a, const void *b)
1963 const struct range_entry *p = (const struct range_entry *) a;
1964 const struct range_entry *q = (const struct range_entry *) b;
1966 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1968 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1970 /* Group range_entries for the same SSA_NAME together. */
1971 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1972 return -1;
1973 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1974 return 1;
1975 /* If ->low is different, NULL low goes first, then by
1976 ascending low. */
1977 if (p->low != NULL_TREE)
1979 if (q->low != NULL_TREE)
1981 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1982 p->low, q->low);
1983 if (tem && integer_onep (tem))
1984 return -1;
1985 tem = fold_binary (GT_EXPR, boolean_type_node,
1986 p->low, q->low);
1987 if (tem && integer_onep (tem))
1988 return 1;
1990 else
1991 return 1;
1993 else if (q->low != NULL_TREE)
1994 return -1;
1995 /* If ->high is different, NULL high goes last, before that by
1996 ascending high. */
1997 if (p->high != NULL_TREE)
1999 if (q->high != NULL_TREE)
2001 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2002 p->high, q->high);
2003 if (tem && integer_onep (tem))
2004 return -1;
2005 tem = fold_binary (GT_EXPR, boolean_type_node,
2006 p->high, q->high);
2007 if (tem && integer_onep (tem))
2008 return 1;
2010 else
2011 return -1;
2013 else if (q->high != NULL_TREE)
2014 return 1;
2015 /* If both ranges are the same, sort below by ascending idx. */
2017 else
2018 return 1;
2020 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2021 return -1;
2023 if (p->idx < q->idx)
2024 return -1;
2025 else
2027 gcc_checking_assert (p->idx > q->idx);
2028 return 1;
2032 /* Helper routine of optimize_range_tests.
2033 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2034 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2035 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2036 is NULL, OTHERRANGEP should not be NULL; in that case OTHERRANGEP
2037 points to an array of COUNT pointers to other ranges. Return
2038 true if the range merge has been successful.
2039 If OPCODE is ERROR_MARK, this is called from within
2040 maybe_optimize_range_tests and is performing inter-bb range optimization.
2041 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2042 oe->rank. */
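/* Note that on success only RANGE keeps a meaningful test afterwards; the
   entries merged into it get their operand replaced either by the neutral
   value of the bitwise operation (0 for |, 1 for &, in the ERROR_MARK case)
   or by error_mark_node, so the callers can fold or drop those tests.  */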
2044 static bool
2045 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2046 struct range_entry **otherrangep,
2047 unsigned int count, enum tree_code opcode,
2048 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2049 bool in_p, tree low, tree high, bool strict_overflow_p)
2051 operand_entry *oe = (*ops)[range->idx];
2052 tree op = oe->op;
2053 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) :
2054 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2055 location_t loc = gimple_location (stmt);
2056 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2057 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2058 in_p, low, high);
2059 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2060 gimple_stmt_iterator gsi;
2061 unsigned int i;
2063 if (tem == NULL_TREE)
2064 return false;
2066 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2067 warning_at (loc, OPT_Wstrict_overflow,
2068 "assuming signed overflow does not occur "
2069 "when simplifying range test");
2071 if (dump_file && (dump_flags & TDF_DETAILS))
2073 struct range_entry *r;
2074 fprintf (dump_file, "Optimizing range tests ");
2075 print_generic_expr (dump_file, range->exp, 0);
2076 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2077 print_generic_expr (dump_file, range->low, 0);
2078 fprintf (dump_file, ", ");
2079 print_generic_expr (dump_file, range->high, 0);
2080 fprintf (dump_file, "]");
2081 for (i = 0; i < count; i++)
2083 if (otherrange)
2084 r = otherrange + i;
2085 else
2086 r = otherrangep[i];
2087 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2088 print_generic_expr (dump_file, r->low, 0);
2089 fprintf (dump_file, ", ");
2090 print_generic_expr (dump_file, r->high, 0);
2091 fprintf (dump_file, "]");
2093 fprintf (dump_file, "\n into ");
2094 print_generic_expr (dump_file, tem, 0);
2095 fprintf (dump_file, "\n");
2098 if (opcode == BIT_IOR_EXPR
2099 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2100 tem = invert_truthvalue_loc (loc, tem);
2102 tem = fold_convert_loc (loc, optype, tem);
2103 gsi = gsi_for_stmt (stmt);
2104 unsigned int uid = gimple_uid (stmt);
2105 /* In rare cases range->exp can be equal to lhs of stmt.
2106 In that case we have to insert after the stmt rather than before
2107 it. If stmt is a PHI, insert it at the start of the basic block. */
2108 if (op != range->exp)
2110 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2111 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2112 GSI_SAME_STMT);
2113 gsi_prev (&gsi);
2115 else if (gimple_code (stmt) != GIMPLE_PHI)
2117 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2118 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2119 GSI_CONTINUE_LINKING);
2121 else
2123 gsi = gsi_after_labels (gimple_bb (stmt));
2124 if (!gsi_end_p (gsi))
2125 uid = gimple_uid (gsi_stmt (gsi));
2126 else
2128 gsi = gsi_start_bb (gimple_bb (stmt));
2129 uid = 1;
2130 while (!gsi_end_p (gsi))
2132 uid = gimple_uid (gsi_stmt (gsi));
2133 gsi_next (&gsi);
2136 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2137 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2138 GSI_SAME_STMT);
2139 if (gsi_end_p (gsi))
2140 gsi = gsi_last_bb (gimple_bb (stmt));
2141 else
2142 gsi_prev (&gsi);
2144 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2145 if (gimple_uid (gsi_stmt (gsi)))
2146 break;
2147 else
2148 gimple_set_uid (gsi_stmt (gsi), uid);
2150 oe->op = tem;
2151 range->exp = exp;
2152 range->low = low;
2153 range->high = high;
2154 range->in_p = in_p;
2155 range->strict_overflow_p = false;
2157 for (i = 0; i < count; i++)
2159 if (otherrange)
2160 range = otherrange + i;
2161 else
2162 range = otherrangep[i];
2163 oe = (*ops)[range->idx];
2164 /* Now change all the other range test immediate uses, so that
2165 those tests will be optimized away. */
2166 if (opcode == ERROR_MARK)
2168 if (oe->op)
2169 oe->op = build_int_cst (TREE_TYPE (oe->op),
2170 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2171 else
2172 oe->op = (oe->rank == BIT_IOR_EXPR
2173 ? boolean_false_node : boolean_true_node);
2175 else
2176 oe->op = error_mark_node;
2177 range->exp = NULL_TREE;
2179 return true;
2182 /* Optimize X == CST1 || X == CST2
2183 if popcount (CST1 ^ CST2) == 1 into
2184 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2185 Similarly for ranges. E.g.
2186 X != 2 && X != 3 && X != 10 && X != 11
2187 will be transformed by the previous optimization into
2188 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2189 and this loop can transform that into
2190 !(((X & ~8) - 2U) <= 1U). */
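/* A worked instance of the first form above, with hypothetical constants
   (not taken from any testcase): for CST1 == 43 and CST2 == 47 we have
   43 ^ 47 == 4, a power of two, so
     X == 43 || X == 47
   can be rewritten as
     (X & ~4) == 43
   because 43 & ~4 == 47 & ~4 == 43.  */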
2192 static bool
2193 optimize_range_tests_xor (enum tree_code opcode, tree type,
2194 tree lowi, tree lowj, tree highi, tree highj,
2195 vec<operand_entry *> *ops,
2196 struct range_entry *rangei,
2197 struct range_entry *rangej)
2199 tree lowxor, highxor, tem, exp;
2200 /* Check lowi ^ lowj == highi ^ highj and
2201 popcount (lowi ^ lowj) == 1. */
2202 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2203 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2204 return false;
2205 if (!integer_pow2p (lowxor))
2206 return false;
2207 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2208 if (!tree_int_cst_equal (lowxor, highxor))
2209 return false;
2211 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2212 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2213 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2214 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2215 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2216 NULL, rangei->in_p, lowj, highj,
2217 rangei->strict_overflow_p
2218 || rangej->strict_overflow_p))
2219 return true;
2220 return false;
2223 /* Optimize X == CST1 || X == CST2
2224 if popcount (CST2 - CST1) == 1 into
2225 ((X - CST1) & ~(CST2 - CST1)) == 0.
2226 Similarly for ranges. E.g.
2227 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2228 || X == 75 || X == 45
2229 will be transformed by the previous optimization into
2230 (X - 43U) <= 3U || (X - 75U) <= 3U
2231 and this loop can transform that into
2232 ((X - 43U) & ~(75U - 43U)) <= 3U. */
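/* A worked instance of the first form above, again with hypothetical
   constants: for CST1 == 10 and CST2 == 14 we have 14 - 10 == 4, a power
   of two, so
     X == 10 || X == 14
   can be rewritten as
     ((X - 10U) & ~4U) == 0
   which holds exactly when X - 10U is 0 or 4, i.e. when X is 10 or 14.  */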
2233 static bool
2234 optimize_range_tests_diff (enum tree_code opcode, tree type,
2235 tree lowi, tree lowj, tree highi, tree highj,
2236 vec<operand_entry *> *ops,
2237 struct range_entry *rangei,
2238 struct range_entry *rangej)
2240 tree tem1, tem2, mask;
2241 /* Check highi - lowi == highj - lowj. */
2242 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2243 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2244 return false;
2245 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2246 if (!tree_int_cst_equal (tem1, tem2))
2247 return false;
2248 /* Check popcount (lowj - lowi) == 1. */
2249 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2250 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2251 return false;
2252 if (!integer_pow2p (tem1))
2253 return false;
2255 type = unsigned_type_for (type);
2256 tem1 = fold_convert (type, tem1);
2257 tem2 = fold_convert (type, tem2);
2258 lowi = fold_convert (type, lowi);
2259 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2260 tem1 = fold_binary (MINUS_EXPR, type,
2261 fold_convert (type, rangei->exp), lowi);
2262 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2263 lowj = build_int_cst (type, 0);
2264 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2265 NULL, rangei->in_p, lowj, tem2,
2266 rangei->strict_overflow_p
2267 || rangej->strict_overflow_p))
2268 return true;
2269 return false;
2272 /* This function does some checks common to optimize_range_tests_xor and
2273 optimize_range_tests_diff.
2274 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2275 Else it calls optimize_range_tests_diff. */
2277 static bool
2278 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2279 bool optimize_xor, vec<operand_entry *> *ops,
2280 struct range_entry *ranges)
2282 int i, j;
2283 bool any_changes = false;
2284 for (i = first; i < length; i++)
2286 tree lowi, highi, lowj, highj, type, tem;
2288 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2289 continue;
2290 type = TREE_TYPE (ranges[i].exp);
2291 if (!INTEGRAL_TYPE_P (type))
2292 continue;
2293 lowi = ranges[i].low;
2294 if (lowi == NULL_TREE)
2295 lowi = TYPE_MIN_VALUE (type);
2296 highi = ranges[i].high;
2297 if (highi == NULL_TREE)
2298 continue;
2299 for (j = i + 1; j < length && j < i + 64; j++)
2301 bool changes;
2302 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2303 continue;
2304 lowj = ranges[j].low;
2305 if (lowj == NULL_TREE)
2306 continue;
2307 highj = ranges[j].high;
2308 if (highj == NULL_TREE)
2309 highj = TYPE_MAX_VALUE (type);
2310 /* Check lowj > highi. */
2311 tem = fold_binary (GT_EXPR, boolean_type_node,
2312 lowj, highi);
2313 if (tem == NULL_TREE || !integer_onep (tem))
2314 continue;
2315 if (optimize_xor)
2316 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2317 highi, highj, ops,
2318 ranges + i, ranges + j);
2319 else
2320 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2321 highi, highj, ops,
2322 ranges + i, ranges + j);
2323 if (changes)
2325 any_changes = true;
2326 break;
2330 return any_changes;
2333 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2334 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2335 EXP on success, NULL otherwise. */
2337 static tree
2338 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2339 wide_int *mask, tree *totallowp)
2341 tree tem = int_const_binop (MINUS_EXPR, high, low);
2342 if (tem == NULL_TREE
2343 || TREE_CODE (tem) != INTEGER_CST
2344 || TREE_OVERFLOW (tem)
2345 || tree_int_cst_sgn (tem) == -1
2346 || compare_tree_int (tem, prec) != -1)
2347 return NULL_TREE;
2349 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2350 *mask = wi::shifted_mask (0, max, false, prec);
2351 if (TREE_CODE (exp) == BIT_AND_EXPR
2352 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2354 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2355 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2356 if (wi::popcount (msk) == 1
2357 && wi::ltu_p (msk, prec - max))
2359 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2360 max += msk.to_uhwi ();
2361 exp = TREE_OPERAND (exp, 0);
2362 if (integer_zerop (low)
2363 && TREE_CODE (exp) == PLUS_EXPR
2364 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2366 tree ret = TREE_OPERAND (exp, 0);
2367 STRIP_NOPS (ret);
2368 widest_int bias
2369 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2370 TYPE_PRECISION (TREE_TYPE (low))));
2371 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2372 if (totallowp)
2374 *totallowp = tbias;
2375 return ret;
2377 else if (!tree_int_cst_lt (totallow, tbias))
2378 return NULL_TREE;
2379 bias = wi::to_widest (tbias);
2380 bias -= wi::to_widest (totallow);
2381 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2383 *mask = wi::lshift (*mask, bias);
2384 return ret;
2389 if (totallowp)
2390 return exp;
2391 if (!tree_int_cst_lt (totallow, low))
2392 return exp;
2393 tem = int_const_binop (MINUS_EXPR, low, totallow);
2394 if (tem == NULL_TREE
2395 || TREE_CODE (tem) != INTEGER_CST
2396 || TREE_OVERFLOW (tem)
2397 || compare_tree_int (tem, prec - max) == 1)
2398 return NULL_TREE;
2400 *mask = wi::lshift (*mask, wi::to_widest (tem));
2401 return exp;
2404 /* Attempt to optimize small range tests using bit test.
2405 E.g.
2406 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2407 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2408 has been optimized by the earlier optimizations into:
2409 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2410 As the whole 43 through 82 range spans fewer than 64 values,
2411 for 64-bit word targets optimize that into:
2412 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
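/* In the example above, bit K of the 0x8F0000004FULL mask is set iff
   43 + K is one of the tested values: the low nibble 0xF covers 43..46,
   bit 6 (0x40) covers 49, bits 32..35 (0xF00000000) cover 75..78 and
   bit 39 (0x8000000000) covers 82.  */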
2414 static bool
2415 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2416 vec<operand_entry *> *ops,
2417 struct range_entry *ranges)
2419 int i, j;
2420 bool any_changes = false;
2421 int prec = GET_MODE_BITSIZE (word_mode);
2422 auto_vec<struct range_entry *, 64> candidates;
2424 for (i = first; i < length - 2; i++)
2426 tree lowi, highi, lowj, highj, type;
2428 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2429 continue;
2430 type = TREE_TYPE (ranges[i].exp);
2431 if (!INTEGRAL_TYPE_P (type))
2432 continue;
2433 lowi = ranges[i].low;
2434 if (lowi == NULL_TREE)
2435 lowi = TYPE_MIN_VALUE (type);
2436 highi = ranges[i].high;
2437 if (highi == NULL_TREE)
2438 continue;
2439 wide_int mask;
2440 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2441 highi, &mask, &lowi);
2442 if (exp == NULL_TREE)
2443 continue;
2444 bool strict_overflow_p = ranges[i].strict_overflow_p;
2445 candidates.truncate (0);
2446 int end = MIN (i + 64, length);
2447 for (j = i + 1; j < end; j++)
2449 tree exp2;
2450 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2451 continue;
2452 if (ranges[j].exp == exp)
2454 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2456 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2457 if (exp2 == exp)
2459 else if (TREE_CODE (exp2) == PLUS_EXPR)
2461 exp2 = TREE_OPERAND (exp2, 0);
2462 STRIP_NOPS (exp2);
2463 if (exp2 != exp)
2464 continue;
2466 else
2467 continue;
2469 else
2470 continue;
2471 lowj = ranges[j].low;
2472 if (lowj == NULL_TREE)
2473 continue;
2474 highj = ranges[j].high;
2475 if (highj == NULL_TREE)
2476 highj = TYPE_MAX_VALUE (type);
2477 wide_int mask2;
2478 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2479 highj, &mask2, NULL);
2480 if (exp2 != exp)
2481 continue;
2482 mask |= mask2;
2483 strict_overflow_p |= ranges[j].strict_overflow_p;
2484 candidates.safe_push (&ranges[j]);
2487 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2488 if (candidates.length () >= 2)
2490 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2491 wi::to_widest (lowi)
2492 + prec - 1 - wi::clz (mask));
2493 operand_entry *oe = (*ops)[ranges[i].idx];
2494 tree op = oe->op;
2495 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2496 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2497 location_t loc = gimple_location (stmt);
2498 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2500 /* See if it isn't cheaper to pretend the minimum value of the
2501 range is 0, if the maximum value is small enough.
2502 We can then avoid the subtraction of the minimum value, but the
2503 mask constant could perhaps be more expensive. */
2504 if (compare_tree_int (lowi, 0) > 0
2505 && compare_tree_int (high, prec) < 0)
2507 int cost_diff;
2508 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2509 rtx reg = gen_raw_REG (word_mode, 10000);
2510 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2511 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2512 GEN_INT (-m)), speed_p);
2513 rtx r = immed_wide_int_const (mask, word_mode);
2514 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2515 word_mode, speed_p);
2516 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2517 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2518 word_mode, speed_p);
2519 if (cost_diff > 0)
2521 mask = wi::lshift (mask, m);
2522 lowi = build_zero_cst (TREE_TYPE (lowi));
2526 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2527 false, lowi, high);
2528 if (tem == NULL_TREE || is_gimple_val (tem))
2529 continue;
2530 tree etype = unsigned_type_for (TREE_TYPE (exp));
2531 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2532 fold_convert_loc (loc, etype, exp),
2533 fold_convert_loc (loc, etype, lowi));
2534 exp = fold_convert_loc (loc, integer_type_node, exp);
2535 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2536 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2537 build_int_cst (word_type, 1), exp);
2538 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2539 wide_int_to_tree (word_type, mask));
2540 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2541 build_zero_cst (word_type));
2542 if (is_gimple_val (exp))
2543 continue;
2545 /* The shift might have undefined behavior if TEM is true,
2546 but reassociate_bb isn't prepared to have basic blocks
2547 split when it is running. So, temporarily emit code
2548 with BIT_IOR_EXPR instead of &&, and fix it up in
2549 branch_fixup. */
2550 gimple_seq seq;
2551 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2552 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2553 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2554 gimple_seq seq2;
2555 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2556 gimple_seq_add_seq_without_update (&seq, seq2);
2557 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2558 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2559 gimple *g = gimple_build_assign (make_ssa_name (optype),
2560 BIT_IOR_EXPR, tem, exp);
2561 gimple_set_location (g, loc);
2562 gimple_seq_add_stmt_without_update (&seq, g);
2563 exp = gimple_assign_lhs (g);
2564 tree val = build_zero_cst (optype);
2565 if (update_range_test (&ranges[i], NULL, candidates.address (),
2566 candidates.length (), opcode, ops, exp,
2567 seq, false, val, val, strict_overflow_p))
2569 any_changes = true;
2570 reassoc_branch_fixups.safe_push (tem);
2572 else
2573 gimple_seq_discard (seq);
2576 return any_changes;
2579 /* Optimize range tests, similarly to how fold_range_test optimizes
2580 them on trees. The tree code for the binary
2581 operation between all the operands is OPCODE.
2582 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2583 maybe_optimize_range_tests for inter-bb range optimization.
2584 In that case if oe->op is NULL, oe->id is the index of the bb whose
2585 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2586 the actual opcode.
2588 static bool
2589 optimize_range_tests (enum tree_code opcode,
2590 vec<operand_entry *> *ops)
2592 unsigned int length = ops->length (), i, j, first;
2593 operand_entry *oe;
2594 struct range_entry *ranges;
2595 bool any_changes = false;
2597 if (length == 1)
2598 return false;
2600 ranges = XNEWVEC (struct range_entry, length);
2601 for (i = 0; i < length; i++)
2603 oe = (*ops)[i];
2604 ranges[i].idx = i;
2605 init_range_entry (ranges + i, oe->op,
2606 oe->op ? NULL :
2607 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2608 /* For | invert it now, we will invert it again before emitting
2609 the optimized expression. */
2610 if (opcode == BIT_IOR_EXPR
2611 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2612 ranges[i].in_p = !ranges[i].in_p;
2615 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2616 for (i = 0; i < length; i++)
2617 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2618 break;
2620 /* Try to merge ranges. */
2621 for (first = i; i < length; i++)
2623 tree low = ranges[i].low;
2624 tree high = ranges[i].high;
2625 int in_p = ranges[i].in_p;
2626 bool strict_overflow_p = ranges[i].strict_overflow_p;
2627 int update_fail_count = 0;
2629 for (j = i + 1; j < length; j++)
2631 if (ranges[i].exp != ranges[j].exp)
2632 break;
2633 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2634 ranges[j].in_p, ranges[j].low, ranges[j].high))
2635 break;
2636 strict_overflow_p |= ranges[j].strict_overflow_p;
2639 if (j == i + 1)
2640 continue;
2642 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2643 opcode, ops, ranges[i].exp, NULL, in_p,
2644 low, high, strict_overflow_p))
2646 i = j - 1;
2647 any_changes = true;
2649 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2650 while update_range_test would fail. */
2651 else if (update_fail_count == 64)
2652 i = j - 1;
2653 else
2654 ++update_fail_count;
2657 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2658 ops, ranges);
2660 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2661 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2662 ops, ranges);
2663 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2664 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2665 ops, ranges);
2667 if (any_changes && opcode != ERROR_MARK)
2669 j = 0;
2670 FOR_EACH_VEC_ELT (*ops, i, oe)
2672 if (oe->op == error_mark_node)
2673 continue;
2674 else if (i != j)
2675 (*ops)[j] = oe;
2676 j++;
2678 ops->truncate (j);
2681 XDELETEVEC (ranges);
2682 return any_changes;
2685 /* Return true if STMT is a cast like:
2686 <bb N>:
2688 _123 = (int) _234;
2690 <bb M>:
2691 # _345 = PHI <_123(N), 1(...), 1(...)>
2692 where _234 has bool type, _123 has a single use and
2693 bb N has a single successor M. This is commonly used in
2694 the last block of a range test. */
2696 static bool
2697 final_range_test_p (gimple *stmt)
2699 basic_block bb, rhs_bb;
2700 edge e;
2701 tree lhs, rhs;
2702 use_operand_p use_p;
2703 gimple *use_stmt;
2705 if (!gimple_assign_cast_p (stmt))
2706 return false;
2707 bb = gimple_bb (stmt);
2708 if (!single_succ_p (bb))
2709 return false;
2710 e = single_succ_edge (bb);
2711 if (e->flags & EDGE_COMPLEX)
2712 return false;
2714 lhs = gimple_assign_lhs (stmt);
2715 rhs = gimple_assign_rhs1 (stmt);
2716 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2717 || TREE_CODE (rhs) != SSA_NAME
2718 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2719 return false;
2721 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2722 if (!single_imm_use (lhs, &use_p, &use_stmt))
2723 return false;
2725 if (gimple_code (use_stmt) != GIMPLE_PHI
2726 || gimple_bb (use_stmt) != e->dest)
2727 return false;
2729 /* And that the rhs is defined in the same loop. */
2730 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2731 if (rhs_bb == NULL
2732 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2733 return false;
2735 return true;
2738 /* Return true if BB is a suitable basic block for inter-bb range test
2739 optimization. If BACKWARD is true, BB should be the only predecessor
2740 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2741 or is compared against to find the common basic block to which all
2742 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
2743 should be the only predecessor of BB. */
2745 static bool
2746 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2747 bool backward)
2749 edge_iterator ei, ei2;
2750 edge e, e2;
2751 gimple *stmt;
2752 gphi_iterator gsi;
2753 bool other_edge_seen = false;
2754 bool is_cond;
2756 if (test_bb == bb)
2757 return false;
2758 /* Check last stmt first. */
2759 stmt = last_stmt (bb);
2760 if (stmt == NULL
2761 || (gimple_code (stmt) != GIMPLE_COND
2762 && (backward || !final_range_test_p (stmt)))
2763 || gimple_visited_p (stmt)
2764 || stmt_could_throw_p (stmt)
2765 || *other_bb == bb)
2766 return false;
2767 is_cond = gimple_code (stmt) == GIMPLE_COND;
2768 if (is_cond)
2770 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2771 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2772 to *OTHER_BB (if not set yet, try to find it out). */
2773 if (EDGE_COUNT (bb->succs) != 2)
2774 return false;
2775 FOR_EACH_EDGE (e, ei, bb->succs)
2777 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2778 return false;
2779 if (e->dest == test_bb)
2781 if (backward)
2782 continue;
2783 else
2784 return false;
2786 if (e->dest == bb)
2787 return false;
2788 if (*other_bb == NULL)
2790 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2791 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2792 return false;
2793 else if (e->dest == e2->dest)
2794 *other_bb = e->dest;
2795 if (*other_bb == NULL)
2796 return false;
2798 if (e->dest == *other_bb)
2799 other_edge_seen = true;
2800 else if (backward)
2801 return false;
2803 if (*other_bb == NULL || !other_edge_seen)
2804 return false;
2806 else if (single_succ (bb) != *other_bb)
2807 return false;
2809 /* Now check all PHIs of *OTHER_BB. */
2810 e = find_edge (bb, *other_bb);
2811 e2 = find_edge (test_bb, *other_bb);
2812 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2814 gphi *phi = gsi.phi ();
2815 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2816 corresponding to BB and TEST_BB predecessor must be the same. */
2817 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2818 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2820 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2821 one of the PHIs should have the lhs of the last stmt in
2822 that block as PHI arg and that PHI should have 0 or 1
2823 corresponding to it in all other range test basic blocks
2824 considered. */
2825 if (!is_cond)
2827 if (gimple_phi_arg_def (phi, e->dest_idx)
2828 == gimple_assign_lhs (stmt)
2829 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2830 || integer_onep (gimple_phi_arg_def (phi,
2831 e2->dest_idx))))
2832 continue;
2834 else
2836 gimple *test_last = last_stmt (test_bb);
2837 if (gimple_code (test_last) != GIMPLE_COND
2838 && gimple_phi_arg_def (phi, e2->dest_idx)
2839 == gimple_assign_lhs (test_last)
2840 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2841 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2842 continue;
2845 return false;
2848 return true;
2851 /* Return true if BB doesn't have side-effects that would disallow
2852 range test optimization, all SSA_NAMEs set in the bb are consumed
2853 in the bb and there are no PHIs. */
2855 static bool
2856 no_side_effect_bb (basic_block bb)
2858 gimple_stmt_iterator gsi;
2859 gimple *last;
2861 if (!gimple_seq_empty_p (phi_nodes (bb)))
2862 return false;
2863 last = last_stmt (bb);
2864 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2866 gimple *stmt = gsi_stmt (gsi);
2867 tree lhs;
2868 imm_use_iterator imm_iter;
2869 use_operand_p use_p;
2871 if (is_gimple_debug (stmt))
2872 continue;
2873 if (gimple_has_side_effects (stmt))
2874 return false;
2875 if (stmt == last)
2876 return true;
2877 if (!is_gimple_assign (stmt))
2878 return false;
2879 lhs = gimple_assign_lhs (stmt);
2880 if (TREE_CODE (lhs) != SSA_NAME)
2881 return false;
2882 if (gimple_assign_rhs_could_trap_p (stmt))
2883 return false;
2884 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2886 gimple *use_stmt = USE_STMT (use_p);
2887 if (is_gimple_debug (use_stmt))
2888 continue;
2889 if (gimple_bb (use_stmt) != bb)
2890 return false;
2893 return false;
2896 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2897 return true and fill in *OPS recursively. */
2899 static bool
2900 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
2901 struct loop *loop)
2903 gimple *stmt = SSA_NAME_DEF_STMT (var);
2904 tree rhs[2];
2905 int i;
2907 if (!is_reassociable_op (stmt, code, loop))
2908 return false;
2910 rhs[0] = gimple_assign_rhs1 (stmt);
2911 rhs[1] = gimple_assign_rhs2 (stmt);
2912 gimple_set_visited (stmt, true);
2913 for (i = 0; i < 2; i++)
2914 if (TREE_CODE (rhs[i]) == SSA_NAME
2915 && !get_ops (rhs[i], code, ops, loop)
2916 && has_single_use (rhs[i]))
2918 operand_entry *oe = operand_entry_pool.allocate ();
2920 oe->op = rhs[i];
2921 oe->rank = code;
2922 oe->id = 0;
2923 oe->count = 1;
2924 ops->safe_push (oe);
2926 return true;
2929 /* Find the ops that were added by get_ops starting from VAR, see if
2930 they were changed during update_range_test and if so, create new
2931 stmts. */
2933 static tree
2934 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
2935 unsigned int *pidx, struct loop *loop)
2937 gimple *stmt = SSA_NAME_DEF_STMT (var);
2938 tree rhs[4];
2939 int i;
2941 if (!is_reassociable_op (stmt, code, loop))
2942 return NULL;
2944 rhs[0] = gimple_assign_rhs1 (stmt);
2945 rhs[1] = gimple_assign_rhs2 (stmt);
2946 rhs[2] = rhs[0];
2947 rhs[3] = rhs[1];
2948 for (i = 0; i < 2; i++)
2949 if (TREE_CODE (rhs[i]) == SSA_NAME)
2951 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2952 if (rhs[2 + i] == NULL_TREE)
2954 if (has_single_use (rhs[i]))
2955 rhs[2 + i] = ops[(*pidx)++]->op;
2956 else
2957 rhs[2 + i] = rhs[i];
2960 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2961 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2963 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2964 var = make_ssa_name (TREE_TYPE (var));
2965 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
2966 rhs[2], rhs[3]);
2967 gimple_set_uid (g, gimple_uid (stmt));
2968 gimple_set_visited (g, true);
2969 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2971 return var;
2974 /* Structure to track the initial value passed to get_ops and
2975 the range in the ops vector for each basic block. */
2977 struct inter_bb_range_test_entry
2979 tree op;
2980 unsigned int first_idx, last_idx;
2983 /* Inter-bb range test optimization. */
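/* Purely for illustration (a hypothetical source fragment, not from any
   testcase): in
     if (x == 2 || x == 5 || x == 7)
       foo ();
   the short-circuited || is expanded into a chain of basic blocks, each
   ending in a GIMPLE_COND branching to a common destination.  The code
   below gathers those per-bb tests into one operand vector so that
   optimize_range_tests can merge them as if they were a single
   expression.  */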
2985 static void
2986 maybe_optimize_range_tests (gimple *stmt)
2988 basic_block first_bb = gimple_bb (stmt);
2989 basic_block last_bb = first_bb;
2990 basic_block other_bb = NULL;
2991 basic_block bb;
2992 edge_iterator ei;
2993 edge e;
2994 auto_vec<operand_entry *> ops;
2995 auto_vec<inter_bb_range_test_entry> bbinfo;
2996 bool any_changes = false;
2998 /* Consider only basic blocks that end with GIMPLE_COND or
2999 a cast statement satisfying final_range_test_p. All
3000 but the last bb in the first_bb .. last_bb range
3001 should end with GIMPLE_COND. */
3002 if (gimple_code (stmt) == GIMPLE_COND)
3004 if (EDGE_COUNT (first_bb->succs) != 2)
3005 return;
3007 else if (final_range_test_p (stmt))
3008 other_bb = single_succ (first_bb);
3009 else
3010 return;
3012 if (stmt_could_throw_p (stmt))
3013 return;
3015 /* As relative ordering of post-dominator sons isn't fixed,
3016 maybe_optimize_range_tests can be called first on any
3017 bb in the range we want to optimize. So, start searching
3018 backwards, to see if first_bb can be set to a predecessor. */
3019 while (single_pred_p (first_bb))
3021 basic_block pred_bb = single_pred (first_bb);
3022 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3023 break;
3024 if (!no_side_effect_bb (first_bb))
3025 break;
3026 first_bb = pred_bb;
3028 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3029 Before starting forward search in last_bb successors, find
3030 out the other_bb. */
3031 if (first_bb == last_bb)
3033 other_bb = NULL;
3034 /* As non-GIMPLE_COND last stmt always terminates the range,
3035 if forward search didn't discover anything, just give up. */
3036 if (gimple_code (stmt) != GIMPLE_COND)
3037 return;
3038 /* Look at both successors. Either it ends with a GIMPLE_COND
3039 and satisfies suitable_cond_bb, or ends with a cast and
3040 other_bb is that cast's successor. */
3041 FOR_EACH_EDGE (e, ei, first_bb->succs)
3042 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3043 || e->dest == first_bb)
3044 return;
3045 else if (single_pred_p (e->dest))
3047 stmt = last_stmt (e->dest);
3048 if (stmt
3049 && gimple_code (stmt) == GIMPLE_COND
3050 && EDGE_COUNT (e->dest->succs) == 2)
3052 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3053 break;
3054 else
3055 other_bb = NULL;
3057 else if (stmt
3058 && final_range_test_p (stmt)
3059 && find_edge (first_bb, single_succ (e->dest)))
3061 other_bb = single_succ (e->dest);
3062 if (other_bb == first_bb)
3063 other_bb = NULL;
3066 if (other_bb == NULL)
3067 return;
3069 /* Now do the forward search, moving last_bb to successor bbs
3070 that aren't other_bb. */
3071 while (EDGE_COUNT (last_bb->succs) == 2)
3073 FOR_EACH_EDGE (e, ei, last_bb->succs)
3074 if (e->dest != other_bb)
3075 break;
3076 if (e == NULL)
3077 break;
3078 if (!single_pred_p (e->dest))
3079 break;
3080 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3081 break;
3082 if (!no_side_effect_bb (e->dest))
3083 break;
3084 last_bb = e->dest;
3086 if (first_bb == last_bb)
3087 return;
3088 /* Here basic blocks first_bb through last_bb's predecessor
3089 end with GIMPLE_COND, all of them have one of their edges to
3090 other_bb and the other to another block in the range,
3091 all blocks except first_bb don't have side-effects and
3092 last_bb ends with either GIMPLE_COND, or cast satisfying
3093 final_range_test_p. */
3094 for (bb = last_bb; ; bb = single_pred (bb))
3096 enum tree_code code;
3097 tree lhs, rhs;
3098 inter_bb_range_test_entry bb_ent;
3100 bb_ent.op = NULL_TREE;
3101 bb_ent.first_idx = ops.length ();
3102 bb_ent.last_idx = bb_ent.first_idx;
3103 e = find_edge (bb, other_bb);
3104 stmt = last_stmt (bb);
3105 gimple_set_visited (stmt, true);
3106 if (gimple_code (stmt) != GIMPLE_COND)
3108 use_operand_p use_p;
3109 gimple *phi;
3110 edge e2;
3111 unsigned int d;
3113 lhs = gimple_assign_lhs (stmt);
3114 rhs = gimple_assign_rhs1 (stmt);
3115 gcc_assert (bb == last_bb);
3117 /* stmt is
3118 _123 = (int) _234;
3120 followed by:
3121 <bb M>:
3122 # _345 = PHI <_123(N), 1(...), 1(...)>
3124 or 0 instead of 1. If it is 0, the _234
3125 range test is anded together with all the
3126 other range tests, if it is 1, it is ored with
3127 them. */
3128 single_imm_use (lhs, &use_p, &phi);
3129 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3130 e2 = find_edge (first_bb, other_bb);
3131 d = e2->dest_idx;
3132 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3133 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3134 code = BIT_AND_EXPR;
3135 else
3137 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3138 code = BIT_IOR_EXPR;
3141 /* If _234 SSA_NAME_DEF_STMT is
3142 _234 = _567 | _789;
3143 (or &, corresponding to 1/0 in the phi arguments,
3144 push into ops the individual range test arguments
3145 of the bitwise or resp. and, recursively. */
3146 if (!get_ops (rhs, code, &ops,
3147 loop_containing_stmt (stmt))
3148 && has_single_use (rhs))
3150 /* Otherwise, push the _234 range test itself. */
3151 operand_entry *oe = operand_entry_pool.allocate ();
3153 oe->op = rhs;
3154 oe->rank = code;
3155 oe->id = 0;
3156 oe->count = 1;
3157 ops.safe_push (oe);
3158 bb_ent.last_idx++;
3160 else
3161 bb_ent.last_idx = ops.length ();
3162 bb_ent.op = rhs;
3163 bbinfo.safe_push (bb_ent);
3164 continue;
3166 /* Otherwise stmt is GIMPLE_COND. */
3167 code = gimple_cond_code (stmt);
3168 lhs = gimple_cond_lhs (stmt);
3169 rhs = gimple_cond_rhs (stmt);
3170 if (TREE_CODE (lhs) == SSA_NAME
3171 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3172 && ((code != EQ_EXPR && code != NE_EXPR)
3173 || rhs != boolean_false_node
3174 /* Either push into ops the individual bitwise
3175 or resp. and operands, depending on which
3176 edge is other_bb. */
3177 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3178 ^ (code == EQ_EXPR))
3179 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3180 loop_containing_stmt (stmt))))
3182 /* Or push the GIMPLE_COND stmt itself. */
3183 operand_entry *oe = operand_entry_pool.allocate ();
3185 oe->op = NULL;
3186 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3187 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3188 /* oe->op = NULL signals that there is no SSA_NAME
3189 for the range test, and oe->id instead is the
3190 basic block number at whose end the GIMPLE_COND
3191 is. */
3192 oe->id = bb->index;
3193 oe->count = 1;
3194 ops.safe_push (oe);
3195 bb_ent.op = NULL;
3196 bb_ent.last_idx++;
3198 else if (ops.length () > bb_ent.first_idx)
3200 bb_ent.op = lhs;
3201 bb_ent.last_idx = ops.length ();
3203 bbinfo.safe_push (bb_ent);
3204 if (bb == first_bb)
3205 break;
3207 if (ops.length () > 1)
3208 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3209 if (any_changes)
3211 unsigned int idx;
3212 /* update_ops relies on the has_single_use predicates returning the
3213 same values as they did during get_ops earlier. Additionally it
3214 never removes statements, only adds new ones and it should walk
3215 from the single imm use and check the predicate already before
3216 making those changes.
3217 On the other hand, the handling of GIMPLE_COND directly can turn
3218 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3219 it needs to be done in a separate loop afterwards. */
3220 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3222 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3223 && bbinfo[idx].op != NULL_TREE)
3225 tree new_op;
3227 stmt = last_stmt (bb);
3228 new_op = update_ops (bbinfo[idx].op,
3229 (enum tree_code)
3230 ops[bbinfo[idx].first_idx]->rank,
3231 ops, &bbinfo[idx].first_idx,
3232 loop_containing_stmt (stmt));
3233 if (new_op == NULL_TREE)
3235 gcc_assert (bb == last_bb);
3236 new_op = ops[bbinfo[idx].first_idx++]->op;
3238 if (bbinfo[idx].op != new_op)
3240 imm_use_iterator iter;
3241 use_operand_p use_p;
3242 gimple *use_stmt, *cast_stmt = NULL;
3244 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3245 if (is_gimple_debug (use_stmt))
3246 continue;
3247 else if (gimple_code (use_stmt) == GIMPLE_COND
3248 || gimple_code (use_stmt) == GIMPLE_PHI)
3249 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3250 SET_USE (use_p, new_op);
3251 else if (gimple_assign_cast_p (use_stmt))
3252 cast_stmt = use_stmt;
3253 else
3254 gcc_unreachable ();
3255 if (cast_stmt)
3257 gcc_assert (bb == last_bb);
3258 tree lhs = gimple_assign_lhs (cast_stmt);
3259 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3260 enum tree_code rhs_code
3261 = gimple_assign_rhs_code (cast_stmt);
3262 gassign *g;
3263 if (is_gimple_min_invariant (new_op))
3265 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3266 g = gimple_build_assign (new_lhs, new_op);
3268 else
3269 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3270 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3271 gimple_set_uid (g, gimple_uid (cast_stmt));
3272 gimple_set_visited (g, true);
3273 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3274 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3275 if (is_gimple_debug (use_stmt))
3276 continue;
3277 else if (gimple_code (use_stmt) == GIMPLE_COND
3278 || gimple_code (use_stmt) == GIMPLE_PHI)
3279 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3280 SET_USE (use_p, new_lhs);
3281 else
3282 gcc_unreachable ();
3286 if (bb == first_bb)
3287 break;
3289 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3291 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3292 && bbinfo[idx].op == NULL_TREE
3293 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3295 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3296 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3297 gimple_cond_make_false (cond_stmt);
3298 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3299 gimple_cond_make_true (cond_stmt);
3300 else
3302 gimple_cond_set_code (cond_stmt, NE_EXPR);
3303 gimple_cond_set_lhs (cond_stmt,
3304 ops[bbinfo[idx].first_idx]->op);
3305 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3307 update_stmt (cond_stmt);
3309 if (bb == first_bb)
3310 break;
3315 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3316 of STMT in its operands. This is also known as a "destructive
3317 update" operation. */
3319 static bool
3320 is_phi_for_stmt (gimple *stmt, tree operand)
3322 gimple *def_stmt;
3323 gphi *def_phi;
3324 tree lhs;
3325 use_operand_p arg_p;
3326 ssa_op_iter i;
3328 if (TREE_CODE (operand) != SSA_NAME)
3329 return false;
3331 lhs = gimple_assign_lhs (stmt);
3333 def_stmt = SSA_NAME_DEF_STMT (operand);
3334 def_phi = dyn_cast <gphi *> (def_stmt);
3335 if (!def_phi)
3336 return false;
3338 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3339 if (lhs == USE_FROM_PTR (arg_p))
3340 return true;
3341 return false;
3344 /* Remove def stmt of VAR if VAR has zero uses and recurse
3345 on rhs1 operand if so. */
3347 static void
3348 remove_visited_stmt_chain (tree var)
3350 gimple *stmt;
3351 gimple_stmt_iterator gsi;
3353 while (1)
3355 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3356 return;
3357 stmt = SSA_NAME_DEF_STMT (var);
3358 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3360 var = gimple_assign_rhs1 (stmt);
3361 gsi = gsi_for_stmt (stmt);
3362 reassoc_remove_stmt (&gsi);
3363 release_defs (stmt);
3365 else
3366 return;
3370 /* This function checks three consecutive operands in
3371 the passed operands vector OPS starting from OPINDEX and
3372 swaps two operands if it is profitable for the binary operation
3373 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3375 We pair ops with the same rank if possible.
3377 The alternative we try is to see if STMT is a destructive
3378 update style statement, which is like:
3379 b = phi (a, ...)
3380 a = c + b;
3381 In that case, we want to use the destructive update form to
3382 expose the possible vectorizer sum reduction opportunity.
3383 In that case, the third operand will be the phi node. This
3384 check is not performed if STMT is null.
3386 We could, of course, try to be better as noted above, and do a
3387 lot of work to try to find these opportunities in >3 operand
3388 cases, but it is unlikely to be worth it. */
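/* For example (hypothetical ranks): if OPS[OPINDEX..OPINDEX+2] have ranks
   (5, 5, 3), the first and third entries are swapped so that the pair
   actually consumed by the binary statement, OPS[OPINDEX + 1] and
   OPS[OPINDEX + 2], ends up holding the two equal-rank operands.  */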
3390 static void
3391 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3392 unsigned int opindex, gimple *stmt)
3394 operand_entry *oe1, *oe2, *oe3;
3396 oe1 = ops[opindex];
3397 oe2 = ops[opindex + 1];
3398 oe3 = ops[opindex + 2];
3400 if ((oe1->rank == oe2->rank
3401 && oe2->rank != oe3->rank)
3402 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3403 && !is_phi_for_stmt (stmt, oe1->op)
3404 && !is_phi_for_stmt (stmt, oe2->op)))
3406 operand_entry temp = *oe3;
3407 oe3->op = oe1->op;
3408 oe3->rank = oe1->rank;
3409 oe1->op = temp.op;
3410 oe1->rank = temp.rank;
3412 else if ((oe1->rank == oe3->rank
3413 && oe2->rank != oe3->rank)
3414 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3415 && !is_phi_for_stmt (stmt, oe1->op)
3416 && !is_phi_for_stmt (stmt, oe3->op)))
3418 operand_entry temp = *oe2;
3419 oe2->op = oe1->op;
3420 oe2->rank = oe1->rank;
3421 oe1->op = temp.op;
3422 oe1->rank = temp.rank;
3426 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3427 two definitions, otherwise return STMT. */
3429 static inline gimple *
3430 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3432 if (TREE_CODE (rhs1) == SSA_NAME
3433 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3434 stmt = SSA_NAME_DEF_STMT (rhs1);
3435 if (TREE_CODE (rhs2) == SSA_NAME
3436 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3437 stmt = SSA_NAME_DEF_STMT (rhs2);
3438 return stmt;
3441 /* Recursively rewrite our linearized statements so that the operators
3442 match those in OPS[OPINDEX], putting the computation in rank
3443 order. Return new lhs. */
3445 static tree
3446 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3447 vec<operand_entry *> ops, bool changed)
3449 tree rhs1 = gimple_assign_rhs1 (stmt);
3450 tree rhs2 = gimple_assign_rhs2 (stmt);
3451 tree lhs = gimple_assign_lhs (stmt);
3452 operand_entry *oe;
3454 /* The final recursion case for this function is that you have
3455 exactly two operations left.
3456 If we had exactly one op in the entire list to start with, we
3457 would have never called this function, and the tail recursion
3458 rewrites them one at a time. */
3459 if (opindex + 2 == ops.length ())
3461 operand_entry *oe1, *oe2;
3463 oe1 = ops[opindex];
3464 oe2 = ops[opindex + 1];
3466 if (rhs1 != oe1->op || rhs2 != oe2->op)
3468 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3469 unsigned int uid = gimple_uid (stmt);
3471 if (dump_file && (dump_flags & TDF_DETAILS))
3473 fprintf (dump_file, "Transforming ");
3474 print_gimple_stmt (dump_file, stmt, 0, 0);
3477 /* Even when changed is false, reassociation could have e.g. removed
3478 some redundant operations, so unless we are just swapping the
3479 arguments or unless there is no change at all (then we just
3480 return lhs), force creation of a new SSA_NAME. */
3481 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3483 gimple *insert_point
3484 = find_insert_point (stmt, oe1->op, oe2->op);
3485 lhs = make_ssa_name (TREE_TYPE (lhs));
3486 stmt
3487 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3488 oe1->op, oe2->op);
3489 gimple_set_uid (stmt, uid);
3490 gimple_set_visited (stmt, true);
3491 if (insert_point == gsi_stmt (gsi))
3492 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3493 else
3494 insert_stmt_after (stmt, insert_point);
3496 else
3498 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3499 == stmt);
3500 gimple_assign_set_rhs1 (stmt, oe1->op);
3501 gimple_assign_set_rhs2 (stmt, oe2->op);
3502 update_stmt (stmt);
3505 if (rhs1 != oe1->op && rhs1 != oe2->op)
3506 remove_visited_stmt_chain (rhs1);
3508 if (dump_file && (dump_flags & TDF_DETAILS))
3510 fprintf (dump_file, " into ");
3511 print_gimple_stmt (dump_file, stmt, 0, 0);
3514 return lhs;
3517 /* If we hit here, we should have 3 or more ops left. */
3518 gcc_assert (opindex + 2 < ops.length ());
3520 /* Rewrite the next operator. */
3521 oe = ops[opindex];
3523 /* Recurse on the LHS of the binary operator, which is guaranteed to
3524 be the non-leaf side. */
3525 tree new_rhs1
3526 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3527 changed || oe->op != rhs2);
3529 if (oe->op != rhs2 || new_rhs1 != rhs1)
3531 if (dump_file && (dump_flags & TDF_DETAILS))
3533 fprintf (dump_file, "Transforming ");
3534 print_gimple_stmt (dump_file, stmt, 0, 0);
3537 /* If changed is false, this is either opindex == 0
3538 or all outer rhs2's were equal to corresponding oe->op,
3539 and powi_result is NULL.
3540 That means lhs is equivalent before and after reassociation.
3541 Otherwise ensure the old lhs SSA_NAME is not reused and
3542 create a new stmt as well, so that any debug stmts will be
3543 properly adjusted. */
3544 if (changed)
3546 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3547 unsigned int uid = gimple_uid (stmt);
3548 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3550 lhs = make_ssa_name (TREE_TYPE (lhs));
3551 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3552 new_rhs1, oe->op);
3553 gimple_set_uid (stmt, uid);
3554 gimple_set_visited (stmt, true);
3555 if (insert_point == gsi_stmt (gsi))
3556 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3557 else
3558 insert_stmt_after (stmt, insert_point);
3560 else
3562 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3563 == stmt);
3564 gimple_assign_set_rhs1 (stmt, new_rhs1);
3565 gimple_assign_set_rhs2 (stmt, oe->op);
3566 update_stmt (stmt);
3569 if (dump_file && (dump_flags & TDF_DETAILS))
3571 fprintf (dump_file, " into ");
3572 print_gimple_stmt (dump_file, stmt, 0, 0);
3575 return lhs;
3578 /* Find out how many cycles we need to compute a statements chain.
3579 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3580 maximum number of independent statements we may execute per cycle. */
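/* For instance (hypothetical numbers): with OPS_NUM == 10 and
   CPU_WIDTH == 2 the first phase takes 10 / (2 * 2) == 2 cycles and
   leaves 10 - 2 * 2 == 6 operands, and halving those takes another
   floor_log2 (6) + 1 == 3 cycles, so 5 cycles in total.  */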
3582 static int
3583 get_required_cycles (int ops_num, int cpu_width)
3585 int res;
3586 int elog;
3587 unsigned int rest;
3589 /* While we have more than 2 * cpu_width operands
3590 we may reduce the number of operands by cpu_width
3591 per cycle. */
3592 res = ops_num / (2 * cpu_width);
3594 /* The remaining operand count may be halved each cycle
3595 until we have only one operand. */
3596 rest = (unsigned)(ops_num - res * cpu_width);
3597 elog = exact_log2 (rest);
3598 if (elog >= 0)
3599 res += elog;
3600 else
3601 res += floor_log2 (rest) + 1;
3603 return res;
3606 /* Returns an optimal number of registers to use for computation of
3607 given statements. */
3609 static int
3610 get_reassociation_width (int ops_num, enum tree_code opc,
3611 machine_mode mode)
3613 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3614 int width;
3615 int width_min;
3616 int cycles_best;
3618 if (param_width > 0)
3619 width = param_width;
3620 else
3621 width = targetm.sched.reassociation_width (opc, mode);
3623 if (width == 1)
3624 return width;
3626 /* Get the minimal time required for sequence computation. */
3627 cycles_best = get_required_cycles (ops_num, width);
3629 /* Check if we may use less width and still compute the sequence in
3630 the same time. This will allow us to reduce register usage.
3631 get_required_cycles is monotonically increasing with lower width
3632 so we can perform a binary search for the minimal width that still
3633 results in the optimal cycle count. */
3634 width_min = 1;
3635 while (width > width_min)
3637 int width_mid = (width + width_min) / 2;
3639 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3640 width = width_mid;
3641 else if (width_min < width_mid)
3642 width_min = width_mid;
3643 else
3644 break;
3647 return width;
3650 /* Recursively rewrite our linearized statements so that the operators
3651 match those in OPS[OPINDEX], putting the computation in rank
3652 order and trying to allow operations to be executed in
3653 parallel. */
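/* As an illustration (hypothetical operands): with WIDTH == 2 and four
   operands a, b, c and d this emits roughly
     _1 = c + d;
     _2 = a + b;
     lhs = _1 + _2;
   so the two partial sums are independent and may issue in the same
   cycle, instead of the serial chain ((a + b) + c) + d.  */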
3655 static void
3656 rewrite_expr_tree_parallel (gassign *stmt, int width,
3657 vec<operand_entry *> ops)
3659 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3660 int op_num = ops.length ();
3661 int stmt_num = op_num - 1;
3662 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3663 int op_index = op_num - 1;
3664 int stmt_index = 0;
3665 int ready_stmts_end = 0;
3666 int i = 0;
3667 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3669 /* We start expression rewriting from the top statements.
3670 So, in this loop we create a full list of statements
3671 we will work with. */
3672 stmts[stmt_num - 1] = stmt;
3673 for (i = stmt_num - 2; i >= 0; i--)
3674 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3676 for (i = 0; i < stmt_num; i++)
3678 tree op1, op2;
3680 /* Determine whether we should use results of
3681 already handled statements or not. */
3682 if (ready_stmts_end == 0
3683 && (i - stmt_index >= width || op_index < 1))
3684 ready_stmts_end = i;
3686 /* Now we choose operands for the next statement. A nonzero
3687 value of ready_stmts_end here means that we should use
3688 the results of already generated statements as new operands. */
3689 if (ready_stmts_end > 0)
3691 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3692 if (ready_stmts_end > stmt_index)
3693 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3694 else if (op_index >= 0)
3695 op2 = ops[op_index--]->op;
3696 else
3698 gcc_assert (stmt_index < i);
3699 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3702 if (stmt_index >= ready_stmts_end)
3703 ready_stmts_end = 0;
3705 else
3707 if (op_index > 1)
3708 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3709 op2 = ops[op_index--]->op;
3710 op1 = ops[op_index--]->op;
3713 /* If we emit the last statement then we should put
3714 operands into the last statement. It will also
3715 break the loop. */
3716 if (op_index < 0 && stmt_index == i)
3717 i = stmt_num - 1;
3719 if (dump_file && (dump_flags & TDF_DETAILS))
3721 fprintf (dump_file, "Transforming ");
3722 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3725 /* We keep original statement only for the last one. All
3726 others are recreated. */
3727 if (i == stmt_num - 1)
3729 gimple_assign_set_rhs1 (stmts[i], op1);
3730 gimple_assign_set_rhs2 (stmts[i], op2);
3731 update_stmt (stmts[i]);
3733 else
3734 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3736 if (dump_file && (dump_flags & TDF_DETAILS))
3738 fprintf (dump_file, " into ");
3739 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3743 remove_visited_stmt_chain (last_rhs1);
3746 /* Transform STMT, which is really (A + B) + (C + D) into the left
3747 linear form, ((A+B)+C)+D.
3748 Recurse on D if necessary. */
3750 static void
3751 linearize_expr (gimple *stmt)
3753 gimple_stmt_iterator gsi;
3754 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3755 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3756 gimple *oldbinrhs = binrhs;
3757 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3758 gimple *newbinrhs = NULL;
3759 struct loop *loop = loop_containing_stmt (stmt);
3760 tree lhs = gimple_assign_lhs (stmt);
3762 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3763 && is_reassociable_op (binrhs, rhscode, loop));
3765 gsi = gsi_for_stmt (stmt);
3767 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3768 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3769 gimple_assign_rhs_code (binrhs),
3770 gimple_assign_lhs (binlhs),
3771 gimple_assign_rhs2 (binrhs));
3772 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3773 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3774 gimple_set_uid (binrhs, gimple_uid (stmt));
3776 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3777 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3779 if (dump_file && (dump_flags & TDF_DETAILS))
3781 fprintf (dump_file, "Linearized: ");
3782 print_gimple_stmt (dump_file, stmt, 0, 0);
3785 reassociate_stats.linearized++;
3786 update_stmt (stmt);
3788 gsi = gsi_for_stmt (oldbinrhs);
3789 reassoc_remove_stmt (&gsi);
3790 release_defs (oldbinrhs);
3792 gimple_set_visited (stmt, true);
3793 gimple_set_visited (binlhs, true);
3794 gimple_set_visited (binrhs, true);
3796 /* Tail recurse on the new rhs if it still needs reassociation. */
3797 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3798 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3799 want to change the algorithm while converting to tuples. */
3800 linearize_expr (stmt);
3803 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3804 it. Otherwise, return NULL. */
3806 static gimple *
3807 get_single_immediate_use (tree lhs)
3809 use_operand_p immuse;
3810 gimple *immusestmt;
3812 if (TREE_CODE (lhs) == SSA_NAME
3813 && single_imm_use (lhs, &immuse, &immusestmt)
3814 && is_gimple_assign (immusestmt))
3815 return immusestmt;
3817 return NULL;
3820 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3821 representing the negated value. Insertions of any necessary
3822 instructions go before GSI.
3823 This function is recursive in that, if you hand it "a_5" as the
3824 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3825 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3827 static tree
3828 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3830 gimple *negatedefstmt = NULL;
3831 tree resultofnegate;
3832 gimple_stmt_iterator gsi;
3833 unsigned int uid;
3835 /* If we are trying to negate a name, defined by an add, negate the
3836 add operands instead. */
3837 if (TREE_CODE (tonegate) == SSA_NAME)
3838 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3839 if (TREE_CODE (tonegate) == SSA_NAME
3840 && is_gimple_assign (negatedefstmt)
3841 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3842 && has_single_use (gimple_assign_lhs (negatedefstmt))
3843 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3845 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3846 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3847 tree lhs = gimple_assign_lhs (negatedefstmt);
3848 gimple *g;
3850 gsi = gsi_for_stmt (negatedefstmt);
3851 rhs1 = negate_value (rhs1, &gsi);
3853 gsi = gsi_for_stmt (negatedefstmt);
3854 rhs2 = negate_value (rhs2, &gsi);
3856 gsi = gsi_for_stmt (negatedefstmt);
3857 lhs = make_ssa_name (TREE_TYPE (lhs));
3858 gimple_set_visited (negatedefstmt, true);
3859 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3860 gimple_set_uid (g, gimple_uid (negatedefstmt));
3861 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3862 return lhs;
3865 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3866 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3867 NULL_TREE, true, GSI_SAME_STMT);
3868 gsi = *gsip;
3869 uid = gimple_uid (gsi_stmt (gsi));
3870 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3872 gimple *stmt = gsi_stmt (gsi);
3873 if (gimple_uid (stmt) != 0)
3874 break;
3875 gimple_set_uid (stmt, uid);
3877 return resultofnegate;
3880 /* Return true if we should break up the subtract in STMT into an add
3881 with negate. This is true when the subtract operands are really
3882 adds, or the subtract itself is used in an add expression. In
3883 either case, breaking up the subtract into an add with negate
3884 exposes the adds to reassociation. */
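/* For example (hypothetical source): in
     x = a - b;
     y = x + c;
   the subtract feeds an add, so rewriting it as x = a + -b exposes
   a, -b and c to reassociation as one PLUS_EXPR chain.  */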
3886 static bool
3887 should_break_up_subtract (gimple *stmt)
3889 tree lhs = gimple_assign_lhs (stmt);
3890 tree binlhs = gimple_assign_rhs1 (stmt);
3891 tree binrhs = gimple_assign_rhs2 (stmt);
3892 gimple *immusestmt;
3893 struct loop *loop = loop_containing_stmt (stmt);
3895 if (TREE_CODE (binlhs) == SSA_NAME
3896 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3897 return true;
3899 if (TREE_CODE (binrhs) == SSA_NAME
3900 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3901 return true;
3903 if (TREE_CODE (lhs) == SSA_NAME
3904 && (immusestmt = get_single_immediate_use (lhs))
3905 && is_gimple_assign (immusestmt)
3906 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3907 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3908 return true;
3909 return false;
3912 /* Transform STMT from A - B into A + -B. */
3914 static void
3915 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
3917 tree rhs1 = gimple_assign_rhs1 (stmt);
3918 tree rhs2 = gimple_assign_rhs2 (stmt);
3920 if (dump_file && (dump_flags & TDF_DETAILS))
3922 fprintf (dump_file, "Breaking up subtract ");
3923 print_gimple_stmt (dump_file, stmt, 0, 0);
3926 rhs2 = negate_value (rhs2, gsip);
3927 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3928 update_stmt (stmt);
3931 /* Determine whether STMT is a builtin call that raises an SSA name
3932 to an integer power and has only one use. If so, and this is early
3933 reassociation and unsafe math optimizations are permitted, place
3934 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3935 If any of these conditions does not hold, return FALSE. */
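/* For example (values are illustrative only): given

	t = __builtin_pow (x, 4.0);

   under -funsafe-math-optimizations with math-errno disabled, on the
   first pass instance and provided T has a single use, *BASE is set to
   X and *EXPONENT to 4.  A call like __builtin_pow (x, 2.5) is rejected
   because 2.5 does not round-trip through an integer, and exponents
   below two are rejected as not worth expanding.  */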
3937 static bool
3938 acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
3940 tree fndecl, arg1;
3941 REAL_VALUE_TYPE c, cint;
3943 if (!first_pass_instance
3944 || !flag_unsafe_math_optimizations
3945 || !is_gimple_call (stmt)
3946 || !has_single_use (gimple_call_lhs (stmt)))
3947 return false;
3949 fndecl = gimple_call_fndecl (stmt);
3951 if (!fndecl
3952 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3953 return false;
3955 switch (DECL_FUNCTION_CODE (fndecl))
3957 CASE_FLT_FN (BUILT_IN_POW):
3958 if (flag_errno_math)
3959 return false;
3961 *base = gimple_call_arg (stmt, 0);
3962 arg1 = gimple_call_arg (stmt, 1);
3964 if (TREE_CODE (arg1) != REAL_CST)
3965 return false;
3967 c = TREE_REAL_CST (arg1);
3969 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3970 return false;
3972 *exponent = real_to_integer (&c);
3973 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3974 if (!real_identical (&c, &cint))
3975 return false;
3977 break;
3979 CASE_FLT_FN (BUILT_IN_POWI):
3980 *base = gimple_call_arg (stmt, 0);
3981 arg1 = gimple_call_arg (stmt, 1);
3983 if (!tree_fits_shwi_p (arg1))
3984 return false;
3986 *exponent = tree_to_shwi (arg1);
3987 break;
3989 default:
3990 return false;
3993 /* Expanding negative exponents is generally unproductive, so we don't
3994 complicate matters with those. Exponents of zero and one should
3995 have been handled by expression folding. */
3996 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3997 return false;
3999 return true;
4002 /* Recursively linearize a binary expression that is the RHS of STMT.
4003 Place the operands of the expression tree in the vector named OPS. */
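/* Note that in a MULT_EXPR chain, operands defined by single-use
   __builtin_pow/powi calls with an acceptable integral exponent (see
   acceptable_pow_call) are recorded through add_repeat_to_ops_vec as
   repeated-factor entries rather than as plain operands, so that
   attempt_builtin_powi can combine them later.  */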
4005 static void
4006 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4007 bool is_associative, bool set_visited)
4009 tree binlhs = gimple_assign_rhs1 (stmt);
4010 tree binrhs = gimple_assign_rhs2 (stmt);
4011 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4012 bool binlhsisreassoc = false;
4013 bool binrhsisreassoc = false;
4014 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4015 struct loop *loop = loop_containing_stmt (stmt);
4016 tree base = NULL_TREE;
4017 HOST_WIDE_INT exponent = 0;
4019 if (set_visited)
4020 gimple_set_visited (stmt, true);
4022 if (TREE_CODE (binlhs) == SSA_NAME)
4024 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4025 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4026 && !stmt_could_throw_p (binlhsdef));
4029 if (TREE_CODE (binrhs) == SSA_NAME)
4031 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4032 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4033 && !stmt_could_throw_p (binrhsdef));
4036 /* If the LHS is not reassociable, but the RHS is, we need to swap
4037 them. If neither is reassociable, there is nothing we can do, so
4038 just put them in the ops vector. If the LHS is reassociable,
4039 linearize it. If both are reassociable, then linearize the RHS
4040 and the LHS. */
4042 if (!binlhsisreassoc)
4044 /* If this is a non-associative operation, such as division, give up. */
4045 if (!is_associative)
4047 add_to_ops_vec (ops, binrhs);
4048 return;
4051 if (!binrhsisreassoc)
4053 if (rhscode == MULT_EXPR
4054 && TREE_CODE (binrhs) == SSA_NAME
4055 && acceptable_pow_call (binrhsdef, &base, &exponent))
4057 add_repeat_to_ops_vec (ops, base, exponent);
4058 gimple_set_visited (binrhsdef, true);
4060 else
4061 add_to_ops_vec (ops, binrhs);
4063 if (rhscode == MULT_EXPR
4064 && TREE_CODE (binlhs) == SSA_NAME
4065 && acceptable_pow_call (binlhsdef, &base, &exponent))
4067 add_repeat_to_ops_vec (ops, base, exponent);
4068 gimple_set_visited (binlhsdef, true);
4070 else
4071 add_to_ops_vec (ops, binlhs);
4073 return;
4076 if (dump_file && (dump_flags & TDF_DETAILS))
4078 fprintf (dump_file, "swapping operands of ");
4079 print_gimple_stmt (dump_file, stmt, 0, 0);
4082 swap_ssa_operands (stmt,
4083 gimple_assign_rhs1_ptr (stmt),
4084 gimple_assign_rhs2_ptr (stmt));
4085 update_stmt (stmt);
4087 if (dump_file && (dump_flags & TDF_DETAILS))
4089 fprintf (dump_file, " is now ");
4090 print_gimple_stmt (dump_file, stmt, 0, 0);
4093 /* We want to make it so the lhs is always the reassociative op,
4094 so swap. */
4095 std::swap (binlhs, binrhs);
4097 else if (binrhsisreassoc)
4099 linearize_expr (stmt);
4100 binlhs = gimple_assign_rhs1 (stmt);
4101 binrhs = gimple_assign_rhs2 (stmt);
4104 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4105 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4106 rhscode, loop));
4107 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4108 is_associative, set_visited);
4110 if (rhscode == MULT_EXPR
4111 && TREE_CODE (binrhs) == SSA_NAME
4112 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4114 add_repeat_to_ops_vec (ops, base, exponent);
4115 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4117 else
4118 add_to_ops_vec (ops, binrhs);
4121 /* Repropagate the negates back into subtracts, since no other pass
4122 currently does it. */
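/* E.g., where break_up_subtract has earlier produced

	t = -b;
	x = a + t;

   the PLUS_EXPR case below turns the addition back into x = a - b,
   leaving the explicit negate unused.  */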
4124 static void
4125 repropagate_negates (void)
4127 unsigned int i = 0;
4128 tree negate;
4130 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4132 gimple *user = get_single_immediate_use (negate);
4134 if (!user || !is_gimple_assign (user))
4135 continue;
4137 /* The negate operand can be either operand of a PLUS_EXPR
4138 (it can be the LHS if the RHS is a constant for example).
4140 Force the negate operand to the RHS of the PLUS_EXPR, then
4141 transform the PLUS_EXPR into a MINUS_EXPR. */
4142 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4144 /* If the negated operand appears on the LHS of the
4145 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4146 to force the negated operand to the RHS of the PLUS_EXPR. */
4147 if (gimple_assign_rhs1 (user) == negate)
4149 swap_ssa_operands (user,
4150 gimple_assign_rhs1_ptr (user),
4151 gimple_assign_rhs2_ptr (user));
4154 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4155 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4156 if (gimple_assign_rhs2 (user) == negate)
4158 tree rhs1 = gimple_assign_rhs1 (user);
4159 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4160 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4161 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4162 update_stmt (user);
4165 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4167 if (gimple_assign_rhs1 (user) == negate)
4169 /* We have
4170 x = -a
4171 y = x - b
4172 which we transform into
4173 x = a + b
4174 y = -x .
4175 This pushes down the negate which we possibly can merge
4176 into some other operation, hence insert it into the
4177 plus_negates vector. */
4178 gimple *feed = SSA_NAME_DEF_STMT (negate);
4179 tree a = gimple_assign_rhs1 (feed);
4180 tree b = gimple_assign_rhs2 (user);
4181 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4182 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4183 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4184 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4185 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4186 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4187 user = gsi_stmt (gsi2);
4188 update_stmt (user);
4189 reassoc_remove_stmt (&gsi);
4190 release_defs (feed);
4191 plus_negates.safe_push (gimple_assign_lhs (user));
4193 else
4195 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4196 rid of one operation. */
4197 gimple *feed = SSA_NAME_DEF_STMT (negate);
4198 tree a = gimple_assign_rhs1 (feed);
4199 tree rhs1 = gimple_assign_rhs1 (user);
4200 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4201 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4202 update_stmt (gsi_stmt (gsi));
4208 /* Returns true if OP is of a type for which we can do reassociation.
4209 That is for wrapping integral types, for non-saturating fixed-point
4210 types, and for floating-point types when associative-math is enabled. */
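/* E.g., unsigned arithmetic always qualifies; plain signed arithmetic
   does not, because its overflow is undefined, unless -fwrapv is in
   effect; and floating-point arithmetic qualifies only under
   -fassociative-math.  */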
4212 static bool
4213 can_reassociate_p (tree op)
4215 tree type = TREE_TYPE (op);
4216 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4217 || NON_SAT_FIXED_POINT_TYPE_P (type)
4218 || (flag_associative_math && FLOAT_TYPE_P (type)))
4219 return true;
4220 return false;
4223 /* Break up subtract operations in block BB.
4225 We do this top down because we don't know whether the subtract is
4226 part of a possible chain of reassociation except at the top.
4228 IE given
4229 d = f + g
4230 c = a + e
4231 b = c - d
4232 q = b - r
4233 k = t - q
4235 we want to break up k = t - q, but we won't until we've transformed q
4236 = b - r, which won't be broken up until we transform b = c - d.
4238 En passant, clear the GIMPLE visited flag on every statement
4239 and set UIDs within each basic block. */
4241 static void
4242 break_up_subtract_bb (basic_block bb)
4244 gimple_stmt_iterator gsi;
4245 basic_block son;
4246 unsigned int uid = 1;
4248 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4250 gimple *stmt = gsi_stmt (gsi);
4251 gimple_set_visited (stmt, false);
4252 gimple_set_uid (stmt, uid++);
4254 if (!is_gimple_assign (stmt)
4255 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4256 continue;
4258 /* Look for simple gimple subtract operations. */
4259 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4261 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4262 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4263 continue;
4265 /* Check for a subtract used only in an addition. If this
4266 is the case, transform it into add of a negate for better
4267 reassociation. IE transform C = A-B into C = A + -B if C
4268 is only used in an addition. */
4269 if (should_break_up_subtract (stmt))
4270 break_up_subtract (stmt, &gsi);
4272 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4273 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4274 plus_negates.safe_push (gimple_assign_lhs (stmt));
4276 for (son = first_dom_son (CDI_DOMINATORS, bb);
4277 son;
4278 son = next_dom_son (CDI_DOMINATORS, son))
4279 break_up_subtract_bb (son);
4282 /* Used for repeated factor analysis. */
4283 struct repeat_factor
4285 /* An SSA name that occurs in a multiply chain. */
4286 tree factor;
4288 /* Cached rank of the factor. */
4289 unsigned rank;
4291 /* Number of occurrences of the factor in the chain. */
4292 HOST_WIDE_INT count;
4294 /* An SSA name representing the product of this factor and
4295 all factors appearing later in the repeated factor vector. */
4296 tree repr;
4300 static vec<repeat_factor> repeat_factor_vec;
4302 /* Used for sorting the repeat factor vector. Sort primarily by
4303 ascending occurrence count, secondarily by descending rank. */
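/* For instance, factors with (count, rank) pairs (1, 7), (3, 2) and
   (3, 5) sort as (1, 7), (3, 5), (3, 2): lower counts first, and among
   equal counts the higher-ranked factor first.  */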
4305 static int
4306 compare_repeat_factors (const void *x1, const void *x2)
4308 const repeat_factor *rf1 = (const repeat_factor *) x1;
4309 const repeat_factor *rf2 = (const repeat_factor *) x2;
4311 if (rf1->count != rf2->count)
4312 return rf1->count - rf2->count;
4314 return rf2->rank - rf1->rank;
4317 /* Look for repeated operands in OPS in the multiply tree rooted at
4318 STMT. Replace them with an optimal sequence of multiplies and powi
4319 builtin calls, and remove the used operands from OPS. Return an
4320 SSA name representing the value of the replacement sequence. */
4322 static tree
4323 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4325 unsigned i, j, vec_len;
4326 int ii;
4327 operand_entry *oe;
4328 repeat_factor *rf1, *rf2;
4329 repeat_factor rfnew;
4330 tree result = NULL_TREE;
4331 tree target_ssa, iter_result;
4332 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4333 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4334 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4335 gimple *mul_stmt, *pow_stmt;
4337 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4338 target. */
4339 if (!powi_fndecl)
4340 return NULL_TREE;
4342 /* Allocate the repeated factor vector. */
4343 repeat_factor_vec.create (10);
4345 /* Scan the OPS vector for all SSA names in the product and build
4346 up a vector of occurrence counts for each factor. */
4347 FOR_EACH_VEC_ELT (*ops, i, oe)
4349 if (TREE_CODE (oe->op) == SSA_NAME)
4351 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4353 if (rf1->factor == oe->op)
4355 rf1->count += oe->count;
4356 break;
4360 if (j >= repeat_factor_vec.length ())
4362 rfnew.factor = oe->op;
4363 rfnew.rank = oe->rank;
4364 rfnew.count = oe->count;
4365 rfnew.repr = NULL_TREE;
4366 repeat_factor_vec.safe_push (rfnew);
4371 /* Sort the repeated factor vector by (a) increasing occurrence count,
4372 and (b) decreasing rank. */
4373 repeat_factor_vec.qsort (compare_repeat_factors);
4375 /* It is generally best to combine as many base factors as possible
4376 into a product before applying __builtin_powi to the result.
4377 However, the sort order chosen for the repeated factor vector
4378 allows us to cache partial results for the product of the base
4379 factors for subsequent use. When we already have a cached partial
4380 result from a previous iteration, it is best to make use of it
4381 before looking for another __builtin_pow opportunity.
4383 As an example, consider x * x * y * y * y * z * z * z * z.
4384 We want to first compose the product x * y * z, raise it to the
4385 second power, then multiply this by y * z, and finally multiply
4386 by z. This can be done in 5 multiplies provided we cache y * z
4387 for use in both expressions:
4389 t1 = y * z
4390 t2 = t1 * x
4391 t3 = t2 * t2
4392 t4 = t1 * t3
4393 result = t4 * z
4395 If we instead ignored the cached y * z and first multiplied by
4396 the __builtin_pow opportunity z * z, we would get the inferior:
4398 t1 = y * z
4399 t2 = t1 * x
4400 t3 = t2 * t2
4401 t4 = z * z
4402 t5 = t3 * t4
4403 result = t5 * y */
4405 vec_len = repeat_factor_vec.length ();
4407 /* Repeatedly look for opportunities to create a builtin_powi call. */
4408 while (true)
4410 HOST_WIDE_INT power;
4412 /* First look for the largest cached product of factors from
4413 preceding iterations. If found, create a builtin_powi for
4414 it if the minimum occurrence count for its factors is at
4415 least 2, or just use this cached product as our next
4416 multiplicand if the minimum occurrence count is 1. */
4417 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4419 if (rf1->repr && rf1->count > 0)
4420 break;
4423 if (j < vec_len)
4425 power = rf1->count;
4427 if (power == 1)
4429 iter_result = rf1->repr;
4431 if (dump_file && (dump_flags & TDF_DETAILS))
4433 unsigned elt;
4434 repeat_factor *rf;
4435 fputs ("Multiplying by cached product ", dump_file);
4436 for (elt = j; elt < vec_len; elt++)
4438 rf = &repeat_factor_vec[elt];
4439 print_generic_expr (dump_file, rf->factor, 0);
4440 if (elt < vec_len - 1)
4441 fputs (" * ", dump_file);
4443 fputs ("\n", dump_file);
4446 else
4448 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4449 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4450 build_int_cst (integer_type_node,
4451 power));
4452 gimple_call_set_lhs (pow_stmt, iter_result);
4453 gimple_set_location (pow_stmt, gimple_location (stmt));
4454 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4458 unsigned elt;
4459 repeat_factor *rf;
4460 fputs ("Building __builtin_pow call for cached product (",
4461 dump_file);
4462 for (elt = j; elt < vec_len; elt++)
4464 rf = &repeat_factor_vec[elt];
4465 print_generic_expr (dump_file, rf->factor, 0);
4466 if (elt < vec_len - 1)
4467 fputs (" * ", dump_file);
4469 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4470 power);
4474 else
4476 /* Otherwise, find the first factor in the repeated factor
4477 vector whose occurrence count is at least 2. If no such
4478 factor exists, there are no builtin_powi opportunities
4479 remaining. */
4480 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4482 if (rf1->count >= 2)
4483 break;
4486 if (j >= vec_len)
4487 break;
4489 power = rf1->count;
4491 if (dump_file && (dump_flags & TDF_DETAILS))
4493 unsigned elt;
4494 repeat_factor *rf;
4495 fputs ("Building __builtin_pow call for (", dump_file);
4496 for (elt = j; elt < vec_len; elt++)
4498 rf = &repeat_factor_vec[elt];
4499 print_generic_expr (dump_file, rf->factor, 0);
4500 if (elt < vec_len - 1)
4501 fputs (" * ", dump_file);
4503 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4506 reassociate_stats.pows_created++;
4508 /* Visit each element of the vector in reverse order (so that
4509 high-occurrence elements are visited first, and within the
4510 same occurrence count, lower-ranked elements are visited
4511 first). Form a linear product of all elements in this order
4512 whose occurrence count is at least that of element J.
4513 Record the SSA name representing the product of each element
4514 with all subsequent elements in the vector. */
4515 if (j == vec_len - 1)
4516 rf1->repr = rf1->factor;
4517 else
4519 for (ii = vec_len - 2; ii >= (int)j; ii--)
4521 tree op1, op2;
4523 rf1 = &repeat_factor_vec[ii];
4524 rf2 = &repeat_factor_vec[ii + 1];
4526 /* Init the last factor's representative to be itself. */
4527 if (!rf2->repr)
4528 rf2->repr = rf2->factor;
4530 op1 = rf1->factor;
4531 op2 = rf2->repr;
4533 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4534 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4535 op1, op2);
4536 gimple_set_location (mul_stmt, gimple_location (stmt));
4537 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4538 rf1->repr = target_ssa;
4540 /* Don't reprocess the multiply we just introduced. */
4541 gimple_set_visited (mul_stmt, true);
4545 /* Form a call to __builtin_powi for the maximum product
4546 just formed, raised to the power obtained earlier. */
4547 rf1 = &repeat_factor_vec[j];
4548 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4549 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4550 build_int_cst (integer_type_node,
4551 power));
4552 gimple_call_set_lhs (pow_stmt, iter_result);
4553 gimple_set_location (pow_stmt, gimple_location (stmt));
4554 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4557 /* If we previously formed at least one other builtin_powi call,
4558 form the product of this one and those others. */
4559 if (result)
4561 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4562 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4563 result, iter_result);
4564 gimple_set_location (mul_stmt, gimple_location (stmt));
4565 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4566 gimple_set_visited (mul_stmt, true);
4567 result = new_result;
4569 else
4570 result = iter_result;
4572 /* Decrement the occurrence count of each element in the product
4573 by the count found above, and remove this many copies of each
4574 factor from OPS. */
4575 for (i = j; i < vec_len; i++)
4577 unsigned k = power;
4578 unsigned n;
4580 rf1 = &repeat_factor_vec[i];
4581 rf1->count -= power;
4583 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4585 if (oe->op == rf1->factor)
4587 if (oe->count <= k)
4589 ops->ordered_remove (n);
4590 k -= oe->count;
4592 if (k == 0)
4593 break;
4595 else
4597 oe->count -= k;
4598 break;
4605 /* At this point all elements in the repeated factor vector have a
4606 remaining occurrence count of 0 or 1, and those with a count of 1
4607 don't have cached representatives. Re-sort the ops vector and
4608 clean up. */
4609 ops->qsort (sort_by_operand_rank);
4610 repeat_factor_vec.release ();
4612 /* Return the final product computed herein. Note that there may
4613 still be some elements with single occurrence count left in OPS;
4614 those will be handled by the normal reassociation logic. */
4615 return result;
4618 /* Attempt to optimize
4619 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
4620 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
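/* E.g., 2.0 * copysign (3.0, y) becomes copysign (6.0, y); with -2.0
   instead, the same call is built and a NEGATE_EXPR of its result is
   emitted as well.  */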
4622 static void
4623 attempt_builtin_copysign (vec<operand_entry *> *ops)
4625 operand_entry *oe;
4626 unsigned int i;
4627 unsigned int length = ops->length ();
4628 tree cst = ops->last ()->op;
4630 if (length == 1 || TREE_CODE (cst) != REAL_CST)
4631 return;
4633 FOR_EACH_VEC_ELT (*ops, i, oe)
4635 if (TREE_CODE (oe->op) == SSA_NAME
4636 && has_single_use (oe->op))
4638 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
4639 if (gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
4641 tree fndecl = gimple_call_fndecl (def_stmt);
4642 tree arg0, arg1;
4643 switch (DECL_FUNCTION_CODE (fndecl))
4645 CASE_FLT_FN (BUILT_IN_COPYSIGN):
4646 arg0 = gimple_call_arg (def_stmt, 0);
4647 arg1 = gimple_call_arg (def_stmt, 1);
4648 /* The first argument of copysign must be a constant,
4649 otherwise there's nothing to do. */
4650 if (TREE_CODE (arg0) == REAL_CST)
4652 tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst),
4653 cst, arg0);
4654 /* If we couldn't fold to a single constant, skip it.
4655 That happens e.g. for inexact multiplication when
4656 -frounding-math. */
4657 if (mul == NULL_TREE)
4658 break;
4659 /* Instead of adjusting the old DEF_STMT, let's build
4660 a new call to not leak the LHS and prevent keeping
4661 bogus debug statements. DCE will clean up the old
4662 call. */
4663 gcall *call = gimple_build_call (fndecl, 2, mul, arg1);
4664 tree lhs = make_ssa_name (TREE_TYPE (arg0));
4665 gimple_call_set_lhs (call, lhs);
4666 gimple_set_location (call, gimple_location (def_stmt));
4667 insert_stmt_after (call, def_stmt);
4668 /* We've used the constant, get rid of it. */
4669 ops->pop ();
4670 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
4671 /* Handle the CST1 < 0 case by negating the result. */
4672 if (cst1_neg)
4674 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
4675 gimple *negate_stmt
4676 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
4677 insert_stmt_after (negate_stmt, call);
4678 oe->op = negrhs;
4680 else
4681 oe->op = lhs;
4682 if (dump_file && (dump_flags & TDF_DETAILS))
4684 fprintf (dump_file, "Optimizing copysign: ");
4685 print_generic_expr (dump_file, cst, 0);
4686 fprintf (dump_file, " * ");
4687 print_generic_expr (dump_file,
4688 gimple_call_fn (def_stmt), 0);
4689 fprintf (dump_file, " (");
4690 print_generic_expr (dump_file, arg0, 0);
4691 fprintf (dump_file, ", ");
4692 print_generic_expr (dump_file, arg1, 0);
4693 fprintf (dump_file, ") into %s",
4694 cst1_neg ? "-" : "");
4695 print_generic_expr (dump_file,
4696 gimple_call_fn (def_stmt), 0);
4697 fprintf (dump_file, " (");
4698 print_generic_expr (dump_file, mul, 0);
4699 fprintf (dump_file, ", ");
4700 print_generic_expr (dump_file, arg1, 0);
4701 fprintf (dump_file, ")\n");
4703 return;
4705 break;
4706 default:
4707 break;
4714 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4716 static void
4717 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
4719 tree rhs1;
4721 if (dump_file && (dump_flags & TDF_DETAILS))
4723 fprintf (dump_file, "Transforming ");
4724 print_gimple_stmt (dump_file, stmt, 0, 0);
4727 rhs1 = gimple_assign_rhs1 (stmt);
4728 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4729 update_stmt (stmt);
4730 remove_visited_stmt_chain (rhs1);
4732 if (dump_file && (dump_flags & TDF_DETAILS))
4734 fprintf (dump_file, " into ");
4735 print_gimple_stmt (dump_file, stmt, 0, 0);
4739 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4741 static void
4742 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
4743 tree rhs1, tree rhs2)
4745 if (dump_file && (dump_flags & TDF_DETAILS))
4747 fprintf (dump_file, "Transforming ");
4748 print_gimple_stmt (dump_file, stmt, 0, 0);
4751 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4752 update_stmt (gsi_stmt (*gsi));
4753 remove_visited_stmt_chain (rhs1);
4755 if (dump_file && (dump_flags & TDF_DETAILS))
4757 fprintf (dump_file, " into ");
4758 print_gimple_stmt (dump_file, stmt, 0, 0);
4762 /* Reassociate expressions in basic block BB and its children in the
4763 post-dominator tree. */
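/* Roughly, for each reassociable binary statement the loop below
   linearizes the expression tree into an operand vector, sorts it by
   rank, runs optimize_ops_list (plus undistribute_ops_list, range-test,
   copysign and powi optimizations where applicable), and finally
   rewrites the statement, either serially via rewrite_expr_tree or,
   when a wider reassociation width is profitable, via
   rewrite_expr_tree_parallel.  */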
4765 static void
4766 reassociate_bb (basic_block bb)
4768 gimple_stmt_iterator gsi;
4769 basic_block son;
4770 gimple *stmt = last_stmt (bb);
4772 if (stmt && !gimple_visited_p (stmt))
4773 maybe_optimize_range_tests (stmt);
4775 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4777 stmt = gsi_stmt (gsi);
4779 if (is_gimple_assign (stmt)
4780 && !stmt_could_throw_p (stmt))
4782 tree lhs, rhs1, rhs2;
4783 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4785 /* If this is not a gimple binary expression, there is
4786 nothing for us to do with it. */
4787 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4788 continue;
4790 /* If this was part of an already processed statement,
4791 we don't need to touch it again. */
4792 if (gimple_visited_p (stmt))
4794 /* This statement might have become dead because of previous
4795 reassociations. */
4796 if (has_zero_uses (gimple_get_lhs (stmt)))
4798 reassoc_remove_stmt (&gsi);
4799 release_defs (stmt);
4800 /* We might end up removing the last stmt above, which
4801 leaves the iterator at the end of the sequence.
4802 Reset it to the last stmt in this case, which might
4803 still be the end of the sequence if we removed the
4804 last statement of the sequence, in which case we
4805 need to bail out. */
4806 if (gsi_end_p (gsi))
4808 gsi = gsi_last_bb (bb);
4809 if (gsi_end_p (gsi))
4810 break;
4813 continue;
4816 lhs = gimple_assign_lhs (stmt);
4817 rhs1 = gimple_assign_rhs1 (stmt);
4818 rhs2 = gimple_assign_rhs2 (stmt);
4820 /* For operations other than bit ops and min/max we can't
4821 reassociate all types. Verify that here. */
4822 if (rhs_code != BIT_IOR_EXPR
4823 && rhs_code != BIT_AND_EXPR
4824 && rhs_code != BIT_XOR_EXPR
4825 && rhs_code != MIN_EXPR
4826 && rhs_code != MAX_EXPR
4827 && (!can_reassociate_p (lhs)
4828 || !can_reassociate_p (rhs1)
4829 || !can_reassociate_p (rhs2)))
4830 continue;
4832 if (associative_tree_code (rhs_code))
4834 auto_vec<operand_entry *> ops;
4835 tree powi_result = NULL_TREE;
4837 /* There may be no immediate uses left by the time we
4838 get here because we may have eliminated them all. */
4839 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4840 continue;
4842 gimple_set_visited (stmt, true);
4843 linearize_expr_tree (&ops, stmt, true, true);
4844 ops.qsort (sort_by_operand_rank);
4845 optimize_ops_list (rhs_code, &ops);
4846 if (undistribute_ops_list (rhs_code, &ops,
4847 loop_containing_stmt (stmt)))
4849 ops.qsort (sort_by_operand_rank);
4850 optimize_ops_list (rhs_code, &ops);
4853 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4854 optimize_range_tests (rhs_code, &ops);
4856 if (rhs_code == MULT_EXPR)
4857 attempt_builtin_copysign (&ops);
4859 if (first_pass_instance
4860 && rhs_code == MULT_EXPR
4861 && flag_unsafe_math_optimizations)
4862 powi_result = attempt_builtin_powi (stmt, &ops);
4864 /* If the operand vector is now empty, all operands were
4865 consumed by the __builtin_powi optimization. */
4866 if (ops.length () == 0)
4867 transform_stmt_to_copy (&gsi, stmt, powi_result);
4868 else if (ops.length () == 1)
4870 tree last_op = ops.last ()->op;
4872 if (powi_result)
4873 transform_stmt_to_multiply (&gsi, stmt, last_op,
4874 powi_result);
4875 else
4876 transform_stmt_to_copy (&gsi, stmt, last_op);
4878 else
4880 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4881 int ops_num = ops.length ();
4882 int width = get_reassociation_width (ops_num, rhs_code, mode);
4883 tree new_lhs = lhs;
4885 if (dump_file && (dump_flags & TDF_DETAILS))
4886 fprintf (dump_file,
4887 "Width = %d was chosen for reassociation\n", width);
4889 if (width > 1
4890 && ops.length () > 3)
4891 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4892 width, ops);
4893 else
4895 /* When there are three operands left, we want
4896 to make sure the ones that get the double
4897 binary op are chosen wisely. */
4898 int len = ops.length ();
4899 if (len >= 3)
4900 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4902 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4903 powi_result != NULL);
4906 /* If we combined some repeated factors into a
4907 __builtin_powi call, multiply that result by the
4908 reassociated operands. */
4909 if (powi_result)
4911 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4912 tree type = TREE_TYPE (lhs);
4913 tree target_ssa = make_temp_ssa_name (type, NULL,
4914 "reassocpow");
4915 gimple_set_lhs (lhs_stmt, target_ssa);
4916 update_stmt (lhs_stmt);
4917 if (lhs != new_lhs)
4918 target_ssa = new_lhs;
4919 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4920 powi_result, target_ssa);
4921 gimple_set_location (mul_stmt, gimple_location (stmt));
4922 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4928 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4929 son;
4930 son = next_dom_son (CDI_POST_DOMINATORS, son))
4931 reassociate_bb (son);
4934 /* Add jumps around shifts for range tests turned into bit tests.
4935 For each SSA_NAME VAR we have code like:
4936 VAR = ...; // final stmt of range comparison
4937 // bit test here...;
4938 OTHERVAR = ...; // final stmt of the bit test sequence
4939 RES = VAR | OTHERVAR;
4940 Turn the above into:
4941 VAR = ...;
4942 if (VAR != 0)
4943 goto <l3>;
4944 else
4945 goto <l2>;
4946 <l2>:
4947 // bit test here...;
4948 OTHERVAR = ...;
4949 <l3>:
4950 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4952 static void
4953 branch_fixup (void)
4955 tree var;
4956 unsigned int i;
4958 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4960 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
4961 gimple *use_stmt;
4962 use_operand_p use;
4963 bool ok = single_imm_use (var, &use, &use_stmt);
4964 gcc_assert (ok
4965 && is_gimple_assign (use_stmt)
4966 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4967 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4969 basic_block cond_bb = gimple_bb (def_stmt);
4970 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4971 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4973 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4974 gimple *g = gimple_build_cond (NE_EXPR, var,
4975 build_zero_cst (TREE_TYPE (var)),
4976 NULL_TREE, NULL_TREE);
4977 location_t loc = gimple_location (use_stmt);
4978 gimple_set_location (g, loc);
4979 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4981 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4982 etrue->probability = REG_BR_PROB_BASE / 2;
4983 etrue->count = cond_bb->count / 2;
4984 edge efalse = find_edge (cond_bb, then_bb);
4985 efalse->flags = EDGE_FALSE_VALUE;
4986 efalse->probability -= etrue->probability;
4987 efalse->count -= etrue->count;
4988 then_bb->count -= etrue->count;
4990 tree othervar = NULL_TREE;
4991 if (gimple_assign_rhs1 (use_stmt) == var)
4992 othervar = gimple_assign_rhs2 (use_stmt);
4993 else if (gimple_assign_rhs2 (use_stmt) == var)
4994 othervar = gimple_assign_rhs1 (use_stmt);
4995 else
4996 gcc_unreachable ();
4997 tree lhs = gimple_assign_lhs (use_stmt);
4998 gphi *phi = create_phi_node (lhs, merge_bb);
4999 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5000 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5001 gsi = gsi_for_stmt (use_stmt);
5002 gsi_remove (&gsi, true);
5004 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5005 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5007 reassoc_branch_fixups.release ();
5010 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5011 void debug_ops_vector (vec<operand_entry *> ops);
5013 /* Dump the operand entry vector OPS to FILE. */
5015 void
5016 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5018 operand_entry *oe;
5019 unsigned int i;
5021 FOR_EACH_VEC_ELT (ops, i, oe)
5023 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5024 print_generic_expr (file, oe->op, 0);
5025 fprintf (file, "\n");
5029 /* Dump the operand entry vector OPS to STDERR. */
5031 DEBUG_FUNCTION void
5032 debug_ops_vector (vec<operand_entry *> ops)
5034 dump_ops_vector (stderr, ops);
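/* Drive the whole pass over the function: walk the dominator tree from
   the entry block breaking up subtracts, then walk the post-dominator
   tree from the exit block reassociating expressions.  */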
5037 static void
5038 do_reassoc (void)
5040 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5041 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5044 /* Initialize the reassociation pass. */
5046 static void
5047 init_reassoc (void)
5049 int i;
5050 long rank = 2;
5051 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5053 /* Find the loops, so that we can prevent moving calculations in
5054 them. */
5055 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5057 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5059 next_operand_entry_id = 0;
5061 /* Reverse RPO (Reverse Post Order) will give us something where
5062 deeper loops come later. */
5063 pre_and_rev_post_order_compute (NULL, bbs, false);
5064 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5065 operand_rank = new hash_map<tree, long>;
5067 /* Give each default definition a distinct rank. This includes
5068 parameters and the static chain. Walk backwards over all
5069 SSA names so that we get proper rank ordering according
5070 to tree_swap_operands_p. */
5071 for (i = num_ssa_names - 1; i > 0; --i)
5073 tree name = ssa_name (i);
5074 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5075 insert_operand_rank (name, ++rank);
5078 /* Set up rank for each BB */
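/* Shifting the block rank into the high bits leaves headroom below
   each block's base rank for the per-expression increments that
   get_rank later applies on top of it.  */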
5079 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5080 bb_rank[bbs[i]] = ++rank << 16;
5082 free (bbs);
5083 calculate_dominance_info (CDI_POST_DOMINATORS);
5084 plus_negates = vNULL;
5087 /* Cleanup after the reassociation pass, and print stats if
5088 requested. */
5090 static void
5091 fini_reassoc (void)
5093 statistics_counter_event (cfun, "Linearized",
5094 reassociate_stats.linearized);
5095 statistics_counter_event (cfun, "Constants eliminated",
5096 reassociate_stats.constants_eliminated);
5097 statistics_counter_event (cfun, "Ops eliminated",
5098 reassociate_stats.ops_eliminated);
5099 statistics_counter_event (cfun, "Statements rewritten",
5100 reassociate_stats.rewritten);
5101 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5102 reassociate_stats.pows_encountered);
5103 statistics_counter_event (cfun, "Built-in powi calls created",
5104 reassociate_stats.pows_created);
5106 delete operand_rank;
5107 operand_entry_pool.release ();
5108 free (bb_rank);
5109 plus_negates.release ();
5110 free_dominance_info (CDI_POST_DOMINATORS);
5111 loop_optimizer_finalize ();
5114 /* Gate and execute functions for Reassociation. */
5116 static unsigned int
5117 execute_reassoc (void)
5119 init_reassoc ();
5121 do_reassoc ();
5122 repropagate_negates ();
5123 branch_fixup ();
5125 fini_reassoc ();
5126 return 0;
5129 namespace {
5131 const pass_data pass_data_reassoc =
5133 GIMPLE_PASS, /* type */
5134 "reassoc", /* name */
5135 OPTGROUP_NONE, /* optinfo_flags */
5136 TV_TREE_REASSOC, /* tv_id */
5137 ( PROP_cfg | PROP_ssa ), /* properties_required */
5138 0, /* properties_provided */
5139 0, /* properties_destroyed */
5140 0, /* todo_flags_start */
5141 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5144 class pass_reassoc : public gimple_opt_pass
5146 public:
5147 pass_reassoc (gcc::context *ctxt)
5148 : gimple_opt_pass (pass_data_reassoc, ctxt)
5151 /* opt_pass methods: */
5152 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5153 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5154 virtual unsigned int execute (function *) { return execute_reassoc (); }
5156 }; // class pass_reassoc
5158 } // anon namespace
5160 gimple_opt_pass *
5161 make_pass_reassoc (gcc::context *ctxt)
5163 return new pass_reassoc (ctxt);