1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tm_p.h"
27 #include "input.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "tree.h"
31 #include "fold-const.h"
32 #include "stor-layout.h"
33 #include "predict.h"
34 #include "hard-reg-set.h"
35 #include "function.h"
36 #include "dominance.h"
37 #include "cfg.h"
38 #include "cfganal.h"
39 #include "basic-block.h"
40 #include "gimple-pretty-print.h"
41 #include "tree-inline.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
44 #include "gimple-fold.h"
45 #include "tree-eh.h"
46 #include "gimple-expr.h"
47 #include "is-a.h"
48 #include "gimple.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-niter.h"
58 #include "tree-ssa-loop.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "expmed.h"
62 #include "dojump.h"
63 #include "explow.h"
64 #include "calls.h"
65 #include "emit-rtl.h"
66 #include "varasm.h"
67 #include "stmt.h"
68 #include "expr.h"
69 #include "tree-dfa.h"
70 #include "tree-ssa.h"
71 #include "tree-iterator.h"
72 #include "tree-pass.h"
73 #include "alloc-pool.h"
74 #include "langhooks.h"
75 #include "cfgloop.h"
76 #include "target.h"
77 #include "params.h"
78 #include "diagnostic-core.h"
79 #include "builtins.h"
80 #include "gimplify.h"
81 #include "insn-codes.h"
82 #include "optabs.h"
84 /* This is a simple global reassociation pass. It is, in part, based
85 on the LLVM pass of the same name (They do some things more/less
86 than we do, in different orders, etc).
88 It consists of five steps:
90 1. Breaking up subtract operations into addition + negate, where
91 it would promote the reassociation of adds.
93 2. Left linearization of the expression trees, so that (A+B)+(C+D)
94 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
95 During linearization, we place the operands of the binary
96 expressions into a vector of operand_entry_t
98 3. Optimization of the operand lists, eliminating things like a +
99 -a, a & a, etc.
101 3a. Combine repeated factors with the same occurrence counts
102 into a __builtin_powi call that will later be optimized into
103 an optimal number of multiplies.
105 4. Rewrite the expression trees we linearized and optimized so
106 they are in proper rank order.
108 5. Repropagate negates, as nothing else will clean it up ATM.
110 A bit of theory on #4, since nobody seems to write anything down
111 about why it makes sense to do it the way they do it:
113 We could do this much nicer theoretically, but don't (for reasons
114 explained after how to do it theoretically nice :P).
116 In order to promote the most redundancy elimination, you want
117 binary expressions whose operands are the same rank (or
118 preferably, the same value) exposed to the redundancy eliminator,
119 for possible elimination.
121 So the way to do this if we really cared, is to build the new op
122 tree from the leaves to the roots, merging as you go, and putting the
123 new op on the end of the worklist, until you are left with one
124 thing on the worklist.
126 IE if you have to rewrite the following set of operands (listed with
127 rank in parentheses), with opcode PLUS_EXPR:
129 a (1), b (1), c (1), d (2), e (2)
132 We start with our merge worklist empty, and the ops list with all of
133 those on it.
135 You want to first merge all leaves of the same rank, as much as
136 possible.
138 So first build a binary op of
140 mergetmp = a + b, and put "mergetmp" on the merge worklist.
142 Because there is no three operand form of PLUS_EXPR, c is not going to
143 be exposed to redundancy elimination as a rank 1 operand.
145 So you might as well throw it on the merge worklist (you could also
146 consider it to now be a rank two operand, and merge it with d and e,
147 but in this case, you then have evicted e from a binary op. So at
148 least in this situation, you can't win.)
150 Then build a binary op of d + e
151 mergetmp2 = d + e
153 and put mergetmp2 on the merge worklist.
155 so merge worklist = {mergetmp, c, mergetmp2}
157 Continue building binary ops of these operations until you have only
158 one operation left on the worklist.
160 So we have
162 build binary op
163 mergetmp3 = mergetmp + c
165 worklist = {mergetmp2, mergetmp3}
167 mergetmp4 = mergetmp2 + mergetmp3
169 worklist = {mergetmp4}
171 because we have one operation left, we can now just set the original
172 statement equal to the result of that operation.
174 This will at least expose a + b and d + e to redundancy elimination
175 as binary operations.
177 For extra points, you can reuse the old statements to build the
178 mergetmps, since you shouldn't run out.
180 So why don't we do this?
182 Because it's expensive, and rarely will help. Most trees we are
183 reassociating have 3 or fewer ops. If they have 2 ops, they already
184 will be written into a nice single binary op. If you have 3 ops, a
185 single simple check suffices to tell you whether the first two are of the
186 same rank. If so, you know to order it
188 mergetmp = op1 + op2
189 newstmt = mergetmp + op3
191 instead of
192 mergetmp = op2 + op3
193 newstmt = mergetmp + op1
195 If all three are of the same rank, you can't expose them all in a
196 single binary operator anyway, so the above is *still* the best you
197 can do.
199 Thus, this is what we do. When we have three ops left, we check to see
200 what order to put them in, and call it a day. As a nod to vector sum
201 reduction, we check if any of the ops are really a phi node that is a
202 destructive update for the associating op, and keep the destructive
203 update together for vector sum reduction recognition. */
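/* A small illustrative sketch of steps 1 and 5 (hedged, not taken from an
   actual testcase): step 1 rewrites

     t = a - b

   as

     n = -b
     t = a + n

   so that a chain such as x + b + ... - b exposes b and -b to the pair
   elimination of step 3, while step 5 later folds any surviving negates
   back into subtracts.  */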
206 /* Statistics */
207 static struct
209 int linearized;
210 int constants_eliminated;
211 int ops_eliminated;
212 int rewritten;
213 int pows_encountered;
214 int pows_created;
215 } reassociate_stats;
217 /* Operator, rank pair. */
218 typedef struct operand_entry
220 unsigned int rank;
221 int id;
222 tree op;
223 unsigned int count;
224 } *operand_entry_t;
226 static pool_allocator<operand_entry> operand_entry_pool ("operand entry pool",
227 30);
229 /* This is used to assign a unique ID to each struct operand_entry
230 so that qsort results are identical on different hosts. */
231 static int next_operand_entry_id;
233 /* Starting rank number for a given basic block, so that we can rank
234 operations using unmovable instructions in that BB based on the bb
235 depth. */
236 static long *bb_rank;
238 /* Operand->rank hashtable. */
239 static hash_map<tree, long> *operand_rank;
241 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
242 all basic blocks, the CFG should be adjusted - the basic block is
243 split right after that SSA_NAME's definition statement and before
244 its only use, which must be a bit ior.  */
245 static vec<tree> reassoc_branch_fixups;
247 /* Forward decls. */
248 static long get_rank (tree);
249 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
251 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
252 possibly added by gsi_remove. */
254 bool
255 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
257 gimple stmt = gsi_stmt (*gsi);
259 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
260 return gsi_remove (gsi, true);
262 gimple_stmt_iterator prev = *gsi;
263 gsi_prev (&prev);
264 unsigned uid = gimple_uid (stmt);
265 basic_block bb = gimple_bb (stmt);
266 bool ret = gsi_remove (gsi, true);
267 if (!gsi_end_p (prev))
268 gsi_next (&prev);
269 else
270 prev = gsi_start_bb (bb);
271 gimple end_stmt = gsi_stmt (*gsi);
272 while ((stmt = gsi_stmt (prev)) != end_stmt)
274 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
275 gimple_set_uid (stmt, uid);
276 gsi_next (&prev);
278 return ret;
281 /* Bias amount for loop-carried phis. We want this to be larger than
282 the depth of any reassociation tree we can see, but not larger than
283 the rank difference between two blocks. */
284 #define PHI_LOOP_BIAS (1 << 15)
286 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
287 an innermost loop, and the phi has only a single use which is inside
288 the loop, then the rank is the block rank of the loop latch plus an
289 extra bias for the loop-carried dependence. This causes expressions
290 calculated into an accumulator variable to be independent for each
291 iteration of the loop. If STMT is some other phi, the rank is the
292 block rank of its containing block. */
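/* A hedged example of the accumulator pattern described above: for a
   single-block innermost loop containing

     x_1 = PHI <x_0 (preheader), x_2 (latch)>
     ...
     x_2 = x_1 + a_5

   where x_1 has its only use inside the loop, the rank returned is
   bb_rank[latch->index] + PHI_LOOP_BIAS rather than the block rank of
   the loop header.  */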
293 static long
294 phi_rank (gimple stmt)
296 basic_block bb = gimple_bb (stmt);
297 struct loop *father = bb->loop_father;
298 tree res;
299 unsigned i;
300 use_operand_p use;
301 gimple use_stmt;
303 /* We only care about real loops (those with a latch). */
304 if (!father->latch)
305 return bb_rank[bb->index];
307 /* Interesting phis must be in headers of innermost loops. */
308 if (bb != father->header
309 || father->inner)
310 return bb_rank[bb->index];
312 /* Ignore virtual SSA_NAMEs. */
313 res = gimple_phi_result (stmt);
314 if (virtual_operand_p (res))
315 return bb_rank[bb->index];
317 /* The phi definition must have a single use, and that use must be
318 within the loop. Otherwise this isn't an accumulator pattern. */
319 if (!single_imm_use (res, &use, &use_stmt)
320 || gimple_bb (use_stmt)->loop_father != father)
321 return bb_rank[bb->index];
323 /* Look for phi arguments from within the loop. If found, bias this phi. */
324 for (i = 0; i < gimple_phi_num_args (stmt); i++)
326 tree arg = gimple_phi_arg_def (stmt, i);
327 if (TREE_CODE (arg) == SSA_NAME
328 && !SSA_NAME_IS_DEFAULT_DEF (arg))
330 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
331 if (gimple_bb (def_stmt)->loop_father == father)
332 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
336 /* Must be an uninteresting phi. */
337 return bb_rank[bb->index];
340 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
341 loop-carried dependence of an innermost loop, return TRUE; else
342 return FALSE. */
343 static bool
344 loop_carried_phi (tree exp)
346 gimple phi_stmt;
347 long block_rank;
349 if (TREE_CODE (exp) != SSA_NAME
350 || SSA_NAME_IS_DEFAULT_DEF (exp))
351 return false;
353 phi_stmt = SSA_NAME_DEF_STMT (exp);
355 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
356 return false;
358 /* Non-loop-carried phis have block rank. Loop-carried phis have
359 an additional bias added in. If this phi doesn't have block rank,
360 it's biased and should not be propagated. */
361 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
363 if (phi_rank (phi_stmt) != block_rank)
364 return true;
366 return false;
369 /* Return the maximum of RANK and the rank that should be propagated
370 from expression OP. For most operands, this is just the rank of OP.
371 For loop-carried phis, the value is zero to avoid undoing the bias
372 in favor of the phi. */
373 static long
374 propagate_rank (long rank, tree op)
376 long op_rank;
378 if (loop_carried_phi (op))
379 return rank;
381 op_rank = get_rank (op);
383 return MAX (rank, op_rank);
386 /* Look up the operand rank structure for expression E. */
388 static inline long
389 find_operand_rank (tree e)
391 long *slot = operand_rank->get (e);
392 return slot ? *slot : -1;
395 /* Insert {E,RANK} into the operand rank hashtable. */
397 static inline void
398 insert_operand_rank (tree e, long rank)
400 gcc_assert (rank > 0);
401 gcc_assert (!operand_rank->put (e, rank));
404 /* Given an expression E, return the rank of the expression. */
406 static long
407 get_rank (tree e)
409 /* SSA_NAME's have the rank of the expression they are the result of.
411 For globals and uninitialized values, the rank is 0.
412 For function arguments, use the pre-setup rank.
413 For PHI nodes, stores, asm statements, etc, we use the rank of
414 the BB.
415 For simple operations, the rank is the maximum rank of any of
416 its operands, or the bb_rank, whichever is less.
417 I make no claims that this is optimal, however, it gives good
418 results. */
420 /* We make an exception to the normal ranking system to break
421 dependences of accumulator variables in loops. Suppose we
422 have a simple one-block loop containing:
424 x_1 = phi(x_0, x_2)
425 b = a + x_1
426 c = b + d
427 x_2 = c + e
429 As shown, each iteration of the calculation into x is fully
430 dependent upon the iteration before it. We would prefer to
431 see this in the form:
433 x_1 = phi(x_0, x_2)
434 b = a + d
435 c = b + e
436 x_2 = c + x_1
438 If the loop is unrolled, the calculations of b and c from
439 different iterations can be interleaved.
441 To obtain this result during reassociation, we bias the rank
442 of the phi definition x_1 upward, when it is recognized as an
443 accumulator pattern. The artificial rank causes it to be
444 added last, providing the desired independence. */
446 if (TREE_CODE (e) == SSA_NAME)
448 ssa_op_iter iter;
449 gimple stmt;
450 long rank;
451 tree op;
453 if (SSA_NAME_IS_DEFAULT_DEF (e))
454 return find_operand_rank (e);
456 stmt = SSA_NAME_DEF_STMT (e);
457 if (gimple_code (stmt) == GIMPLE_PHI)
458 return phi_rank (stmt);
460 if (!is_gimple_assign (stmt))
461 return bb_rank[gimple_bb (stmt)->index];
463 /* If we already have a rank for this expression, use that. */
464 rank = find_operand_rank (e);
465 if (rank != -1)
466 return rank;
468 /* Otherwise, find the maximum rank for the operands. As an
469 exception, remove the bias from loop-carried phis when propagating
470 the rank so that dependent operations are not also biased. */
471 /* Simply walk over all SSA uses - this takes advantage of the
472 fact that non-SSA operands are is_gimple_min_invariant and
473 thus have rank 0. */
474 rank = 0;
475 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
476 rank = propagate_rank (rank, op);
478 if (dump_file && (dump_flags & TDF_DETAILS))
480 fprintf (dump_file, "Rank for ");
481 print_generic_expr (dump_file, e, 0);
482 fprintf (dump_file, " is %ld\n", (rank + 1));
485 /* Note the rank in the hashtable so we don't recompute it. */
486 insert_operand_rank (e, (rank + 1));
487 return (rank + 1);
490 /* Constants, globals, etc., are rank 0 */
491 return 0;
495 /* We want integer ones to end up last no matter what, since they are
496 the ones we can do the most with. */
497 #define INTEGER_CONST_TYPE 1 << 3
498 #define FLOAT_CONST_TYPE 1 << 2
499 #define OTHER_CONST_TYPE 1 << 1
501 /* Classify an invariant tree into integer, float, or other, so that
502 we can sort them to be near other constants of the same type. */
503 static inline int
504 constant_type (tree t)
506 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
507 return INTEGER_CONST_TYPE;
508 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
509 return FLOAT_CONST_TYPE;
510 else
511 return OTHER_CONST_TYPE;
514 /* qsort comparison function to sort operand entries PA and PB by rank
515 so that the sorted array is ordered by rank in decreasing order. */
516 static int
517 sort_by_operand_rank (const void *pa, const void *pb)
519 const operand_entry_t oea = *(const operand_entry_t *)pa;
520 const operand_entry_t oeb = *(const operand_entry_t *)pb;
522 /* It's nicer for optimize_expression if constants that are likely
523 to fold when added/multiplied/whatever are put next to each
524 other. Since all constants have rank 0, order them by type. */
525 if (oeb->rank == 0 && oea->rank == 0)
527 if (constant_type (oeb->op) != constant_type (oea->op))
528 return constant_type (oeb->op) - constant_type (oea->op);
529 else
530 /* To make sorting result stable, we use unique IDs to determine
531 order. */
532 return oeb->id - oea->id;
535 /* Lastly, make sure the versions that are the same go next to each
536 other. */
537 if ((oeb->rank - oea->rank == 0)
538 && TREE_CODE (oea->op) == SSA_NAME
539 && TREE_CODE (oeb->op) == SSA_NAME)
541 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
542 versions of removed SSA_NAMEs, so if possible, prefer to sort
543 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
544 See PR60418. */
545 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
546 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
547 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
549 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
550 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
551 basic_block bba = gimple_bb (stmta);
552 basic_block bbb = gimple_bb (stmtb);
553 if (bbb != bba)
555 if (bb_rank[bbb->index] != bb_rank[bba->index])
556 return bb_rank[bbb->index] - bb_rank[bba->index];
558 else
560 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
561 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
562 if (da != db)
563 return da ? 1 : -1;
567 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
568 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
569 else
570 return oeb->id - oea->id;
573 if (oeb->rank != oea->rank)
574 return oeb->rank - oea->rank;
575 else
576 return oeb->id - oea->id;
579 /* Add an operand entry to *OPS for the tree operand OP. */
581 static void
582 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
584 operand_entry_t oe = operand_entry_pool.allocate ();
586 oe->op = op;
587 oe->rank = get_rank (op);
588 oe->id = next_operand_entry_id++;
589 oe->count = 1;
590 ops->safe_push (oe);
593 /* Add an operand entry to *OPS for the tree operand OP with repeat
594 count REPEAT. */
596 static void
597 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
598 HOST_WIDE_INT repeat)
600 operand_entry_t oe = operand_entry_pool.allocate ();
602 oe->op = op;
603 oe->rank = get_rank (op);
604 oe->id = next_operand_entry_id++;
605 oe->count = repeat;
606 ops->safe_push (oe);
608 reassociate_stats.pows_encountered++;
611 /* Return true if STMT is a reassociable operation containing a binary
612 operation with tree code CODE, and is inside LOOP. */
614 static bool
615 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
617 basic_block bb = gimple_bb (stmt);
619 if (gimple_bb (stmt) == NULL)
620 return false;
622 if (!flow_bb_inside_loop_p (loop, bb))
623 return false;
625 if (is_gimple_assign (stmt)
626 && gimple_assign_rhs_code (stmt) == code
627 && has_single_use (gimple_assign_lhs (stmt)))
628 return true;
630 return false;
634 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
635 operand of that operation. Otherwise, return NULL. */
637 static tree
638 get_unary_op (tree name, enum tree_code opcode)
640 gimple stmt = SSA_NAME_DEF_STMT (name);
642 if (!is_gimple_assign (stmt))
643 return NULL_TREE;
645 if (gimple_assign_rhs_code (stmt) == opcode)
646 return gimple_assign_rhs1 (stmt);
647 return NULL_TREE;
650 /* If CURR and LAST are a pair of ops that OPCODE allows us to
651 eliminate through equivalences, do so, remove them from OPS, and
652 return true. Otherwise, return false. */
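/* For instance (an illustrative sketch, not from a testcase): with opcode
   BIT_AND_EXPR the operand list { a, a, b } shrinks to { a, b }, while with
   BIT_XOR_EXPR the two-element list { a, a } collapses to a single zero
   constant, since a ^ a is 0.  */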
654 static bool
655 eliminate_duplicate_pair (enum tree_code opcode,
656 vec<operand_entry_t> *ops,
657 bool *all_done,
658 unsigned int i,
659 operand_entry_t curr,
660 operand_entry_t last)
663 /* If we have two of the same op, and the opcode is &, |, min, or max,
664 we can eliminate one of them.
665 If we have two of the same op, and the opcode is ^, we can
666 eliminate both of them. */
668 if (last && last->op == curr->op)
670 switch (opcode)
672 case MAX_EXPR:
673 case MIN_EXPR:
674 case BIT_IOR_EXPR:
675 case BIT_AND_EXPR:
676 if (dump_file && (dump_flags & TDF_DETAILS))
678 fprintf (dump_file, "Equivalence: ");
679 print_generic_expr (dump_file, curr->op, 0);
680 fprintf (dump_file, " [&|minmax] ");
681 print_generic_expr (dump_file, last->op, 0);
682 fprintf (dump_file, " -> ");
683 print_generic_stmt (dump_file, last->op, 0);
686 ops->ordered_remove (i);
687 reassociate_stats.ops_eliminated ++;
689 return true;
691 case BIT_XOR_EXPR:
692 if (dump_file && (dump_flags & TDF_DETAILS))
694 fprintf (dump_file, "Equivalence: ");
695 print_generic_expr (dump_file, curr->op, 0);
696 fprintf (dump_file, " ^ ");
697 print_generic_expr (dump_file, last->op, 0);
698 fprintf (dump_file, " -> nothing\n");
701 reassociate_stats.ops_eliminated += 2;
703 if (ops->length () == 2)
705 ops->create (0);
706 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
707 *all_done = true;
709 else
711 ops->ordered_remove (i-1);
712 ops->ordered_remove (i-1);
715 return true;
717 default:
718 break;
721 return false;
724 static vec<tree> plus_negates;
726 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
727 expression, look in OPS for a corresponding positive operation to cancel
728 it out. If we find one, remove the other from OPS, replace
729 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
730 return false. */
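/* Hedged sketches of the two cases handled below: if t_1 = -a_2, a
   PLUS_EXPR operand list containing both t_1 and a_2 has the pair replaced
   by a 0 constant; if t_1 = ~a_2, the pair is replaced by a -1 constant,
   since a + ~a is all ones.  */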
732 static bool
733 eliminate_plus_minus_pair (enum tree_code opcode,
734 vec<operand_entry_t> *ops,
735 unsigned int currindex,
736 operand_entry_t curr)
738 tree negateop;
739 tree notop;
740 unsigned int i;
741 operand_entry_t oe;
743 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
744 return false;
746 negateop = get_unary_op (curr->op, NEGATE_EXPR);
747 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
748 if (negateop == NULL_TREE && notop == NULL_TREE)
749 return false;
751 /* Any non-negated version will have a rank that is one less than
752 the current rank. So once we hit those ranks, if we don't find
753 one, we can stop. */
755 for (i = currindex + 1;
756 ops->iterate (i, &oe)
757 && oe->rank >= curr->rank - 1 ;
758 i++)
760 if (oe->op == negateop)
763 if (dump_file && (dump_flags & TDF_DETAILS))
765 fprintf (dump_file, "Equivalence: ");
766 print_generic_expr (dump_file, negateop, 0);
767 fprintf (dump_file, " + -");
768 print_generic_expr (dump_file, oe->op, 0);
769 fprintf (dump_file, " -> 0\n");
772 ops->ordered_remove (i);
773 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
774 ops->ordered_remove (currindex);
775 reassociate_stats.ops_eliminated ++;
777 return true;
779 else if (oe->op == notop)
781 tree op_type = TREE_TYPE (oe->op);
783 if (dump_file && (dump_flags & TDF_DETAILS))
785 fprintf (dump_file, "Equivalence: ");
786 print_generic_expr (dump_file, notop, 0);
787 fprintf (dump_file, " + ~");
788 print_generic_expr (dump_file, oe->op, 0);
789 fprintf (dump_file, " -> -1\n");
792 ops->ordered_remove (i);
793 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
794 ops->ordered_remove (currindex);
795 reassociate_stats.ops_eliminated ++;
797 return true;
801 /* CURR->OP is a negate expr in a plus expr: save it for later
802 inspection in repropagate_negates(). */
803 if (negateop != NULL_TREE)
804 plus_negates.safe_push (curr->op);
806 return false;
809 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
810 bitwise not expression, look in OPS for a corresponding operand to
811 cancel it out. If we find one, collapse OPS to a single constant,
812 0 for BIT_AND_EXPR or -1 for BIT_IOR_EXPR, and return true.
813 Otherwise, return false. */
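/* For example (a hedged sketch): if t_1 = ~a_2 and both t_1 and a_2 appear
   in OPS, then for BIT_AND_EXPR the whole list collapses to { 0 } because
   a & ~a is 0, and for BIT_IOR_EXPR it collapses to { -1 } because a | ~a
   is all ones.  */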
815 static bool
816 eliminate_not_pairs (enum tree_code opcode,
817 vec<operand_entry_t> *ops,
818 unsigned int currindex,
819 operand_entry_t curr)
821 tree notop;
822 unsigned int i;
823 operand_entry_t oe;
825 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
826 || TREE_CODE (curr->op) != SSA_NAME)
827 return false;
829 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
830 if (notop == NULL_TREE)
831 return false;
833 /* Any non-not version will have a rank that is one less than
834 the current rank. So once we hit those ranks, if we don't find
835 one, we can stop. */
837 for (i = currindex + 1;
838 ops->iterate (i, &oe)
839 && oe->rank >= curr->rank - 1;
840 i++)
842 if (oe->op == notop)
844 if (dump_file && (dump_flags & TDF_DETAILS))
846 fprintf (dump_file, "Equivalence: ");
847 print_generic_expr (dump_file, notop, 0);
848 if (opcode == BIT_AND_EXPR)
849 fprintf (dump_file, " & ~");
850 else if (opcode == BIT_IOR_EXPR)
851 fprintf (dump_file, " | ~");
852 print_generic_expr (dump_file, oe->op, 0);
853 if (opcode == BIT_AND_EXPR)
854 fprintf (dump_file, " -> 0\n");
855 else if (opcode == BIT_IOR_EXPR)
856 fprintf (dump_file, " -> -1\n");
859 if (opcode == BIT_AND_EXPR)
860 oe->op = build_zero_cst (TREE_TYPE (oe->op));
861 else if (opcode == BIT_IOR_EXPR)
862 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
864 reassociate_stats.ops_eliminated += ops->length () - 1;
865 ops->truncate (0);
866 ops->quick_push (oe);
867 return true;
871 return false;
874 /* Use constant value that may be present in OPS to try to eliminate
875 operands. Note that this function is only really used when we've
876 eliminated ops for other reasons, or merged constants. Across
877 single statements, fold already does all of this, plus more. There
878 is little point in duplicating logic, so I've only included the
879 identities that I could ever construct testcases to trigger. */
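/* Hedged examples of the identities handled below: a & 0 and a * 0 wipe
   out all other operands, a | -1 wipes out everything but the -1, and
   a & -1, a | 0, a * 1 and a + 0 simply drop the constant from the
   operand list.  */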
881 static void
882 eliminate_using_constants (enum tree_code opcode,
883 vec<operand_entry_t> *ops)
885 operand_entry_t oelast = ops->last ();
886 tree type = TREE_TYPE (oelast->op);
888 if (oelast->rank == 0
889 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
891 switch (opcode)
893 case BIT_AND_EXPR:
894 if (integer_zerop (oelast->op))
896 if (ops->length () != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found & 0, removing all other ops\n");
901 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oelast);
905 return;
908 else if (integer_all_onesp (oelast->op))
910 if (ops->length () != 1)
912 if (dump_file && (dump_flags & TDF_DETAILS))
913 fprintf (dump_file, "Found & -1, removing\n");
914 ops->pop ();
915 reassociate_stats.ops_eliminated++;
918 break;
919 case BIT_IOR_EXPR:
920 if (integer_all_onesp (oelast->op))
922 if (ops->length () != 1)
924 if (dump_file && (dump_flags & TDF_DETAILS))
925 fprintf (dump_file, "Found | -1, removing all other ops\n");
927 reassociate_stats.ops_eliminated += ops->length () - 1;
929 ops->truncate (0);
930 ops->quick_push (oelast);
931 return;
934 else if (integer_zerop (oelast->op))
936 if (ops->length () != 1)
938 if (dump_file && (dump_flags & TDF_DETAILS))
939 fprintf (dump_file, "Found | 0, removing\n");
940 ops->pop ();
941 reassociate_stats.ops_eliminated++;
944 break;
945 case MULT_EXPR:
946 if (integer_zerop (oelast->op)
947 || (FLOAT_TYPE_P (type)
948 && !HONOR_NANS (type)
949 && !HONOR_SIGNED_ZEROS (type)
950 && real_zerop (oelast->op)))
952 if (ops->length () != 1)
954 if (dump_file && (dump_flags & TDF_DETAILS))
955 fprintf (dump_file, "Found * 0, removing all other ops\n");
957 reassociate_stats.ops_eliminated += ops->length () - 1;
958 ops->truncate (1);
959 ops->quick_push (oelast);
960 return;
963 else if (integer_onep (oelast->op)
964 || (FLOAT_TYPE_P (type)
965 && !HONOR_SNANS (type)
966 && real_onep (oelast->op)))
968 if (ops->length () != 1)
970 if (dump_file && (dump_flags & TDF_DETAILS))
971 fprintf (dump_file, "Found * 1, removing\n");
972 ops->pop ();
973 reassociate_stats.ops_eliminated++;
974 return;
977 break;
978 case BIT_XOR_EXPR:
979 case PLUS_EXPR:
980 case MINUS_EXPR:
981 if (integer_zerop (oelast->op)
982 || (FLOAT_TYPE_P (type)
983 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
984 && fold_real_zero_addition_p (type, oelast->op,
985 opcode == MINUS_EXPR)))
987 if (ops->length () != 1)
989 if (dump_file && (dump_flags & TDF_DETAILS))
990 fprintf (dump_file, "Found [|^+] 0, removing\n");
991 ops->pop ();
992 reassociate_stats.ops_eliminated++;
993 return;
996 break;
997 default:
998 break;
1004 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1005 bool, bool);
1007 /* Structure for tracking and counting operands. */
1008 typedef struct oecount_s {
1009 int cnt;
1010 int id;
1011 enum tree_code oecode;
1012 tree op;
1013 } oecount;
1016 /* The heap for the oecount hashtable and the sorted list of operands. */
1017 static vec<oecount> cvec;
1020 /* Oecount hashtable helpers. */
1022 struct oecount_hasher
1024 typedef int value_type;
1025 typedef int compare_type;
1026 static inline hashval_t hash (const value_type &);
1027 static inline bool equal (const value_type &, const compare_type &);
1028 static bool is_deleted (int &v) { return v == 1; }
1029 static void mark_deleted (int &e) { e = 1; }
1030 static bool is_empty (int &v) { return v == 0; }
1031 static void mark_empty (int &e) { e = 0; }
1032 static void remove (int &) {}
1035 /* Hash function for oecount. */
1037 inline hashval_t
1038 oecount_hasher::hash (const value_type &p)
1040 const oecount *c = &cvec[p - 42];
1041 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1044 /* Comparison function for oecount. */
1046 inline bool
1047 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1049 const oecount *c1 = &cvec[p1 - 42];
1050 const oecount *c2 = &cvec[p2 - 42];
1051 return (c1->oecode == c2->oecode
1052 && c1->op == c2->op);
1055 /* Comparison function for qsort sorting oecount elements by count. */
1057 static int
1058 oecount_cmp (const void *p1, const void *p2)
1060 const oecount *c1 = (const oecount *)p1;
1061 const oecount *c2 = (const oecount *)p2;
1062 if (c1->cnt != c2->cnt)
1063 return c1->cnt - c2->cnt;
1064 else
1065 /* If counts are identical, use unique IDs to stabilize qsort. */
1066 return c1->id - c2->id;
1069 /* Return TRUE iff STMT represents a builtin call that raises OP
1070 to some exponent. */
1072 static bool
1073 stmt_is_power_of_op (gimple stmt, tree op)
1075 tree fndecl;
1077 if (!is_gimple_call (stmt))
1078 return false;
1080 fndecl = gimple_call_fndecl (stmt);
1082 if (!fndecl
1083 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1084 return false;
1086 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1088 CASE_FLT_FN (BUILT_IN_POW):
1089 CASE_FLT_FN (BUILT_IN_POWI):
1090 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1092 default:
1093 return false;
1097 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1098 in place and return the result. Assumes that stmt_is_power_of_op
1099 was previously called for STMT and returned TRUE. */
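/* A hedged example: for __builtin_powi (x, 5) the exponent argument is
   rewritten to 4 in place and 4 is returned; for __builtin_pow (x, 3.0)
   the argument becomes 2.0 and 2 is returned.  */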
1101 static HOST_WIDE_INT
1102 decrement_power (gimple stmt)
1104 REAL_VALUE_TYPE c, cint;
1105 HOST_WIDE_INT power;
1106 tree arg1;
1108 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1110 CASE_FLT_FN (BUILT_IN_POW):
1111 arg1 = gimple_call_arg (stmt, 1);
1112 c = TREE_REAL_CST (arg1);
1113 power = real_to_integer (&c) - 1;
1114 real_from_integer (&cint, VOIDmode, power, SIGNED);
1115 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1116 return power;
1118 CASE_FLT_FN (BUILT_IN_POWI):
1119 arg1 = gimple_call_arg (stmt, 1);
1120 power = TREE_INT_CST_LOW (arg1) - 1;
1121 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1122 return power;
1124 default:
1125 gcc_unreachable ();
1129 /* Find the single immediate use of STMT's LHS, and replace it
1130 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1131 replace *DEF with OP as well. */
1133 static void
1134 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1136 tree lhs;
1137 gimple use_stmt;
1138 use_operand_p use;
1139 gimple_stmt_iterator gsi;
1141 if (is_gimple_call (stmt))
1142 lhs = gimple_call_lhs (stmt);
1143 else
1144 lhs = gimple_assign_lhs (stmt);
1146 gcc_assert (has_single_use (lhs));
1147 single_imm_use (lhs, &use, &use_stmt);
1148 if (lhs == *def)
1149 *def = op;
1150 SET_USE (use, op);
1151 if (TREE_CODE (op) != SSA_NAME)
1152 update_stmt (use_stmt);
1153 gsi = gsi_for_stmt (stmt);
1154 unlink_stmt_vdef (stmt);
1155 reassoc_remove_stmt (&gsi);
1156 release_defs (stmt);
1159 /* Walks the linear chain with result *DEF searching for an operation
1160 with operand OP and code OPCODE removing that from the chain. *DEF
1161 is updated if there is only one operand but no operation left. */
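/* A hedged sketch: if *DEF is d_3 in

     t_1 = a_4 * b_5
     d_3 = t_1 * c_6

   and we are asked to remove operand b_5 with code MULT_EXPR, the chain is
   rewritten so that t_1's single use receives a_4 directly, leaving
   effectively d_3 = a_4 * c_6.  */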
1163 static void
1164 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1166 gimple stmt = SSA_NAME_DEF_STMT (*def);
1170 tree name;
1172 if (opcode == MULT_EXPR
1173 && stmt_is_power_of_op (stmt, op))
1175 if (decrement_power (stmt) == 1)
1176 propagate_op_to_single_use (op, stmt, def);
1177 return;
1180 name = gimple_assign_rhs1 (stmt);
1182 /* If this is the operation we look for and one of the operands
1183 is ours simply propagate the other operand into the stmts
1184 single use. */
1185 if (gimple_assign_rhs_code (stmt) == opcode
1186 && (name == op
1187 || gimple_assign_rhs2 (stmt) == op))
1189 if (name == op)
1190 name = gimple_assign_rhs2 (stmt);
1191 propagate_op_to_single_use (name, stmt, def);
1192 return;
1195 /* We might have a multiply of two __builtin_pow* calls, and
1196 the operand might be hiding in the rightmost one. */
1197 if (opcode == MULT_EXPR
1198 && gimple_assign_rhs_code (stmt) == opcode
1199 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1200 && has_single_use (gimple_assign_rhs2 (stmt)))
1202 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1203 if (stmt_is_power_of_op (stmt2, op))
1205 if (decrement_power (stmt2) == 1)
1206 propagate_op_to_single_use (op, stmt2, def);
1207 return;
1211 /* Continue walking the chain. */
1212 gcc_assert (name != op
1213 && TREE_CODE (name) == SSA_NAME);
1214 stmt = SSA_NAME_DEF_STMT (name);
1216 while (1);
1219 /* Returns true if statement S1 dominates statement S2. Like
1220 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1222 static bool
1223 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1225 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1227 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1228 SSA_NAME. Assume it lives at the beginning of function and
1229 thus dominates everything. */
1230 if (!bb1 || s1 == s2)
1231 return true;
1233 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1234 if (!bb2)
1235 return false;
1237 if (bb1 == bb2)
1239 /* PHIs in the same basic block are assumed to be
1240 executed all in parallel; if only one stmt is a PHI,
1241 it dominates the other stmt in the same basic block. */
1242 if (gimple_code (s1) == GIMPLE_PHI)
1243 return true;
1245 if (gimple_code (s2) == GIMPLE_PHI)
1246 return false;
1248 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1250 if (gimple_uid (s1) < gimple_uid (s2))
1251 return true;
1253 if (gimple_uid (s1) > gimple_uid (s2))
1254 return false;
1256 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1257 unsigned int uid = gimple_uid (s1);
1258 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1260 gimple s = gsi_stmt (gsi);
1261 if (gimple_uid (s) != uid)
1262 break;
1263 if (s == s2)
1264 return true;
1267 return false;
1270 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1273 /* Insert STMT after INSERT_POINT. */
1275 static void
1276 insert_stmt_after (gimple stmt, gimple insert_point)
1278 gimple_stmt_iterator gsi;
1279 basic_block bb;
1281 if (gimple_code (insert_point) == GIMPLE_PHI)
1282 bb = gimple_bb (insert_point);
1283 else if (!stmt_ends_bb_p (insert_point))
1285 gsi = gsi_for_stmt (insert_point);
1286 gimple_set_uid (stmt, gimple_uid (insert_point));
1287 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1288 return;
1290 else
1291 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1292 thus if it must end a basic block, it should be a call that can
1293 throw, or some assignment that can throw. If it throws, the LHS
1294 of it will not be initialized though, so only valid places using
1295 the SSA_NAME should be dominated by the fallthru edge. */
1296 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1297 gsi = gsi_after_labels (bb);
1298 if (gsi_end_p (gsi))
1300 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1301 gimple_set_uid (stmt,
1302 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1304 else
1305 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1306 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1309 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1310 the result. Places the statement after the definition of either
1311 OP1 or OP2. Returns the new statement. */
1313 static gimple
1314 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1316 gimple op1def = NULL, op2def = NULL;
1317 gimple_stmt_iterator gsi;
1318 tree op;
1319 gassign *sum;
1321 /* Create the addition statement. */
1322 op = make_ssa_name (type);
1323 sum = gimple_build_assign (op, opcode, op1, op2);
1325 /* Find an insertion place and insert. */
1326 if (TREE_CODE (op1) == SSA_NAME)
1327 op1def = SSA_NAME_DEF_STMT (op1);
1328 if (TREE_CODE (op2) == SSA_NAME)
1329 op2def = SSA_NAME_DEF_STMT (op2);
1330 if ((!op1def || gimple_nop_p (op1def))
1331 && (!op2def || gimple_nop_p (op2def)))
1333 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1334 if (gsi_end_p (gsi))
1336 gimple_stmt_iterator gsi2
1337 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1338 gimple_set_uid (sum,
1339 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1341 else
1342 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1343 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1345 else
1347 gimple insert_point;
1348 if ((!op1def || gimple_nop_p (op1def))
1349 || (op2def && !gimple_nop_p (op2def)
1350 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1351 insert_point = op2def;
1352 else
1353 insert_point = op1def;
1354 insert_stmt_after (sum, insert_point);
1356 update_stmt (sum);
1358 return sum;
1361 /* Perform un-distribution of divisions and multiplications.
1362 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1363 to (A + B) / X for real X.
1365 The algorithm is organized as follows.
1367 - First we walk the addition chain *OPS looking for summands that
1368 are defined by a multiplication or a real division. This results
1369 in the candidates bitmap with relevant indices into *OPS.
1371 - Second we build the chains of multiplications or divisions for
1372 these candidates, counting the number of occurrences of (operand, code)
1373 pairs in all of the candidates chains.
1375 - Third we sort the (operand, code) pairs by number of occurrence and
1376 process them starting with the pair with the most uses.
1378 * For each such pair we walk the candidates again to build a
1379 second candidate bitmap noting all multiplication/division chains
1380 that have at least one occurrence of (operand, code).
1382 * We build an alternate addition chain only covering these
1383 candidates with one (operand, code) operation removed from their
1384 multiplication/division chain.
1386 * The first candidate gets replaced by the alternate addition chain
1387 multiplied/divided by the operand.
1389 * All candidate chains get disabled for further processing and
1390 processing of (operand, code) pairs continues.
1392 The alternate addition chains built are re-processed by the main
1393 reassociation algorithm which allows optimizing a * x * y + b * y * x
1394 to (a + b) * x * y in one invocation of the reassociation pass. */
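/* A hedged GIMPLE-level sketch of the basic transformation: the addition
   chain

     t_1 = a_4 * x_5
     t_2 = b_6 * x_5
     s_3 = t_1 + t_2

   is rewritten so that s_3 is computed as

     n_7 = a_4 + b_6
     s_3 = n_7 * x_5

   and the new addition chain is handed back to the main reassociation
   machinery.  */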
1396 static bool
1397 undistribute_ops_list (enum tree_code opcode,
1398 vec<operand_entry_t> *ops, struct loop *loop)
1400 unsigned int length = ops->length ();
1401 operand_entry_t oe1;
1402 unsigned i, j;
1403 sbitmap candidates, candidates2;
1404 unsigned nr_candidates, nr_candidates2;
1405 sbitmap_iterator sbi0;
1406 vec<operand_entry_t> *subops;
1407 bool changed = false;
1408 int next_oecount_id = 0;
1410 if (length <= 1
1411 || opcode != PLUS_EXPR)
1412 return false;
1414 /* Build a list of candidates to process. */
1415 candidates = sbitmap_alloc (length);
1416 bitmap_clear (candidates);
1417 nr_candidates = 0;
1418 FOR_EACH_VEC_ELT (*ops, i, oe1)
1420 enum tree_code dcode;
1421 gimple oe1def;
1423 if (TREE_CODE (oe1->op) != SSA_NAME)
1424 continue;
1425 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1426 if (!is_gimple_assign (oe1def))
1427 continue;
1428 dcode = gimple_assign_rhs_code (oe1def);
1429 if ((dcode != MULT_EXPR
1430 && dcode != RDIV_EXPR)
1431 || !is_reassociable_op (oe1def, dcode, loop))
1432 continue;
1434 bitmap_set_bit (candidates, i);
1435 nr_candidates++;
1438 if (nr_candidates < 2)
1440 sbitmap_free (candidates);
1441 return false;
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1446 fprintf (dump_file, "searching for un-distribute opportunities ");
1447 print_generic_expr (dump_file,
1448 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1449 fprintf (dump_file, " %d\n", nr_candidates);
1452 /* Build linearized sub-operand lists and the counting table. */
1453 cvec.create (0);
1455 hash_table<oecount_hasher> ctable (15);
1457 /* ??? Macro arguments cannot have multi-argument template types in
1458 them. This typedef is needed to work around that limitation. */
1459 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1460 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1461 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1463 gimple oedef;
1464 enum tree_code oecode;
1465 unsigned j;
1467 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1468 oecode = gimple_assign_rhs_code (oedef);
1469 linearize_expr_tree (&subops[i], oedef,
1470 associative_tree_code (oecode), false);
1472 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1474 oecount c;
1475 int *slot;
1476 int idx;
1477 c.oecode = oecode;
1478 c.cnt = 1;
1479 c.id = next_oecount_id++;
1480 c.op = oe1->op;
1481 cvec.safe_push (c);
1482 idx = cvec.length () + 41;
1483 slot = ctable.find_slot (idx, INSERT);
1484 if (!*slot)
1486 *slot = idx;
1488 else
1490 cvec.pop ();
1491 cvec[*slot - 42].cnt++;
1496 /* Sort the counting table. */
1497 cvec.qsort (oecount_cmp);
1499 if (dump_file && (dump_flags & TDF_DETAILS))
1501 oecount *c;
1502 fprintf (dump_file, "Candidates:\n");
1503 FOR_EACH_VEC_ELT (cvec, j, c)
1505 fprintf (dump_file, " %u %s: ", c->cnt,
1506 c->oecode == MULT_EXPR
1507 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1508 print_generic_expr (dump_file, c->op, 0);
1509 fprintf (dump_file, "\n");
1513 /* Process the (operand, code) pairs in order of most occurrence. */
1514 candidates2 = sbitmap_alloc (length);
1515 while (!cvec.is_empty ())
1517 oecount *c = &cvec.last ();
1518 if (c->cnt < 2)
1519 break;
1521 /* Now collect the operands in the outer chain that contain
1522 the common operand in their inner chain. */
1523 bitmap_clear (candidates2);
1524 nr_candidates2 = 0;
1525 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1527 gimple oedef;
1528 enum tree_code oecode;
1529 unsigned j;
1530 tree op = (*ops)[i]->op;
1532 /* If we already undistributed in this chain, this may be
1533 a constant. */
1534 if (TREE_CODE (op) != SSA_NAME)
1535 continue;
1537 oedef = SSA_NAME_DEF_STMT (op);
1538 oecode = gimple_assign_rhs_code (oedef);
1539 if (oecode != c->oecode)
1540 continue;
1542 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1544 if (oe1->op == c->op)
1546 bitmap_set_bit (candidates2, i);
1547 ++nr_candidates2;
1548 break;
1553 if (nr_candidates2 >= 2)
1555 operand_entry_t oe1, oe2;
1556 gimple prod;
1557 int first = bitmap_first_set_bit (candidates2);
1559 /* Build the new addition chain. */
1560 oe1 = (*ops)[first];
1561 if (dump_file && (dump_flags & TDF_DETAILS))
1563 fprintf (dump_file, "Building (");
1564 print_generic_expr (dump_file, oe1->op, 0);
1566 zero_one_operation (&oe1->op, c->oecode, c->op);
1567 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1569 gimple sum;
1570 oe2 = (*ops)[i];
1571 if (dump_file && (dump_flags & TDF_DETAILS))
1573 fprintf (dump_file, " + ");
1574 print_generic_expr (dump_file, oe2->op, 0);
1576 zero_one_operation (&oe2->op, c->oecode, c->op);
1577 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1578 oe1->op, oe2->op, opcode);
1579 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1580 oe2->rank = 0;
1581 oe1->op = gimple_get_lhs (sum);
1584 /* Apply the multiplication/division. */
1585 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1586 oe1->op, c->op, c->oecode);
1587 if (dump_file && (dump_flags & TDF_DETAILS))
1589 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1590 print_generic_expr (dump_file, c->op, 0);
1591 fprintf (dump_file, "\n");
1594 /* Record it in the addition chain and disable further
1595 undistribution with this op. */
1596 oe1->op = gimple_assign_lhs (prod);
1597 oe1->rank = get_rank (oe1->op);
1598 subops[first].release ();
1600 changed = true;
1603 cvec.pop ();
1606 for (i = 0; i < ops->length (); ++i)
1607 subops[i].release ();
1608 free (subops);
1609 cvec.release ();
1610 sbitmap_free (candidates);
1611 sbitmap_free (candidates2);
1613 return changed;
1616 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1617 expression, examine the other OPS to see if any of them are comparisons
1618 of the same values, which we may be able to combine or eliminate.
1619 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
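/* A hedged GIMPLE-level sketch of the example above:

     _1 = a_4 < b_5;
     _2 = a_4 == b_5;
     _3 = _1 | _2;

   maybe_fold_or_comparisons can merge the two comparisons, so the operand
   list keeps a single a_4 <= b_5 entry and the redundant comparison is
   removed.  */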
1621 static bool
1622 eliminate_redundant_comparison (enum tree_code opcode,
1623 vec<operand_entry_t> *ops,
1624 unsigned int currindex,
1625 operand_entry_t curr)
1627 tree op1, op2;
1628 enum tree_code lcode, rcode;
1629 gimple def1, def2;
1630 int i;
1631 operand_entry_t oe;
1633 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1634 return false;
1636 /* Check that CURR is a comparison. */
1637 if (TREE_CODE (curr->op) != SSA_NAME)
1638 return false;
1639 def1 = SSA_NAME_DEF_STMT (curr->op);
1640 if (!is_gimple_assign (def1))
1641 return false;
1642 lcode = gimple_assign_rhs_code (def1);
1643 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1644 return false;
1645 op1 = gimple_assign_rhs1 (def1);
1646 op2 = gimple_assign_rhs2 (def1);
1648 /* Now look for a similar comparison in the remaining OPS. */
1649 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1651 tree t;
1653 if (TREE_CODE (oe->op) != SSA_NAME)
1654 continue;
1655 def2 = SSA_NAME_DEF_STMT (oe->op);
1656 if (!is_gimple_assign (def2))
1657 continue;
1658 rcode = gimple_assign_rhs_code (def2);
1659 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1660 continue;
1662 /* If we got here, we have a match. See if we can combine the
1663 two comparisons. */
1664 if (opcode == BIT_IOR_EXPR)
1665 t = maybe_fold_or_comparisons (lcode, op1, op2,
1666 rcode, gimple_assign_rhs1 (def2),
1667 gimple_assign_rhs2 (def2));
1668 else
1669 t = maybe_fold_and_comparisons (lcode, op1, op2,
1670 rcode, gimple_assign_rhs1 (def2),
1671 gimple_assign_rhs2 (def2));
1672 if (!t)
1673 continue;
1675 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1676 always give us a boolean_type_node value back. If the original
1677 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1678 we need to convert. */
1679 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1680 t = fold_convert (TREE_TYPE (curr->op), t);
1682 if (TREE_CODE (t) != INTEGER_CST
1683 && !operand_equal_p (t, curr->op, 0))
1685 enum tree_code subcode;
1686 tree newop1, newop2;
1687 if (!COMPARISON_CLASS_P (t))
1688 continue;
1689 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1690 STRIP_USELESS_TYPE_CONVERSION (newop1);
1691 STRIP_USELESS_TYPE_CONVERSION (newop2);
1692 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1693 continue;
1696 if (dump_file && (dump_flags & TDF_DETAILS))
1698 fprintf (dump_file, "Equivalence: ");
1699 print_generic_expr (dump_file, curr->op, 0);
1700 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1701 print_generic_expr (dump_file, oe->op, 0);
1702 fprintf (dump_file, " -> ");
1703 print_generic_expr (dump_file, t, 0);
1704 fprintf (dump_file, "\n");
1707 /* Now we can delete oe, as it has been subsumed by the new combined
1708 expression t. */
1709 ops->ordered_remove (i);
1710 reassociate_stats.ops_eliminated ++;
1712 /* If t is the same as curr->op, we're done. Otherwise we must
1713 replace curr->op with t. Special case is if we got a constant
1714 back, in which case we add it to the end instead of in place of
1715 the current entry. */
1716 if (TREE_CODE (t) == INTEGER_CST)
1718 ops->ordered_remove (currindex);
1719 add_to_ops_vec (ops, t);
1721 else if (!operand_equal_p (t, curr->op, 0))
1723 gimple sum;
1724 enum tree_code subcode;
1725 tree newop1;
1726 tree newop2;
1727 gcc_assert (COMPARISON_CLASS_P (t));
1728 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1729 STRIP_USELESS_TYPE_CONVERSION (newop1);
1730 STRIP_USELESS_TYPE_CONVERSION (newop2);
1731 gcc_checking_assert (is_gimple_val (newop1)
1732 && is_gimple_val (newop2));
1733 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1734 curr->op = gimple_get_lhs (sum);
1736 return true;
1739 return false;
1742 /* Perform various identities and other optimizations on the list of
1743 operand entries, stored in OPS. The tree code for the binary
1744 operation between all the operands is OPCODE. */
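/* A hedged example of the constant-merging step below: for PLUS_EXPR an
   operand list ending in { ..., a_1, 3, 5 } first becomes { ..., a_1, 8 },
   after which the pairwise elimination routines above are retried on the
   shorter list.  */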
1746 static void
1747 optimize_ops_list (enum tree_code opcode,
1748 vec<operand_entry_t> *ops)
1750 unsigned int length = ops->length ();
1751 unsigned int i;
1752 operand_entry_t oe;
1753 operand_entry_t oelast = NULL;
1754 bool iterate = false;
1756 if (length == 1)
1757 return;
1759 oelast = ops->last ();
1761 /* If the last two are constants, pop the constants off, merge them
1762 and try the next two. */
1763 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1765 operand_entry_t oelm1 = (*ops)[length - 2];
1767 if (oelm1->rank == 0
1768 && is_gimple_min_invariant (oelm1->op)
1769 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1770 TREE_TYPE (oelast->op)))
1772 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1773 oelm1->op, oelast->op);
1775 if (folded && is_gimple_min_invariant (folded))
1777 if (dump_file && (dump_flags & TDF_DETAILS))
1778 fprintf (dump_file, "Merging constants\n");
1780 ops->pop ();
1781 ops->pop ();
1783 add_to_ops_vec (ops, folded);
1784 reassociate_stats.constants_eliminated++;
1786 optimize_ops_list (opcode, ops);
1787 return;
1792 eliminate_using_constants (opcode, ops);
1793 oelast = NULL;
1795 for (i = 0; ops->iterate (i, &oe);)
1797 bool done = false;
1799 if (eliminate_not_pairs (opcode, ops, i, oe))
1800 return;
1801 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1802 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1803 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1805 if (done)
1806 return;
1807 iterate = true;
1808 oelast = NULL;
1809 continue;
1811 oelast = oe;
1812 i++;
1815 length = ops->length ();
1816 oelast = ops->last ();
1818 if (iterate)
1819 optimize_ops_list (opcode, ops);
1822 /* The following functions are subroutines to optimize_range_tests and allow
1823 it to try to change a logical combination of comparisons into a range
1824 test.
1826 For example, both
1827 X == 2 || X == 5 || X == 3 || X == 4
1828 and
1829 X >= 2 && X <= 5
1830 are converted to
1831 (unsigned) (X - 2) <= 3
1833 For more information see comments above fold_range_test in fold-const.c,
1834 this implementation is for GIMPLE. */
1836 struct range_entry
1838 tree exp;
1839 tree low;
1840 tree high;
1841 bool in_p;
1842 bool strict_overflow_p;
1843 unsigned int idx, next;
1846 /* This is similar to make_range in fold-const.c, but on top of
1847 GIMPLE instead of trees. If EXP is non-NULL, it should be
1848 an SSA_NAME and STMT argument is ignored, otherwise STMT
1849 argument should be a GIMPLE_COND. */
1851 static void
1852 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1854 int in_p;
1855 tree low, high;
1856 bool is_bool, strict_overflow_p;
1858 r->exp = NULL_TREE;
1859 r->in_p = false;
1860 r->strict_overflow_p = false;
1861 r->low = NULL_TREE;
1862 r->high = NULL_TREE;
1863 if (exp != NULL_TREE
1864 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1865 return;
1867 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1868 and see if we can refine the range. Some of the cases below may not
1869 happen, but it doesn't seem worth worrying about this. We "continue"
1870 the outer loop when we've changed something; otherwise we "break"
1871 the switch, which will "break" the while. */
1872 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1873 high = low;
1874 in_p = 0;
1875 strict_overflow_p = false;
1876 is_bool = false;
1877 if (exp == NULL_TREE)
1878 is_bool = true;
1879 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1881 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1882 is_bool = true;
1883 else
1884 return;
1886 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1887 is_bool = true;
1889 while (1)
1891 enum tree_code code;
1892 tree arg0, arg1, exp_type;
1893 tree nexp;
1894 location_t loc;
1896 if (exp != NULL_TREE)
1898 if (TREE_CODE (exp) != SSA_NAME
1899 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1900 break;
1902 stmt = SSA_NAME_DEF_STMT (exp);
1903 if (!is_gimple_assign (stmt))
1904 break;
1906 code = gimple_assign_rhs_code (stmt);
1907 arg0 = gimple_assign_rhs1 (stmt);
1908 arg1 = gimple_assign_rhs2 (stmt);
1909 exp_type = TREE_TYPE (exp);
1911 else
1913 code = gimple_cond_code (stmt);
1914 arg0 = gimple_cond_lhs (stmt);
1915 arg1 = gimple_cond_rhs (stmt);
1916 exp_type = boolean_type_node;
1919 if (TREE_CODE (arg0) != SSA_NAME)
1920 break;
1921 loc = gimple_location (stmt);
1922 switch (code)
1924 case BIT_NOT_EXPR:
1925 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1926 /* Ensure the range is either +[-,0], +[0,0],
1927 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1928 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1929 or similar expression of unconditional true or
1930 false, it should not be negated. */
1931 && ((high && integer_zerop (high))
1932 || (low && integer_onep (low))))
1934 in_p = !in_p;
1935 exp = arg0;
1936 continue;
1938 break;
1939 case SSA_NAME:
1940 exp = arg0;
1941 continue;
1942 CASE_CONVERT:
1943 if (is_bool)
1944 goto do_default;
1945 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1947 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1948 is_bool = true;
1949 else
1950 return;
1952 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1953 is_bool = true;
1954 goto do_default;
1955 case EQ_EXPR:
1956 case NE_EXPR:
1957 case LT_EXPR:
1958 case LE_EXPR:
1959 case GE_EXPR:
1960 case GT_EXPR:
1961 is_bool = true;
1962 /* FALLTHRU */
1963 default:
1964 if (!is_bool)
1965 return;
1966 do_default:
1967 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1968 &low, &high, &in_p,
1969 &strict_overflow_p);
1970 if (nexp != NULL_TREE)
1972 exp = nexp;
1973 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1974 continue;
1976 break;
1978 break;
1980 if (is_bool)
1982 r->exp = exp;
1983 r->in_p = in_p;
1984 r->low = low;
1985 r->high = high;
1986 r->strict_overflow_p = strict_overflow_p;
1990 /* Comparison function for qsort. Sort entries
1991 without SSA_NAME exp first, then with SSA_NAMEs sorted
1992 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1993 by increasing ->low and if ->low is the same, by increasing
1994 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1995 maximum. */
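/* A hedged example of the resulting order: for the same SSA_NAME x_1, the
   entry +[-, 5] sorts before +[1, 3], which sorts before +[1, -], since a
   NULL low means minimum and a NULL high means maximum.  */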
1997 static int
1998 range_entry_cmp (const void *a, const void *b)
2000 const struct range_entry *p = (const struct range_entry *) a;
2001 const struct range_entry *q = (const struct range_entry *) b;
2003 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2005 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2007 /* Group range_entries for the same SSA_NAME together. */
2008 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2009 return -1;
2010 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2011 return 1;
2012 /* If ->low is different, NULL low goes first, then by
2013 ascending low. */
2014 if (p->low != NULL_TREE)
2016 if (q->low != NULL_TREE)
2018 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2019 p->low, q->low);
2020 if (tem && integer_onep (tem))
2021 return -1;
2022 tem = fold_binary (GT_EXPR, boolean_type_node,
2023 p->low, q->low);
2024 if (tem && integer_onep (tem))
2025 return 1;
2027 else
2028 return 1;
2030 else if (q->low != NULL_TREE)
2031 return -1;
2032 /* If ->high is different, NULL high goes last, before that by
2033 ascending high. */
2034 if (p->high != NULL_TREE)
2036 if (q->high != NULL_TREE)
2038 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2039 p->high, q->high);
2040 if (tem && integer_onep (tem))
2041 return -1;
2042 tem = fold_binary (GT_EXPR, boolean_type_node,
2043 p->high, q->high);
2044 if (tem && integer_onep (tem))
2045 return 1;
2047 else
2048 return -1;
2050 else if (q->high != NULL_TREE)
2051 return 1;
2052 /* If both ranges are the same, sort below by ascending idx. */
2054 else
2055 return 1;
2057 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2058 return -1;
2060 if (p->idx < q->idx)
2061 return -1;
2062 else
2064 gcc_checking_assert (p->idx > q->idx);
2065 return 1;
2069 /* Helper routine of optimize_range_tests.
2070 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2071 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2072 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2073    is NULL, OTHERRANGEP must not be NULL; it then points to
2074 an array of COUNT pointers to other ranges. Return
2075 true if the range merge has been successful.
2076 If OPCODE is ERROR_MARK, this is called from within
2077 maybe_optimize_range_tests and is performing inter-bb range optimization.
2078 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2079 oe->rank. */
2081 static bool
2082 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2083 struct range_entry **otherrangep,
2084 unsigned int count, enum tree_code opcode,
2085 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2086 bool in_p, tree low, tree high, bool strict_overflow_p)
2088 operand_entry_t oe = (*ops)[range->idx];
2089 tree op = oe->op;
2090 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2091 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2092 location_t loc = gimple_location (stmt);
2093 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2094 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2095 in_p, low, high);
2096 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2097 gimple_stmt_iterator gsi;
2098 unsigned int i;
2100 if (tem == NULL_TREE)
2101 return false;
2103 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2104 warning_at (loc, OPT_Wstrict_overflow,
2105 "assuming signed overflow does not occur "
2106 "when simplifying range test");
2108 if (dump_file && (dump_flags & TDF_DETAILS))
2110 struct range_entry *r;
2111 fprintf (dump_file, "Optimizing range tests ");
2112 print_generic_expr (dump_file, range->exp, 0);
2113 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2114 print_generic_expr (dump_file, range->low, 0);
2115 fprintf (dump_file, ", ");
2116 print_generic_expr (dump_file, range->high, 0);
2117 fprintf (dump_file, "]");
2118 for (i = 0; i < count; i++)
2120 if (otherrange)
2121 r = otherrange + i;
2122 else
2123 r = otherrangep[i];
2124 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2125 print_generic_expr (dump_file, r->low, 0);
2126 fprintf (dump_file, ", ");
2127 print_generic_expr (dump_file, r->high, 0);
2128 fprintf (dump_file, "]");
2130 fprintf (dump_file, "\n into ");
2131 print_generic_expr (dump_file, tem, 0);
2132 fprintf (dump_file, "\n");
2135 if (opcode == BIT_IOR_EXPR
2136 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2137 tem = invert_truthvalue_loc (loc, tem);
2139 tem = fold_convert_loc (loc, optype, tem);
2140 gsi = gsi_for_stmt (stmt);
2141 unsigned int uid = gimple_uid (stmt);
2142 /* In rare cases range->exp can be equal to lhs of stmt.
2143    In that case we have to insert after the stmt rather than before
2144 it. If stmt is a PHI, insert it at the start of the basic block. */
2145 if (op != range->exp)
2147 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2148 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2149 GSI_SAME_STMT);
2150 gsi_prev (&gsi);
2152 else if (gimple_code (stmt) != GIMPLE_PHI)
2154 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2155 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2156 GSI_CONTINUE_LINKING);
2158 else
2160 gsi = gsi_after_labels (gimple_bb (stmt));
2161 if (!gsi_end_p (gsi))
2162 uid = gimple_uid (gsi_stmt (gsi));
2163 else
2165 gsi = gsi_start_bb (gimple_bb (stmt));
2166 uid = 1;
2167 while (!gsi_end_p (gsi))
2169 uid = gimple_uid (gsi_stmt (gsi));
2170 gsi_next (&gsi);
2173 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2174 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2175 GSI_SAME_STMT);
2176 if (gsi_end_p (gsi))
2177 gsi = gsi_last_bb (gimple_bb (stmt));
2178 else
2179 gsi_prev (&gsi);
2181 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2182 if (gimple_uid (gsi_stmt (gsi)))
2183 break;
2184 else
2185 gimple_set_uid (gsi_stmt (gsi), uid);
2187 oe->op = tem;
2188 range->exp = exp;
2189 range->low = low;
2190 range->high = high;
2191 range->in_p = in_p;
2192 range->strict_overflow_p = false;
2194 for (i = 0; i < count; i++)
2196 if (otherrange)
2197 range = otherrange + i;
2198 else
2199 range = otherrangep[i];
2200 oe = (*ops)[range->idx];
2201 /* Now change all the other range test immediate uses, so that
2202 those tests will be optimized away. */
2203 if (opcode == ERROR_MARK)
2205 if (oe->op)
2206 oe->op = build_int_cst (TREE_TYPE (oe->op),
2207 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2208 else
2209 oe->op = (oe->rank == BIT_IOR_EXPR
2210 ? boolean_false_node : boolean_true_node);
2212 else
2213 oe->op = error_mark_node;
2214 range->exp = NULL_TREE;
2216 return true;
2219 /* Optimize X == CST1 || X == CST2
2220 if popcount (CST1 ^ CST2) == 1 into
2221 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2222 Similarly for ranges. E.g.
2223 X != 2 && X != 3 && X != 10 && X != 11
2224 will be transformed by the previous optimization into
2225 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2226 and this loop can transform that into
2227 !(((X & ~8) - 2U) <= 1U). */
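/* As a worked instance of the check below (values taken from the example
   above): for the ranges [2, 3] and [10, 11] we get lowi ^ lowj = 2 ^ 10
   = 8 and highi ^ highj = 3 ^ 11 = 8, a single bit, so EXP becomes
   X & ~8 and the merged range is [2 & ~8, 3 & ~8] = [2, 3], i.e. the
   (X & ~8) - 2U <= 1U test quoted above.  */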
2229 static bool
2230 optimize_range_tests_xor (enum tree_code opcode, tree type,
2231 tree lowi, tree lowj, tree highi, tree highj,
2232 vec<operand_entry_t> *ops,
2233 struct range_entry *rangei,
2234 struct range_entry *rangej)
2236 tree lowxor, highxor, tem, exp;
2237 /* Check lowi ^ lowj == highi ^ highj and
2238 popcount (lowi ^ lowj) == 1. */
2239 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2240 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2241 return false;
2242 if (!integer_pow2p (lowxor))
2243 return false;
2244 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2245 if (!tree_int_cst_equal (lowxor, highxor))
2246 return false;
2248 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2249 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2250 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2251 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2252 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2253 NULL, rangei->in_p, lowj, highj,
2254 rangei->strict_overflow_p
2255 || rangej->strict_overflow_p))
2256 return true;
2257 return false;
2260 /* Optimize X == CST1 || X == CST2
2261 if popcount (CST2 - CST1) == 1 into
2262 ((X - CST1) & ~(CST2 - CST1)) == 0.
2263 Similarly for ranges. E.g.
2264 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2265 || X == 75 || X == 45
2266 will be transformed by the previous optimization into
2267 (X - 43U) <= 3U || (X - 75U) <= 3U
2268 and this loop can transform that into
2269 ((X - 43U) & ~(75U - 43U)) <= 3U. */
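/* Worked instance (values from the example above): for the ranges
   [43, 46] and [75, 78] we have highi - lowi = highj - lowj = 3 and
   lowj - lowi = 32, a power of two, so the mask is ~32 and the merged
   test becomes ((X - 43U) & ~32U) <= 3U, matching the result quoted
   above.  */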
2270 static bool
2271 optimize_range_tests_diff (enum tree_code opcode, tree type,
2272 tree lowi, tree lowj, tree highi, tree highj,
2273 vec<operand_entry_t> *ops,
2274 struct range_entry *rangei,
2275 struct range_entry *rangej)
2277 tree tem1, tem2, mask;
2278 /* Check highi - lowi == highj - lowj. */
2279 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2280 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2281 return false;
2282 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2283 if (!tree_int_cst_equal (tem1, tem2))
2284 return false;
2285 /* Check popcount (lowj - lowi) == 1. */
2286 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2287 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2288 return false;
2289 if (!integer_pow2p (tem1))
2290 return false;
2292 type = unsigned_type_for (type);
2293 tem1 = fold_convert (type, tem1);
2294 tem2 = fold_convert (type, tem2);
2295 lowi = fold_convert (type, lowi);
2296 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2297 tem1 = fold_binary (MINUS_EXPR, type,
2298 fold_convert (type, rangei->exp), lowi);
2299 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2300 lowj = build_int_cst (type, 0);
2301 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2302 NULL, rangei->in_p, lowj, tem2,
2303 rangei->strict_overflow_p
2304 || rangej->strict_overflow_p))
2305 return true;
2306 return false;
2309 /* Perform the checks common to optimize_range_tests_xor and
2310    optimize_range_tests_diff and dispatch to one of them:
2311    if OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2312    otherwise call optimize_range_tests_diff.  */
2314 static bool
2315 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2316 bool optimize_xor, vec<operand_entry_t> *ops,
2317 struct range_entry *ranges)
2319 int i, j;
2320 bool any_changes = false;
2321 for (i = first; i < length; i++)
2323 tree lowi, highi, lowj, highj, type, tem;
2325 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2326 continue;
2327 type = TREE_TYPE (ranges[i].exp);
2328 if (!INTEGRAL_TYPE_P (type))
2329 continue;
2330 lowi = ranges[i].low;
2331 if (lowi == NULL_TREE)
2332 lowi = TYPE_MIN_VALUE (type);
2333 highi = ranges[i].high;
2334 if (highi == NULL_TREE)
2335 continue;
2336 for (j = i + 1; j < length && j < i + 64; j++)
2338 bool changes;
2339 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2340 continue;
2341 lowj = ranges[j].low;
2342 if (lowj == NULL_TREE)
2343 continue;
2344 highj = ranges[j].high;
2345 if (highj == NULL_TREE)
2346 highj = TYPE_MAX_VALUE (type);
2347 /* Check lowj > highi. */
2348 tem = fold_binary (GT_EXPR, boolean_type_node,
2349 lowj, highi);
2350 if (tem == NULL_TREE || !integer_onep (tem))
2351 continue;
2352 if (optimize_xor)
2353 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2354 highi, highj, ops,
2355 ranges + i, ranges + j);
2356 else
2357 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2358 highi, highj, ops,
2359 ranges + i, ranges + j);
2360 if (changes)
2362 any_changes = true;
2363 break;
2367 return any_changes;
2370 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2371 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2372 EXP on success, NULL otherwise. */
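/* For instance (using the numbers from the example further below), with
   PREC == 64, TOTALLOW == 43, LOW == 75 and HIGH == 78 this sets
   HIGH - LOW + 1 == 4 low bits and shifts them by LOW - TOTALLOW == 32
   positions, i.e. *MASK becomes 0xf00000000.  */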
2374 static tree
2375 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2376 wide_int *mask, tree *totallowp)
2378 tree tem = int_const_binop (MINUS_EXPR, high, low);
2379 if (tem == NULL_TREE
2380 || TREE_CODE (tem) != INTEGER_CST
2381 || TREE_OVERFLOW (tem)
2382 || tree_int_cst_sgn (tem) == -1
2383 || compare_tree_int (tem, prec) != -1)
2384 return NULL_TREE;
2386 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2387 *mask = wi::shifted_mask (0, max, false, prec);
2388 if (TREE_CODE (exp) == BIT_AND_EXPR
2389 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2391 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2392 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2393 if (wi::popcount (msk) == 1
2394 && wi::ltu_p (msk, prec - max))
2396 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2397 max += msk.to_uhwi ();
2398 exp = TREE_OPERAND (exp, 0);
2399 if (integer_zerop (low)
2400 && TREE_CODE (exp) == PLUS_EXPR
2401 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2403 tree ret = TREE_OPERAND (exp, 0);
2404 STRIP_NOPS (ret);
2405 widest_int bias
2406 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2407 TYPE_PRECISION (TREE_TYPE (low))));
2408 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2409 if (totallowp)
2411 *totallowp = tbias;
2412 return ret;
2414 else if (!tree_int_cst_lt (totallow, tbias))
2415 return NULL_TREE;
2416 bias = wi::to_widest (tbias);
2417 bias -= wi::to_widest (totallow);
2418 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2420 *mask = wi::lshift (*mask, bias);
2421 return ret;
2426 if (totallowp)
2427 return exp;
2428 if (!tree_int_cst_lt (totallow, low))
2429 return exp;
2430 tem = int_const_binop (MINUS_EXPR, low, totallow);
2431 if (tem == NULL_TREE
2432 || TREE_CODE (tem) != INTEGER_CST
2433 || TREE_OVERFLOW (tem)
2434 || compare_tree_int (tem, prec - max) == 1)
2435 return NULL_TREE;
2437 *mask = wi::lshift (*mask, wi::to_widest (tem));
2438 return exp;
2441 /* Attempt to optimize small range tests using bit test.
2442 E.g.
2443 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2444 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2445 has been optimized by earlier passes into:
2446 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2447 As the whole 43 through 82 range spans fewer than 64 values,
2448 for 64-bit word targets optimize that into:
2449 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
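/* The underlying idea, as a minimal stand-alone sketch in plain C
   (hypothetical helper, not code emitted by this pass): membership of X
   in the set {43, 44, 45, 46, 49, 75, 76, 77, 78, 82} from the example
   above takes one subtraction, one shift and one AND, because the whole
   set fits in a 64-bit word relative to its smallest member:

     static int
     in_set_p (unsigned long long x)
     {
       unsigned long long d = x - 43;
       return d <= 39 && ((1ULL << d) & 0x8F0000004FULL) != 0;
     }

   Here d <= 39 guards the shift (and rejects x < 43 via wrap-around),
   and 0x8F0000004FULL has bit (member - 43) set for every member.  */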
2451 static bool
2452 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2453 vec<operand_entry_t> *ops,
2454 struct range_entry *ranges)
2456 int i, j;
2457 bool any_changes = false;
2458 int prec = GET_MODE_BITSIZE (word_mode);
2459 auto_vec<struct range_entry *, 64> candidates;
2461 for (i = first; i < length - 2; i++)
2463 tree lowi, highi, lowj, highj, type;
2465 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2466 continue;
2467 type = TREE_TYPE (ranges[i].exp);
2468 if (!INTEGRAL_TYPE_P (type))
2469 continue;
2470 lowi = ranges[i].low;
2471 if (lowi == NULL_TREE)
2472 lowi = TYPE_MIN_VALUE (type);
2473 highi = ranges[i].high;
2474 if (highi == NULL_TREE)
2475 continue;
2476 wide_int mask;
2477 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2478 highi, &mask, &lowi);
2479 if (exp == NULL_TREE)
2480 continue;
2481 bool strict_overflow_p = ranges[i].strict_overflow_p;
2482 candidates.truncate (0);
2483 int end = MIN (i + 64, length);
2484 for (j = i + 1; j < end; j++)
2486 tree exp2;
2487 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2488 continue;
2489 if (ranges[j].exp == exp)
2491 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2493 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2494 if (exp2 == exp)
2496 else if (TREE_CODE (exp2) == PLUS_EXPR)
2498 exp2 = TREE_OPERAND (exp2, 0);
2499 STRIP_NOPS (exp2);
2500 if (exp2 != exp)
2501 continue;
2503 else
2504 continue;
2506 else
2507 continue;
2508 lowj = ranges[j].low;
2509 if (lowj == NULL_TREE)
2510 continue;
2511 highj = ranges[j].high;
2512 if (highj == NULL_TREE)
2513 highj = TYPE_MAX_VALUE (type);
2514 wide_int mask2;
2515 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2516 highj, &mask2, NULL);
2517 if (exp2 != exp)
2518 continue;
2519 mask |= mask2;
2520 strict_overflow_p |= ranges[j].strict_overflow_p;
2521 candidates.safe_push (&ranges[j]);
2524 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2525 if (candidates.length () >= 2)
2527 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2528 wi::to_widest (lowi)
2529 + prec - 1 - wi::clz (mask));
2530 operand_entry_t oe = (*ops)[ranges[i].idx];
2531 tree op = oe->op;
2532 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2533 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2534 location_t loc = gimple_location (stmt);
2535 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2537 /* See if it isn't cheaper to pretend the minimum value of the
2538    range is 0, if the maximum value is small enough.
2539    We can then avoid the subtraction of the minimum value, but the
2540    mask constant might be more expensive.  */
2541 if (compare_tree_int (lowi, 0) > 0
2542 && compare_tree_int (high, prec) < 0)
2544 int cost_diff;
2545 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2546 rtx reg = gen_raw_REG (word_mode, 10000);
2547 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2548 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2549 GEN_INT (-m)), speed_p);
2550 rtx r = immed_wide_int_const (mask, word_mode);
2551 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2552 speed_p);
2553 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2554 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2555 speed_p);
2556 if (cost_diff > 0)
2558 mask = wi::lshift (mask, m);
2559 lowi = build_zero_cst (TREE_TYPE (lowi));
2563 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2564 false, lowi, high);
2565 if (tem == NULL_TREE || is_gimple_val (tem))
2566 continue;
2567 tree etype = unsigned_type_for (TREE_TYPE (exp));
2568 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2569 fold_convert_loc (loc, etype, exp),
2570 fold_convert_loc (loc, etype, lowi));
2571 exp = fold_convert_loc (loc, integer_type_node, exp);
2572 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2573 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2574 build_int_cst (word_type, 1), exp);
2575 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2576 wide_int_to_tree (word_type, mask));
2577 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2578 build_zero_cst (word_type));
2579 if (is_gimple_val (exp))
2580 continue;
2582 /* The shift might have undefined behavior if TEM is true,
2583 but reassociate_bb isn't prepared to have basic blocks
2584 split when it is running. So, temporarily emit code
2585 with BIT_IOR_EXPR instead of &&, and fix it up in
2586 branch_fixup.  */
2587 gimple_seq seq;
2588 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2589 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2590 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2591 gimple_seq seq2;
2592 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2593 gimple_seq_add_seq_without_update (&seq, seq2);
2594 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2595 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2596 gimple g = gimple_build_assign (make_ssa_name (optype),
2597 BIT_IOR_EXPR, tem, exp);
2598 gimple_set_location (g, loc);
2599 gimple_seq_add_stmt_without_update (&seq, g);
2600 exp = gimple_assign_lhs (g);
2601 tree val = build_zero_cst (optype);
2602 if (update_range_test (&ranges[i], NULL, candidates.address (),
2603 candidates.length (), opcode, ops, exp,
2604 seq, false, val, val, strict_overflow_p))
2606 any_changes = true;
2607 reassoc_branch_fixups.safe_push (tem);
2609 else
2610 gimple_seq_discard (seq);
2613 return any_changes;
2616 /* Optimize range tests, similarly to how fold_range_test optimizes
2617 it on trees. The tree code for the binary
2618 operation between all the operands is OPCODE.
2619 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2620 maybe_optimize_range_tests for inter-bb range optimization.
2621 In that case if oe->op is NULL, oe->id is bb->index whose
2622 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2623 the actual opcode. */
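/* E.g. (roughly) for a_1 == 3 || a_1 == 4 the two single-value ranges
   [3, 3] and [4, 4] of a_1 are merged by merge_ranges into [3, 4] and
   re-emitted by update_range_test as a single (unsigned) a_1 - 3 <= 1
   style check.  */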
2625 static bool
2626 optimize_range_tests (enum tree_code opcode,
2627 vec<operand_entry_t> *ops)
2629 unsigned int length = ops->length (), i, j, first;
2630 operand_entry_t oe;
2631 struct range_entry *ranges;
2632 bool any_changes = false;
2634 if (length == 1)
2635 return false;
2637 ranges = XNEWVEC (struct range_entry, length);
2638 for (i = 0; i < length; i++)
2640 oe = (*ops)[i];
2641 ranges[i].idx = i;
2642 init_range_entry (ranges + i, oe->op,
2643 oe->op ? NULL :
2644 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2645 /* For | invert it now, we will invert it again before emitting
2646 the optimized expression. */
2647 if (opcode == BIT_IOR_EXPR
2648 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2649 ranges[i].in_p = !ranges[i].in_p;
2652 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2653 for (i = 0; i < length; i++)
2654 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2655 break;
2657 /* Try to merge ranges. */
2658 for (first = i; i < length; i++)
2660 tree low = ranges[i].low;
2661 tree high = ranges[i].high;
2662 int in_p = ranges[i].in_p;
2663 bool strict_overflow_p = ranges[i].strict_overflow_p;
2664 int update_fail_count = 0;
2666 for (j = i + 1; j < length; j++)
2668 if (ranges[i].exp != ranges[j].exp)
2669 break;
2670 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2671 ranges[j].in_p, ranges[j].low, ranges[j].high))
2672 break;
2673 strict_overflow_p |= ranges[j].strict_overflow_p;
2676 if (j == i + 1)
2677 continue;
2679 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2680 opcode, ops, ranges[i].exp, NULL, in_p,
2681 low, high, strict_overflow_p))
2683 i = j - 1;
2684 any_changes = true;
2686 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2687 while update_range_test would fail. */
2688 else if (update_fail_count == 64)
2689 i = j - 1;
2690 else
2691 ++update_fail_count;
2694 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2695 ops, ranges);
2697 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2698 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2699 ops, ranges);
2700 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2701 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2702 ops, ranges);
2704 if (any_changes && opcode != ERROR_MARK)
2706 j = 0;
2707 FOR_EACH_VEC_ELT (*ops, i, oe)
2709 if (oe->op == error_mark_node)
2710 continue;
2711 else if (i != j)
2712 (*ops)[j] = oe;
2713 j++;
2715 ops->truncate (j);
2718 XDELETEVEC (ranges);
2719 return any_changes;
2722 /* Return true if STMT is a cast like:
2723 <bb N>:
2725 _123 = (int) _234;
2727 <bb M>:
2728 # _345 = PHI <_123(N), 1(...), 1(...)>
2729 where _234 has bool type, _123 has a single use and
2730 bb N has a single successor M. This is commonly used in
2731 the last block of a range test. */
2733 static bool
2734 final_range_test_p (gimple stmt)
2736 basic_block bb, rhs_bb;
2737 edge e;
2738 tree lhs, rhs;
2739 use_operand_p use_p;
2740 gimple use_stmt;
2742 if (!gimple_assign_cast_p (stmt))
2743 return false;
2744 bb = gimple_bb (stmt);
2745 if (!single_succ_p (bb))
2746 return false;
2747 e = single_succ_edge (bb);
2748 if (e->flags & EDGE_COMPLEX)
2749 return false;
2751 lhs = gimple_assign_lhs (stmt);
2752 rhs = gimple_assign_rhs1 (stmt);
2753 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2754 || TREE_CODE (rhs) != SSA_NAME
2755 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2756 return false;
2758 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2759 if (!single_imm_use (lhs, &use_p, &use_stmt))
2760 return false;
2762 if (gimple_code (use_stmt) != GIMPLE_PHI
2763 || gimple_bb (use_stmt) != e->dest)
2764 return false;
2766 /* And that the rhs is defined in the same loop. */
2767 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2768 if (rhs_bb == NULL
2769 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2770 return false;
2772 return true;
2775 /* Return true if BB is suitable basic block for inter-bb range test
2776 optimization. If BACKWARD is true, BB should be the only predecessor
2777 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2778    or compared with to find a common basic block to which all conditions
2779    branch if true resp. false. If BACKWARD is false, TEST_BB should
2780 be the only predecessor of BB. */
2782 static bool
2783 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2784 bool backward)
2786 edge_iterator ei, ei2;
2787 edge e, e2;
2788 gimple stmt;
2789 gphi_iterator gsi;
2790 bool other_edge_seen = false;
2791 bool is_cond;
2793 if (test_bb == bb)
2794 return false;
2795 /* Check last stmt first. */
2796 stmt = last_stmt (bb);
2797 if (stmt == NULL
2798 || (gimple_code (stmt) != GIMPLE_COND
2799 && (backward || !final_range_test_p (stmt)))
2800 || gimple_visited_p (stmt)
2801 || stmt_could_throw_p (stmt)
2802 || *other_bb == bb)
2803 return false;
2804 is_cond = gimple_code (stmt) == GIMPLE_COND;
2805 if (is_cond)
2807 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2808 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2809 to *OTHER_BB (if not set yet, try to find it out). */
2810 if (EDGE_COUNT (bb->succs) != 2)
2811 return false;
2812 FOR_EACH_EDGE (e, ei, bb->succs)
2814 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2815 return false;
2816 if (e->dest == test_bb)
2818 if (backward)
2819 continue;
2820 else
2821 return false;
2823 if (e->dest == bb)
2824 return false;
2825 if (*other_bb == NULL)
2827 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2828 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2829 return false;
2830 else if (e->dest == e2->dest)
2831 *other_bb = e->dest;
2832 if (*other_bb == NULL)
2833 return false;
2835 if (e->dest == *other_bb)
2836 other_edge_seen = true;
2837 else if (backward)
2838 return false;
2840 if (*other_bb == NULL || !other_edge_seen)
2841 return false;
2843 else if (single_succ (bb) != *other_bb)
2844 return false;
2846 /* Now check all PHIs of *OTHER_BB. */
2847 e = find_edge (bb, *other_bb);
2848 e2 = find_edge (test_bb, *other_bb);
2849 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2851 gphi *phi = gsi.phi ();
2852 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2853 corresponding to BB and TEST_BB predecessor must be the same. */
2854 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2855 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2857 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2858 one of the PHIs should have the lhs of the last stmt in
2859 that block as PHI arg and that PHI should have 0 or 1
2860 corresponding to it in all other range test basic blocks
2861 considered. */
2862 if (!is_cond)
2864 if (gimple_phi_arg_def (phi, e->dest_idx)
2865 == gimple_assign_lhs (stmt)
2866 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2867 || integer_onep (gimple_phi_arg_def (phi,
2868 e2->dest_idx))))
2869 continue;
2871 else
2873 gimple test_last = last_stmt (test_bb);
2874 if (gimple_code (test_last) != GIMPLE_COND
2875 && gimple_phi_arg_def (phi, e2->dest_idx)
2876 == gimple_assign_lhs (test_last)
2877 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2878 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2879 continue;
2882 return false;
2885 return true;
2888 /* Return true if BB doesn't have side-effects that would disallow
2889 range test optimization, all SSA_NAMEs set in the bb are consumed
2890 in the bb and there are no PHIs. */
2892 static bool
2893 no_side_effect_bb (basic_block bb)
2895 gimple_stmt_iterator gsi;
2896 gimple last;
2898 if (!gimple_seq_empty_p (phi_nodes (bb)))
2899 return false;
2900 last = last_stmt (bb);
2901 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2903 gimple stmt = gsi_stmt (gsi);
2904 tree lhs;
2905 imm_use_iterator imm_iter;
2906 use_operand_p use_p;
2908 if (is_gimple_debug (stmt))
2909 continue;
2910 if (gimple_has_side_effects (stmt))
2911 return false;
2912 if (stmt == last)
2913 return true;
2914 if (!is_gimple_assign (stmt))
2915 return false;
2916 lhs = gimple_assign_lhs (stmt);
2917 if (TREE_CODE (lhs) != SSA_NAME)
2918 return false;
2919 if (gimple_assign_rhs_could_trap_p (stmt))
2920 return false;
2921 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2923 gimple use_stmt = USE_STMT (use_p);
2924 if (is_gimple_debug (use_stmt))
2925 continue;
2926 if (gimple_bb (use_stmt) != bb)
2927 return false;
2930 return false;
2933 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2934 return true and fill in *OPS recursively. */
2936 static bool
2937 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2938 struct loop *loop)
2940 gimple stmt = SSA_NAME_DEF_STMT (var);
2941 tree rhs[2];
2942 int i;
2944 if (!is_reassociable_op (stmt, code, loop))
2945 return false;
2947 rhs[0] = gimple_assign_rhs1 (stmt);
2948 rhs[1] = gimple_assign_rhs2 (stmt);
2949 gimple_set_visited (stmt, true);
2950 for (i = 0; i < 2; i++)
2951 if (TREE_CODE (rhs[i]) == SSA_NAME
2952 && !get_ops (rhs[i], code, ops, loop)
2953 && has_single_use (rhs[i]))
2955 operand_entry_t oe = operand_entry_pool.allocate ();
2957 oe->op = rhs[i];
2958 oe->rank = code;
2959 oe->id = 0;
2960 oe->count = 1;
2961 ops->safe_push (oe);
2963 return true;
2966 /* Find the ops that were added by get_ops starting from VAR, see if
2967 they were changed during update_range_test and if yes, create new
2968 stmts. */
2970 static tree
2971 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2972 unsigned int *pidx, struct loop *loop)
2974 gimple stmt = SSA_NAME_DEF_STMT (var);
2975 tree rhs[4];
2976 int i;
2978 if (!is_reassociable_op (stmt, code, loop))
2979 return NULL;
2981 rhs[0] = gimple_assign_rhs1 (stmt);
2982 rhs[1] = gimple_assign_rhs2 (stmt);
2983 rhs[2] = rhs[0];
2984 rhs[3] = rhs[1];
2985 for (i = 0; i < 2; i++)
2986 if (TREE_CODE (rhs[i]) == SSA_NAME)
2988 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2989 if (rhs[2 + i] == NULL_TREE)
2991 if (has_single_use (rhs[i]))
2992 rhs[2 + i] = ops[(*pidx)++]->op;
2993 else
2994 rhs[2 + i] = rhs[i];
2997 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2998 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3000 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3001 var = make_ssa_name (TREE_TYPE (var));
3002 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3003 rhs[2], rhs[3]);
3004 gimple_set_uid (g, gimple_uid (stmt));
3005 gimple_set_visited (g, true);
3006 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3008 return var;
3011 /* Structure to track the initial value passed to get_ops and
3012 the range in the ops vector for each basic block. */
3014 struct inter_bb_range_test_entry
3016 tree op;
3017 unsigned int first_idx, last_idx;
3020 /* Inter-bb range test optimization. */
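/* Schematically (hypothetical SSA names and block numbers), the shape
   this handles is a chain of single-test blocks all feeding one PHI:

     <bb 2>:
     if (a_5 != 2) goto <bb 3>; else goto <bb 5>;

     <bb 3>:
     if (a_5 != 3) goto <bb 4>; else goto <bb 5>;

     <bb 4>:
     _7 = a_5 != 10;
     _8 = (int) _7;

     <bb 5>:
     # _9 = PHI <0(2), 0(3), _8(4)>

   Here _9 is 1 iff a_5 != 2 && a_5 != 3 && a_5 != 10, so the three
   tests can be range-optimized together even though they live in
   different basic blocks.  */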
3022 static void
3023 maybe_optimize_range_tests (gimple stmt)
3025 basic_block first_bb = gimple_bb (stmt);
3026 basic_block last_bb = first_bb;
3027 basic_block other_bb = NULL;
3028 basic_block bb;
3029 edge_iterator ei;
3030 edge e;
3031 auto_vec<operand_entry_t> ops;
3032 auto_vec<inter_bb_range_test_entry> bbinfo;
3033 bool any_changes = false;
3035 /* Consider only basic blocks that end with GIMPLE_COND or
3036 a cast statement satisfying final_range_test_p. All
3037 but the last bb in the first_bb .. last_bb range
3038 should end with GIMPLE_COND. */
3039 if (gimple_code (stmt) == GIMPLE_COND)
3041 if (EDGE_COUNT (first_bb->succs) != 2)
3042 return;
3044 else if (final_range_test_p (stmt))
3045 other_bb = single_succ (first_bb);
3046 else
3047 return;
3049 if (stmt_could_throw_p (stmt))
3050 return;
3052 /* As relative ordering of post-dominator sons isn't fixed,
3053 maybe_optimize_range_tests can be called first on any
3054 bb in the range we want to optimize. So, start searching
3055 backwards, if first_bb can be set to a predecessor. */
3056 while (single_pred_p (first_bb))
3058 basic_block pred_bb = single_pred (first_bb);
3059 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3060 break;
3061 if (!no_side_effect_bb (first_bb))
3062 break;
3063 first_bb = pred_bb;
3065 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3066 Before starting forward search in last_bb successors, find
3067 out the other_bb. */
3068 if (first_bb == last_bb)
3070 other_bb = NULL;
3071 /* As a non-GIMPLE_COND last stmt always terminates the range,
3072    if the backward search didn't discover anything, just give up.
3073 if (gimple_code (stmt) != GIMPLE_COND)
3074 return;
3075 /* Look at both successors. Either it ends with a GIMPLE_COND
3076 and satisfies suitable_cond_bb, or ends with a cast and
3077 other_bb is that cast's successor. */
3078 FOR_EACH_EDGE (e, ei, first_bb->succs)
3079 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3080 || e->dest == first_bb)
3081 return;
3082 else if (single_pred_p (e->dest))
3084 stmt = last_stmt (e->dest);
3085 if (stmt
3086 && gimple_code (stmt) == GIMPLE_COND
3087 && EDGE_COUNT (e->dest->succs) == 2)
3089 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3090 break;
3091 else
3092 other_bb = NULL;
3094 else if (stmt
3095 && final_range_test_p (stmt)
3096 && find_edge (first_bb, single_succ (e->dest)))
3098 other_bb = single_succ (e->dest);
3099 if (other_bb == first_bb)
3100 other_bb = NULL;
3103 if (other_bb == NULL)
3104 return;
3106 /* Now do the forward search, moving last_bb to successor bbs
3107 that aren't other_bb. */
3108 while (EDGE_COUNT (last_bb->succs) == 2)
3110 FOR_EACH_EDGE (e, ei, last_bb->succs)
3111 if (e->dest != other_bb)
3112 break;
3113 if (e == NULL)
3114 break;
3115 if (!single_pred_p (e->dest))
3116 break;
3117 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3118 break;
3119 if (!no_side_effect_bb (e->dest))
3120 break;
3121 last_bb = e->dest;
3123 if (first_bb == last_bb)
3124 return;
3125 /* Here basic blocks first_bb through last_bb's predecessor
3126 end with GIMPLE_COND, all of them have one of the edges to
3127 other_bb and another to another block in the range,
3128 all blocks except first_bb don't have side-effects and
3129 last_bb ends with either GIMPLE_COND, or cast satisfying
3130 final_range_test_p. */
3131 for (bb = last_bb; ; bb = single_pred (bb))
3133 enum tree_code code;
3134 tree lhs, rhs;
3135 inter_bb_range_test_entry bb_ent;
3137 bb_ent.op = NULL_TREE;
3138 bb_ent.first_idx = ops.length ();
3139 bb_ent.last_idx = bb_ent.first_idx;
3140 e = find_edge (bb, other_bb);
3141 stmt = last_stmt (bb);
3142 gimple_set_visited (stmt, true);
3143 if (gimple_code (stmt) != GIMPLE_COND)
3145 use_operand_p use_p;
3146 gimple phi;
3147 edge e2;
3148 unsigned int d;
3150 lhs = gimple_assign_lhs (stmt);
3151 rhs = gimple_assign_rhs1 (stmt);
3152 gcc_assert (bb == last_bb);
3154 /* stmt is
3155 _123 = (int) _234;
3157 followed by:
3158 <bb M>:
3159 # _345 = PHI <_123(N), 1(...), 1(...)>
3161 or 0 instead of 1. If it is 0, the _234
3162 range test is anded together with all the
3163 other range tests, if it is 1, it is ored with
3164 them. */
3165 single_imm_use (lhs, &use_p, &phi);
3166 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3167 e2 = find_edge (first_bb, other_bb);
3168 d = e2->dest_idx;
3169 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3170 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3171 code = BIT_AND_EXPR;
3172 else
3174 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3175 code = BIT_IOR_EXPR;
3178 /* If _234 SSA_NAME_DEF_STMT is
3179 _234 = _567 | _789;
3180 (or &, corresponding to 1/0 in the phi arguments),
3181 push into ops the individual range test arguments
3182 of the bitwise or resp. and, recursively. */
3183 if (!get_ops (rhs, code, &ops,
3184 loop_containing_stmt (stmt))
3185 && has_single_use (rhs))
3187 /* Otherwise, push the _234 range test itself. */
3188 operand_entry_t oe = operand_entry_pool.allocate ();
3190 oe->op = rhs;
3191 oe->rank = code;
3192 oe->id = 0;
3193 oe->count = 1;
3194 ops.safe_push (oe);
3195 bb_ent.last_idx++;
3197 else
3198 bb_ent.last_idx = ops.length ();
3199 bb_ent.op = rhs;
3200 bbinfo.safe_push (bb_ent);
3201 continue;
3203 /* Otherwise stmt is GIMPLE_COND. */
3204 code = gimple_cond_code (stmt);
3205 lhs = gimple_cond_lhs (stmt);
3206 rhs = gimple_cond_rhs (stmt);
3207 if (TREE_CODE (lhs) == SSA_NAME
3208 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3209 && ((code != EQ_EXPR && code != NE_EXPR)
3210 || rhs != boolean_false_node
3211 /* Either push into ops the individual bitwise
3212 or resp. and operands, depending on which
3213 edge is other_bb. */
3214 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3215 ^ (code == EQ_EXPR))
3216 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3217 loop_containing_stmt (stmt))))
3219 /* Or push the GIMPLE_COND stmt itself. */
3220 operand_entry_t oe = operand_entry_pool.allocate ();
3222 oe->op = NULL;
3223 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3224 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3225 /* oe->op = NULL signals that there is no SSA_NAME
3226    for the range test; oe->id instead is the index of
3227    the basic block at whose end the GIMPLE_COND
3228    is found. */
3229 oe->id = bb->index;
3230 oe->count = 1;
3231 ops.safe_push (oe);
3232 bb_ent.op = NULL;
3233 bb_ent.last_idx++;
3235 else if (ops.length () > bb_ent.first_idx)
3237 bb_ent.op = lhs;
3238 bb_ent.last_idx = ops.length ();
3240 bbinfo.safe_push (bb_ent);
3241 if (bb == first_bb)
3242 break;
3244 if (ops.length () > 1)
3245 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3246 if (any_changes)
3248 unsigned int idx;
3249 /* update_ops relies on has_single_use predicates returning the
3250 same values as they did during get_ops earlier. Additionally it
3251 never removes statements, only adds new ones and it should walk
3252 from the single imm use and check the predicate already before
3253 making those changes.
3254 On the other hand, the handling of GIMPLE_COND directly can turn
3255 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3256 it needs to be done in a separate loop afterwards.  */
3257 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3259 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3260 && bbinfo[idx].op != NULL_TREE)
3262 tree new_op;
3264 stmt = last_stmt (bb);
3265 new_op = update_ops (bbinfo[idx].op,
3266 (enum tree_code)
3267 ops[bbinfo[idx].first_idx]->rank,
3268 ops, &bbinfo[idx].first_idx,
3269 loop_containing_stmt (stmt));
3270 if (new_op == NULL_TREE)
3272 gcc_assert (bb == last_bb);
3273 new_op = ops[bbinfo[idx].first_idx++]->op;
3275 if (bbinfo[idx].op != new_op)
3277 imm_use_iterator iter;
3278 use_operand_p use_p;
3279 gimple use_stmt, cast_stmt = NULL;
3281 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3282 if (is_gimple_debug (use_stmt))
3283 continue;
3284 else if (gimple_code (use_stmt) == GIMPLE_COND
3285 || gimple_code (use_stmt) == GIMPLE_PHI)
3286 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3287 SET_USE (use_p, new_op);
3288 else if (gimple_assign_cast_p (use_stmt))
3289 cast_stmt = use_stmt;
3290 else
3291 gcc_unreachable ();
3292 if (cast_stmt)
3294 gcc_assert (bb == last_bb);
3295 tree lhs = gimple_assign_lhs (cast_stmt);
3296 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3297 enum tree_code rhs_code
3298 = gimple_assign_rhs_code (cast_stmt);
3299 gassign *g;
3300 if (is_gimple_min_invariant (new_op))
3302 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3303 g = gimple_build_assign (new_lhs, new_op);
3305 else
3306 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3307 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3308 gimple_set_uid (g, gimple_uid (cast_stmt));
3309 gimple_set_visited (g, true);
3310 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3311 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3312 if (is_gimple_debug (use_stmt))
3313 continue;
3314 else if (gimple_code (use_stmt) == GIMPLE_COND
3315 || gimple_code (use_stmt) == GIMPLE_PHI)
3316 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3317 SET_USE (use_p, new_lhs);
3318 else
3319 gcc_unreachable ();
3323 if (bb == first_bb)
3324 break;
3326 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3328 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3329 && bbinfo[idx].op == NULL_TREE
3330 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3332 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3333 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3334 gimple_cond_make_false (cond_stmt);
3335 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3336 gimple_cond_make_true (cond_stmt);
3337 else
3339 gimple_cond_set_code (cond_stmt, NE_EXPR);
3340 gimple_cond_set_lhs (cond_stmt,
3341 ops[bbinfo[idx].first_idx]->op);
3342 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3344 update_stmt (cond_stmt);
3346 if (bb == first_bb)
3347 break;
3352 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3353 of STMT in its operands. This is also known as a "destructive
3354 update" operation. */
3356 static bool
3357 is_phi_for_stmt (gimple stmt, tree operand)
3359 gimple def_stmt;
3360 gphi *def_phi;
3361 tree lhs;
3362 use_operand_p arg_p;
3363 ssa_op_iter i;
3365 if (TREE_CODE (operand) != SSA_NAME)
3366 return false;
3368 lhs = gimple_assign_lhs (stmt);
3370 def_stmt = SSA_NAME_DEF_STMT (operand);
3371 def_phi = dyn_cast <gphi *> (def_stmt);
3372 if (!def_phi)
3373 return false;
3375 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3376 if (lhs == USE_FROM_PTR (arg_p))
3377 return true;
3378 return false;
3381 /* Remove the def stmt of VAR if VAR has zero uses, and if so recurse
3382    on its rhs1 operand. */
3384 static void
3385 remove_visited_stmt_chain (tree var)
3387 gimple stmt;
3388 gimple_stmt_iterator gsi;
3390 while (1)
3392 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3393 return;
3394 stmt = SSA_NAME_DEF_STMT (var);
3395 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3397 var = gimple_assign_rhs1 (stmt);
3398 gsi = gsi_for_stmt (stmt);
3399 reassoc_remove_stmt (&gsi);
3400 release_defs (stmt);
3402 else
3403 return;
3407 /* This function checks three consecutive operands in the
3408    passed operand vector OPS starting from OPINDEX and
3409    swaps two operands if it is profitable for the binary operation
3410    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3412 We pair ops with the same rank if possible.
3414 The alternative we try is to see if STMT is a destructive
3415 update style statement, which is like:
3416 b = phi (a, ...)
3417 a = c + b;
3418 In that case, we want to use the destructive update form to
3419 expose the possible vectorizer sum reduction opportunity.
3420 In that case, the third operand will be the phi node. This
3421 check is not performed if STMT is null.
3423 We could, of course, try to be better as noted above, and do a
3424 lot of work to try to find these opportunities in >3 operand
3425 cases, but it is unlikely to be worth it. */
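/* For instance (hypothetical ranks), if the three operands have ranks
   {4, 4, 7}, the first and third are swapped so the trailing pair that
   gets consumed together has equal ranks: {7, 4, 4}.  */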
3427 static void
3428 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3429 unsigned int opindex, gimple stmt)
3431 operand_entry_t oe1, oe2, oe3;
3433 oe1 = ops[opindex];
3434 oe2 = ops[opindex + 1];
3435 oe3 = ops[opindex + 2];
3437 if ((oe1->rank == oe2->rank
3438 && oe2->rank != oe3->rank)
3439 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3440 && !is_phi_for_stmt (stmt, oe1->op)
3441 && !is_phi_for_stmt (stmt, oe2->op)))
3443 struct operand_entry temp = *oe3;
3444 oe3->op = oe1->op;
3445 oe3->rank = oe1->rank;
3446 oe1->op = temp.op;
3447 oe1->rank= temp.rank;
3449 else if ((oe1->rank == oe3->rank
3450 && oe2->rank != oe3->rank)
3451 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3452 && !is_phi_for_stmt (stmt, oe1->op)
3453 && !is_phi_for_stmt (stmt, oe3->op)))
3455 struct operand_entry temp = *oe2;
3456 oe2->op = oe1->op;
3457 oe2->rank = oe1->rank;
3458 oe1->op = temp.op;
3459 oe1->rank = temp.rank;
3463 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3464 two definitions, otherwise return STMT. */
3466 static inline gimple
3467 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3469 if (TREE_CODE (rhs1) == SSA_NAME
3470 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3471 stmt = SSA_NAME_DEF_STMT (rhs1);
3472 if (TREE_CODE (rhs2) == SSA_NAME
3473 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3474 stmt = SSA_NAME_DEF_STMT (rhs2);
3475 return stmt;
3478 /* Recursively rewrite our linearized statements so that the operators
3479 match those in OPS[OPINDEX], putting the computation in rank
3480 order. Return new lhs. */
3482 static tree
3483 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3484 vec<operand_entry_t> ops, bool changed)
3486 tree rhs1 = gimple_assign_rhs1 (stmt);
3487 tree rhs2 = gimple_assign_rhs2 (stmt);
3488 tree lhs = gimple_assign_lhs (stmt);
3489 operand_entry_t oe;
3491 /* The final recursion case for this function is that you have
3492 exactly two operations left.
3493 If we had exactly one op in the entire list to start with, we
3494 would have never called this function, and the tail recursion
3495 rewrites them one at a time. */
3496 if (opindex + 2 == ops.length ())
3498 operand_entry_t oe1, oe2;
3500 oe1 = ops[opindex];
3501 oe2 = ops[opindex + 1];
3503 if (rhs1 != oe1->op || rhs2 != oe2->op)
3505 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3506 unsigned int uid = gimple_uid (stmt);
3508 if (dump_file && (dump_flags & TDF_DETAILS))
3510 fprintf (dump_file, "Transforming ");
3511 print_gimple_stmt (dump_file, stmt, 0, 0);
3514 /* Even when changed is false, reassociation could have e.g. removed
3515 some redundant operations, so unless we are just swapping the
3516 arguments or unless there is no change at all (then we just
3517 return lhs), force creation of a new SSA_NAME. */
3518 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3520 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3521 lhs = make_ssa_name (TREE_TYPE (lhs));
3522 stmt
3523 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3524 oe1->op, oe2->op);
3525 gimple_set_uid (stmt, uid);
3526 gimple_set_visited (stmt, true);
3527 if (insert_point == gsi_stmt (gsi))
3528 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3529 else
3530 insert_stmt_after (stmt, insert_point);
3532 else
3534 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3535 == stmt);
3536 gimple_assign_set_rhs1 (stmt, oe1->op);
3537 gimple_assign_set_rhs2 (stmt, oe2->op);
3538 update_stmt (stmt);
3541 if (rhs1 != oe1->op && rhs1 != oe2->op)
3542 remove_visited_stmt_chain (rhs1);
3544 if (dump_file && (dump_flags & TDF_DETAILS))
3546 fprintf (dump_file, " into ");
3547 print_gimple_stmt (dump_file, stmt, 0, 0);
3550 return lhs;
3553 /* If we hit here, we should have 3 or more ops left. */
3554 gcc_assert (opindex + 2 < ops.length ());
3556 /* Rewrite the next operator. */
3557 oe = ops[opindex];
3559 /* Recurse on the LHS of the binary operator, which is guaranteed to
3560 be the non-leaf side. */
3561 tree new_rhs1
3562 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3563 changed || oe->op != rhs2);
3565 if (oe->op != rhs2 || new_rhs1 != rhs1)
3567 if (dump_file && (dump_flags & TDF_DETAILS))
3569 fprintf (dump_file, "Transforming ");
3570 print_gimple_stmt (dump_file, stmt, 0, 0);
3573 /* If changed is false, this is either opindex == 0
3574 or all outer rhs2's were equal to corresponding oe->op,
3575 and powi_result is NULL.
3576 That means lhs is equivalent before and after reassociation.
3577 Otherwise ensure the old lhs SSA_NAME is not reused and
3578 create a new stmt as well, so that any debug stmts will be
3579 properly adjusted. */
3580 if (changed)
3582 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3583 unsigned int uid = gimple_uid (stmt);
3584 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3586 lhs = make_ssa_name (TREE_TYPE (lhs));
3587 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3588 new_rhs1, oe->op);
3589 gimple_set_uid (stmt, uid);
3590 gimple_set_visited (stmt, true);
3591 if (insert_point == gsi_stmt (gsi))
3592 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3593 else
3594 insert_stmt_after (stmt, insert_point);
3596 else
3598 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3599 == stmt);
3600 gimple_assign_set_rhs1 (stmt, new_rhs1);
3601 gimple_assign_set_rhs2 (stmt, oe->op);
3602 update_stmt (stmt);
3605 if (dump_file && (dump_flags & TDF_DETAILS))
3607 fprintf (dump_file, " into ");
3608 print_gimple_stmt (dump_file, stmt, 0, 0);
3611 return lhs;
3614 /* Find out how many cycles we need to compute a statement chain.
3615    OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3616    maximum number of independent statements we may execute per cycle. */
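/* For instance, with OPS_NUM == 16 and CPU_WIDTH == 2 this computes
   16 / 4 == 4 width-limited cycles, leaving 16 - 4 * 2 == 8 operands
   that a balanced reduction halves in log2 (8) == 3 further cycles,
   i.e. 7 cycles in total.  */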
3618 static int
3619 get_required_cycles (int ops_num, int cpu_width)
3621 int res;
3622 int elog;
3623 unsigned int rest;
3625 /* While we have more than 2 * cpu_width operands
3626 we may reduce the number of operands by cpu_width
3627 per cycle. */
3628 res = ops_num / (2 * cpu_width);
3630 /* The remaining operand count can be halved each cycle
3631    until we have only one operand. */
3632 rest = (unsigned)(ops_num - res * cpu_width);
3633 elog = exact_log2 (rest);
3634 if (elog >= 0)
3635 res += elog;
3636 else
3637 res += floor_log2 (rest) + 1;
3639 return res;
3642 /* Returns an optimal number of registers to use for computation of
3643 given statements. */
3645 static int
3646 get_reassociation_width (int ops_num, enum tree_code opc,
3647 machine_mode mode)
3649 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3650 int width;
3651 int width_min;
3652 int cycles_best;
3654 if (param_width > 0)
3655 width = param_width;
3656 else
3657 width = targetm.sched.reassociation_width (opc, mode);
3659 if (width == 1)
3660 return width;
3662 /* Get the minimal time required for sequence computation. */
3663 cycles_best = get_required_cycles (ops_num, width);
3665 /* Check if we may use a smaller width and still compute the sequence in
3666    the same time. That would allow us to reduce register usage.
3667 get_required_cycles is monotonically increasing with lower width
3668 so we can perform a binary search for the minimal width that still
3669 results in the optimal cycle count. */
3670 width_min = 1;
3671 while (width > width_min)
3673 int width_mid = (width + width_min) / 2;
3675 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3676 width = width_mid;
3677 else if (width_min < width_mid)
3678 width_min = width_mid;
3679 else
3680 break;
3683 return width;
3686 /* Recursively rewrite our linearized statements so that the operators
3687 match those in OPS[OPINDEX], putting the computation in rank
3688 order and trying to allow operations to be executed in
3689 parallel. */
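/* Schematically, with width 2 a chain like a + b + c + d can be emitted
   as the two independent sums a + b and c + d followed by their sum,
   instead of the fully serial ((a + b) + c) + d.  */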
3691 static void
3692 rewrite_expr_tree_parallel (gassign *stmt, int width,
3693 vec<operand_entry_t> ops)
3695 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3696 int op_num = ops.length ();
3697 int stmt_num = op_num - 1;
3698 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3699 int op_index = op_num - 1;
3700 int stmt_index = 0;
3701 int ready_stmts_end = 0;
3702 int i = 0;
3703 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3705 /* We start expression rewriting from the top statements.
3706 So, in this loop we create a full list of statements
3707 we will work with. */
3708 stmts[stmt_num - 1] = stmt;
3709 for (i = stmt_num - 2; i >= 0; i--)
3710 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3712 for (i = 0; i < stmt_num; i++)
3714 tree op1, op2;
3716 /* Determine whether we should use results of
3717 already handled statements or not. */
3718 if (ready_stmts_end == 0
3719 && (i - stmt_index >= width || op_index < 1))
3720 ready_stmts_end = i;
3722 /* Now we choose operands for the next statement. A non-zero
3723    value in ready_stmts_end means that we should use
3724    the result of already generated statements as a new operand. */
3725 if (ready_stmts_end > 0)
3727 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3728 if (ready_stmts_end > stmt_index)
3729 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3730 else if (op_index >= 0)
3731 op2 = ops[op_index--]->op;
3732 else
3734 gcc_assert (stmt_index < i);
3735 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3738 if (stmt_index >= ready_stmts_end)
3739 ready_stmts_end = 0;
3741 else
3743 if (op_index > 1)
3744 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3745 op2 = ops[op_index--]->op;
3746 op1 = ops[op_index--]->op;
3749 /* If we emit the last statement then we should put
3750 operands into the last statement. It will also
3751 break the loop. */
3752 if (op_index < 0 && stmt_index == i)
3753 i = stmt_num - 1;
3755 if (dump_file && (dump_flags & TDF_DETAILS))
3757 fprintf (dump_file, "Transforming ");
3758 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3761 /* We keep original statement only for the last one. All
3762 others are recreated. */
3763 if (i == stmt_num - 1)
3765 gimple_assign_set_rhs1 (stmts[i], op1);
3766 gimple_assign_set_rhs2 (stmts[i], op2);
3767 update_stmt (stmts[i]);
3769 else
3770 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3772 if (dump_file && (dump_flags & TDF_DETAILS))
3774 fprintf (dump_file, " into ");
3775 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3779 remove_visited_stmt_chain (last_rhs1);
3782 /* Transform STMT, which is really (A + B) + (C + D), into the left
3783    linear form, ((A + B) + C) + D.
3784 Recurse on D if necessary. */
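/* Schematically (hypothetical SSA names; the exact operand order is
   irrelevant here, since the later rank-order rewrite re-sorts the
   operands anyway):

     _5 = a_1 + b_2;
     _6 = c_3 + d_4;
     _7 = _5 + _6;

   is rewritten into the left-linear chain

     _5 = a_1 + b_2;
     _8 = _5 + d_4;
     _7 = _8 + c_3;

   and the now dead _6 definition is removed.  */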
3786 static void
3787 linearize_expr (gimple stmt)
3789 gimple_stmt_iterator gsi;
3790 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3791 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3792 gimple oldbinrhs = binrhs;
3793 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3794 gimple newbinrhs = NULL;
3795 struct loop *loop = loop_containing_stmt (stmt);
3796 tree lhs = gimple_assign_lhs (stmt);
3798 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3799 && is_reassociable_op (binrhs, rhscode, loop));
3801 gsi = gsi_for_stmt (stmt);
3803 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3804 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3805 gimple_assign_rhs_code (binrhs),
3806 gimple_assign_lhs (binlhs),
3807 gimple_assign_rhs2 (binrhs));
3808 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3809 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3810 gimple_set_uid (binrhs, gimple_uid (stmt));
3812 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3813 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3815 if (dump_file && (dump_flags & TDF_DETAILS))
3817 fprintf (dump_file, "Linearized: ");
3818 print_gimple_stmt (dump_file, stmt, 0, 0);
3821 reassociate_stats.linearized++;
3822 update_stmt (stmt);
3824 gsi = gsi_for_stmt (oldbinrhs);
3825 reassoc_remove_stmt (&gsi);
3826 release_defs (oldbinrhs);
3828 gimple_set_visited (stmt, true);
3829 gimple_set_visited (binlhs, true);
3830 gimple_set_visited (binrhs, true);
3832 /* Tail recurse on the new rhs if it still needs reassociation. */
3833 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3834 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3835 want to change the algorithm while converting to tuples. */
3836 linearize_expr (stmt);
3839 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3840 it. Otherwise, return NULL. */
3842 static gimple
3843 get_single_immediate_use (tree lhs)
3845 use_operand_p immuse;
3846 gimple immusestmt;
3848 if (TREE_CODE (lhs) == SSA_NAME
3849 && single_imm_use (lhs, &immuse, &immusestmt)
3850 && is_gimple_assign (immusestmt))
3851 return immusestmt;
3853 return NULL;
3856 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3857 representing the negated value. Insertions of any necessary
3858 instructions go before GSI.
3859 This function is recursive in that, if you hand it "a_5" as the
3860 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3861 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3863 static tree
3864 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3866 gimple negatedefstmt = NULL;
3867 tree resultofnegate;
3868 gimple_stmt_iterator gsi;
3869 unsigned int uid;
3871 /* If we are trying to negate a name, defined by an add, negate the
3872 add operands instead. */
3873 if (TREE_CODE (tonegate) == SSA_NAME)
3874 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3875 if (TREE_CODE (tonegate) == SSA_NAME
3876 && is_gimple_assign (negatedefstmt)
3877 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3878 && has_single_use (gimple_assign_lhs (negatedefstmt))
3879 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3881 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3882 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3883 tree lhs = gimple_assign_lhs (negatedefstmt);
3884 gimple g;
3886 gsi = gsi_for_stmt (negatedefstmt);
3887 rhs1 = negate_value (rhs1, &gsi);
3889 gsi = gsi_for_stmt (negatedefstmt);
3890 rhs2 = negate_value (rhs2, &gsi);
3892 gsi = gsi_for_stmt (negatedefstmt);
3893 lhs = make_ssa_name (TREE_TYPE (lhs));
3894 gimple_set_visited (negatedefstmt, true);
3895 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3896 gimple_set_uid (g, gimple_uid (negatedefstmt));
3897 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3898 return lhs;
3901 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3902 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3903 NULL_TREE, true, GSI_SAME_STMT);
3904 gsi = *gsip;
3905 uid = gimple_uid (gsi_stmt (gsi));
3906 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3908 gimple stmt = gsi_stmt (gsi);
3909 if (gimple_uid (stmt) != 0)
3910 break;
3911 gimple_set_uid (stmt, uid);
3913 return resultofnegate;
3916 /* Return true if we should break up the subtract in STMT into an add
3917 with negate. This is true when the subtract operands are really
3918 adds, or the subtract itself is used in an add or multiply expression. In
3919 either case, breaking up the subtract into an add with negate
3920 exposes the adds to reassociation. */
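/* For example (hypothetical names), given

     c_4 = a_1 + b_2;
     d_5 = c_4 - e_3;

   the subtract d_5 = c_4 - e_3 is broken up because its first operand is
   defined by a reassociable add; likewise a subtract whose result feeds
   only an add (or multiply) is broken up so the negate can be folded into
   the surrounding chain.  */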
3922 static bool
3923 should_break_up_subtract (gimple stmt)
3925 tree lhs = gimple_assign_lhs (stmt);
3926 tree binlhs = gimple_assign_rhs1 (stmt);
3927 tree binrhs = gimple_assign_rhs2 (stmt);
3928 gimple immusestmt;
3929 struct loop *loop = loop_containing_stmt (stmt);
3931 if (TREE_CODE (binlhs) == SSA_NAME
3932 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3933 return true;
3935 if (TREE_CODE (binrhs) == SSA_NAME
3936 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3937 return true;
3939 if (TREE_CODE (lhs) == SSA_NAME
3940 && (immusestmt = get_single_immediate_use (lhs))
3941 && is_gimple_assign (immusestmt)
3942 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3943 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3944 return true;
3945 return false;
3948 /* Transform STMT from A - B into A + -B. */
3950 static void
3951 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3953 tree rhs1 = gimple_assign_rhs1 (stmt);
3954 tree rhs2 = gimple_assign_rhs2 (stmt);
3956 if (dump_file && (dump_flags & TDF_DETAILS))
3958 fprintf (dump_file, "Breaking up subtract ");
3959 print_gimple_stmt (dump_file, stmt, 0, 0);
3962 rhs2 = negate_value (rhs2, gsip);
3963 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3964 update_stmt (stmt);
3967 /* Determine whether STMT is a builtin call that raises an SSA name
3968 to an integer power and has only one use. If so, and this is early
3969 reassociation and unsafe math optimizations are permitted, place
3970 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3971 If any of these conditions does not hold, return FALSE. */
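/* For instance, during the first reassociation pass with
   -funsafe-math-optimizations (and, for pow, -fno-math-errno), a call like

     t_3 = __builtin_pow (x_1, 4.0);

   where t_3 has a single use yields *BASE = x_1 and *EXPONENT = 4, as does
   __builtin_powi (x_1, 4).  Calls with fractional, negative, zero or unit
   exponents are rejected.  */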
3973 static bool
3974 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3976 tree fndecl, arg1;
3977 REAL_VALUE_TYPE c, cint;
3979 if (!first_pass_instance
3980 || !flag_unsafe_math_optimizations
3981 || !is_gimple_call (stmt)
3982 || !has_single_use (gimple_call_lhs (stmt)))
3983 return false;
3985 fndecl = gimple_call_fndecl (stmt);
3987 if (!fndecl
3988 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3989 return false;
3991 switch (DECL_FUNCTION_CODE (fndecl))
3993 CASE_FLT_FN (BUILT_IN_POW):
3994 if (flag_errno_math)
3995 return false;
3997 *base = gimple_call_arg (stmt, 0);
3998 arg1 = gimple_call_arg (stmt, 1);
4000 if (TREE_CODE (arg1) != REAL_CST)
4001 return false;
4003 c = TREE_REAL_CST (arg1);
4005 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4006 return false;
4008 *exponent = real_to_integer (&c);
4009 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4010 if (!real_identical (&c, &cint))
4011 return false;
4013 break;
4015 CASE_FLT_FN (BUILT_IN_POWI):
4016 *base = gimple_call_arg (stmt, 0);
4017 arg1 = gimple_call_arg (stmt, 1);
4019 if (!tree_fits_shwi_p (arg1))
4020 return false;
4022 *exponent = tree_to_shwi (arg1);
4023 break;
4025 default:
4026 return false;
4029 /* Expanding negative exponents is generally unproductive, so we don't
4030 complicate matters with those. Exponents of zero and one should
4031 have been handled by expression folding. */
4032 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4033 return false;
4035 return true;
4038 /* Recursively linearize a binary expression that is the RHS of STMT.
4039 Place the operands of the expression tree in the vector named OPS. */
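/* A sketch of the intended result, with hypothetical SSA names: for the
   statement e_5 = c_3 + d_4, where c_3 = a_1 + b_2 is also reassociable,
   the tree is walked so that OPS ends up holding a_1, b_2 and d_4 (the
   vector is subsequently sorted by rank).  For multiply chains during the
   first pass with unsafe math, pow/powi calls are recorded as
   (base, exponent) repeat entries instead.  */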
4041 static void
4042 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4043 bool is_associative, bool set_visited)
4045 tree binlhs = gimple_assign_rhs1 (stmt);
4046 tree binrhs = gimple_assign_rhs2 (stmt);
4047 gimple binlhsdef = NULL, binrhsdef = NULL;
4048 bool binlhsisreassoc = false;
4049 bool binrhsisreassoc = false;
4050 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4051 struct loop *loop = loop_containing_stmt (stmt);
4052 tree base = NULL_TREE;
4053 HOST_WIDE_INT exponent = 0;
4055 if (set_visited)
4056 gimple_set_visited (stmt, true);
4058 if (TREE_CODE (binlhs) == SSA_NAME)
4060 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4061 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4062 && !stmt_could_throw_p (binlhsdef));
4065 if (TREE_CODE (binrhs) == SSA_NAME)
4067 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4068 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4069 && !stmt_could_throw_p (binrhsdef));
4072 /* If the LHS is not reassociable, but the RHS is, we need to swap
4073 them. If neither is reassociable, there is nothing we can do, so
4074 just put them in the ops vector. If the LHS is reassociable,
4075 linearize it. If both are reassociable, then linearize the RHS
4076 and the LHS. */
4078 if (!binlhsisreassoc)
4080 /* If this is not an associative operation, like division, give up. */
4081 if (!is_associative)
4083 add_to_ops_vec (ops, binrhs);
4084 return;
4087 if (!binrhsisreassoc)
4089 if (rhscode == MULT_EXPR
4090 && TREE_CODE (binrhs) == SSA_NAME
4091 && acceptable_pow_call (binrhsdef, &base, &exponent))
4093 add_repeat_to_ops_vec (ops, base, exponent);
4094 gimple_set_visited (binrhsdef, true);
4096 else
4097 add_to_ops_vec (ops, binrhs);
4099 if (rhscode == MULT_EXPR
4100 && TREE_CODE (binlhs) == SSA_NAME
4101 && acceptable_pow_call (binlhsdef, &base, &exponent))
4103 add_repeat_to_ops_vec (ops, base, exponent);
4104 gimple_set_visited (binlhsdef, true);
4106 else
4107 add_to_ops_vec (ops, binlhs);
4109 return;
4112 if (dump_file && (dump_flags & TDF_DETAILS))
4114 fprintf (dump_file, "swapping operands of ");
4115 print_gimple_stmt (dump_file, stmt, 0, 0);
4118 swap_ssa_operands (stmt,
4119 gimple_assign_rhs1_ptr (stmt),
4120 gimple_assign_rhs2_ptr (stmt));
4121 update_stmt (stmt);
4123 if (dump_file && (dump_flags & TDF_DETAILS))
4125 fprintf (dump_file, " is now ");
4126 print_gimple_stmt (dump_file, stmt, 0, 0);
4129 /* We want to make it so the lhs is always the reassociative op,
4130 so swap. */
4131 std::swap (binlhs, binrhs);
4133 else if (binrhsisreassoc)
4135 linearize_expr (stmt);
4136 binlhs = gimple_assign_rhs1 (stmt);
4137 binrhs = gimple_assign_rhs2 (stmt);
4140 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4141 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4142 rhscode, loop));
4143 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4144 is_associative, set_visited);
4146 if (rhscode == MULT_EXPR
4147 && TREE_CODE (binrhs) == SSA_NAME
4148 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4150 add_repeat_to_ops_vec (ops, base, exponent);
4151 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4153 else
4154 add_to_ops_vec (ops, binrhs);
4157 /* Repropagate the negates back into subtracts, since no other pass
4158 currently does it. */
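/* As an illustration with hypothetical names: for

     x_2 = -a_1;
     y_4 = b_3 + x_2;

   the addition is rewritten to y_4 = b_3 - a_1, and the MINUS_EXPR cases
   below push or fold the negate similarly.  */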
4160 static void
4161 repropagate_negates (void)
4163 unsigned int i = 0;
4164 tree negate;
4166 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4168 gimple user = get_single_immediate_use (negate);
4170 if (!user || !is_gimple_assign (user))
4171 continue;
4173 /* The negate operand can be either operand of a PLUS_EXPR
4174 (it can be the LHS if the RHS is a constant for example).
4176 Force the negate operand to the RHS of the PLUS_EXPR, then
4177 transform the PLUS_EXPR into a MINUS_EXPR. */
4178 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4180 /* If the negated operand appears on the LHS of the
4181 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4182 to force the negated operand to the RHS of the PLUS_EXPR. */
4183 if (gimple_assign_rhs1 (user) == negate)
4185 swap_ssa_operands (user,
4186 gimple_assign_rhs1_ptr (user),
4187 gimple_assign_rhs2_ptr (user));
4190 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4191 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4192 if (gimple_assign_rhs2 (user) == negate)
4194 tree rhs1 = gimple_assign_rhs1 (user);
4195 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4196 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4197 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4198 update_stmt (user);
4201 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4203 if (gimple_assign_rhs1 (user) == negate)
4205 /* We have
4206 x = -a
4207 y = x - b
4208 which we transform into
4209 x = a + b
4210 y = -x .
4211 This pushes down the negate which we possibly can merge
4212 into some other operation, hence insert it into the
4213 plus_negates vector. */
4214 gimple feed = SSA_NAME_DEF_STMT (negate);
4215 tree a = gimple_assign_rhs1 (feed);
4216 tree b = gimple_assign_rhs2 (user);
4217 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4218 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4219 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4220 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4221 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4222 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4223 user = gsi_stmt (gsi2);
4224 update_stmt (user);
4225 reassoc_remove_stmt (&gsi);
4226 release_defs (feed);
4227 plus_negates.safe_push (gimple_assign_lhs (user));
4229 else
4231 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4232 rid of one operation. */
4233 gimple feed = SSA_NAME_DEF_STMT (negate);
4234 tree a = gimple_assign_rhs1 (feed);
4235 tree rhs1 = gimple_assign_rhs1 (user);
4236 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4237 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4238 update_stmt (gsi_stmt (gsi));
4244 /* Returns true if OP is of a type for which we can do reassociation.
4245 That is for integral types with wrapping overflow, non-saturating
4246 fixed-point types, and floating point types when associative-math is enabled. */
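/* For example, an unsigned int operand (wrapping overflow) or a float
   operand under -fassociative-math qualifies, while a default signed int,
   whose overflow is undefined, does not.  */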
4248 static bool
4249 can_reassociate_p (tree op)
4251 tree type = TREE_TYPE (op);
4252 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4253 || NON_SAT_FIXED_POINT_TYPE_P (type)
4254 || (flag_associative_math && FLOAT_TYPE_P (type)))
4255 return true;
4256 return false;
4259 /* Break up subtract operations in block BB.
4261 We do this top down because we don't know whether the subtract is
4262 part of a possible chain of reassociation except at the top.
4264 IE given
4265 d = f + g
4266 c = a + e
4267 b = c - d
4268 q = b - r
4269 k = t - q
4271 we want to break up k = t - q, but we won't until we've transformed q
4272 = b - r, which won't be broken up until we transform b = c - d.
4274 En passant, clear the GIMPLE visited flag on every statement
4275 and set UIDs within each basic block. */
4277 static void
4278 break_up_subtract_bb (basic_block bb)
4280 gimple_stmt_iterator gsi;
4281 basic_block son;
4282 unsigned int uid = 1;
4284 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4286 gimple stmt = gsi_stmt (gsi);
4287 gimple_set_visited (stmt, false);
4288 gimple_set_uid (stmt, uid++);
4290 if (!is_gimple_assign (stmt)
4291 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4292 continue;
4294 /* Look for simple gimple subtract operations. */
4295 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4297 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4298 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4299 continue;
4301 /* Check for a subtract used only in an addition. If this
4302 is the case, transform it into add of a negate for better
4303 reassociation. IE transform C = A-B into C = A + -B if C
4304 is only used in an addition. */
4305 if (should_break_up_subtract (stmt))
4306 break_up_subtract (stmt, &gsi);
4308 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4309 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4310 plus_negates.safe_push (gimple_assign_lhs (stmt));
4312 for (son = first_dom_son (CDI_DOMINATORS, bb);
4313 son;
4314 son = next_dom_son (CDI_DOMINATORS, son))
4315 break_up_subtract_bb (son);
4318 /* Used for repeated factor analysis. */
4319 struct repeat_factor_d
4321 /* An SSA name that occurs in a multiply chain. */
4322 tree factor;
4324 /* Cached rank of the factor. */
4325 unsigned rank;
4327 /* Number of occurrences of the factor in the chain. */
4328 HOST_WIDE_INT count;
4330 /* An SSA name representing the product of this factor and
4331 all factors appearing later in the repeated factor vector. */
4332 tree repr;
4335 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4336 typedef const struct repeat_factor_d *const_repeat_factor_t;
4339 static vec<repeat_factor> repeat_factor_vec;
4341 /* Used for sorting the repeat factor vector. Sort primarily by
4342 ascending occurrence count, secondarily by descending rank. */
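/* For instance, factors with counts { x_1: 4, y_2: 2, z_3: 2 } sort as the
   count-2 entries y_2 and z_3 before x_1 (count 4), and among the count-2
   entries the higher-ranked name comes first.  */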
4344 static int
4345 compare_repeat_factors (const void *x1, const void *x2)
4347 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4348 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4350 if (rf1->count != rf2->count)
4351 return rf1->count - rf2->count;
4353 return rf2->rank - rf1->rank;
4356 /* Look for repeated operands in OPS in the multiply tree rooted at
4357 STMT. Replace them with an optimal sequence of multiplies and powi
4358 builtin calls, and remove the used operands from OPS. Return an
4359 SSA name representing the value of the replacement sequence. */
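/* A sketch of the contract, with hypothetical names: if OPS holds the
   factors of x_1 * x_1 * x_1 * y_2 * y_2, the repeated factors are
   consumed and replaced by roughly

     t_3 = x_1 * y_2;
     t_4 = __builtin_powi (t_3, 2);
     t_5 = t_4 * x_1;

   t_5 is returned, the used copies of x_1 and y_2 are removed from OPS,
   and any factors left in OPS are handled by the normal rewrite.  */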
4361 static tree
4362 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4364 unsigned i, j, vec_len;
4365 int ii;
4366 operand_entry_t oe;
4367 repeat_factor_t rf1, rf2;
4368 repeat_factor rfnew;
4369 tree result = NULL_TREE;
4370 tree target_ssa, iter_result;
4371 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4372 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4373 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4374 gimple mul_stmt, pow_stmt;
4376 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4377 target. */
4378 if (!powi_fndecl)
4379 return NULL_TREE;
4381 /* Allocate the repeated factor vector. */
4382 repeat_factor_vec.create (10);
4384 /* Scan the OPS vector for all SSA names in the product and build
4385 up a vector of occurrence counts for each factor. */
4386 FOR_EACH_VEC_ELT (*ops, i, oe)
4388 if (TREE_CODE (oe->op) == SSA_NAME)
4390 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4392 if (rf1->factor == oe->op)
4394 rf1->count += oe->count;
4395 break;
4399 if (j >= repeat_factor_vec.length ())
4401 rfnew.factor = oe->op;
4402 rfnew.rank = oe->rank;
4403 rfnew.count = oe->count;
4404 rfnew.repr = NULL_TREE;
4405 repeat_factor_vec.safe_push (rfnew);
4410 /* Sort the repeated factor vector by (a) increasing occurrence count,
4411 and (b) decreasing rank. */
4412 repeat_factor_vec.qsort (compare_repeat_factors);
4414 /* It is generally best to combine as many base factors as possible
4415 into a product before applying __builtin_powi to the result.
4416 However, the sort order chosen for the repeated factor vector
4417 allows us to cache partial results for the product of the base
4418 factors for subsequent use. When we already have a cached partial
4419 result from a previous iteration, it is best to make use of it
4420 before looking for another __builtin_powi opportunity.
4422 As an example, consider x * x * y * y * y * z * z * z * z.
4423 We want to first compose the product x * y * z, raise it to the
4424 second power, then multiply this by y * z, and finally multiply
4425 by z. This can be done in 5 multiplies provided we cache y * z
4426 for use in both expressions:
4428 t1 = y * z
4429 t2 = t1 * x
4430 t3 = t2 * t2
4431 t4 = t1 * t3
4432 result = t4 * z
4434 If we instead ignored the cached y * z and first multiplied by
4435 the __builtin_powi opportunity z * z, we would get the inferior:
4437 t1 = y * z
4438 t2 = t1 * x
4439 t3 = t2 * t2
4440 t4 = z * z
4441 t5 = t3 * t4
4442 result = t5 * y */
4444 vec_len = repeat_factor_vec.length ();
4446 /* Repeatedly look for opportunities to create a builtin_powi call. */
4447 while (true)
4449 HOST_WIDE_INT power;
4451 /* First look for the largest cached product of factors from
4452 preceding iterations. If found, create a builtin_powi for
4453 it if the minimum occurrence count for its factors is at
4454 least 2, or just use this cached product as our next
4455 multiplicand if the minimum occurrence count is 1. */
4456 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4458 if (rf1->repr && rf1->count > 0)
4459 break;
4462 if (j < vec_len)
4464 power = rf1->count;
4466 if (power == 1)
4468 iter_result = rf1->repr;
4470 if (dump_file && (dump_flags & TDF_DETAILS))
4472 unsigned elt;
4473 repeat_factor_t rf;
4474 fputs ("Multiplying by cached product ", dump_file);
4475 for (elt = j; elt < vec_len; elt++)
4477 rf = &repeat_factor_vec[elt];
4478 print_generic_expr (dump_file, rf->factor, 0);
4479 if (elt < vec_len - 1)
4480 fputs (" * ", dump_file);
4482 fputs ("\n", dump_file);
4485 else
4487 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4488 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4489 build_int_cst (integer_type_node,
4490 power));
4491 gimple_call_set_lhs (pow_stmt, iter_result);
4492 gimple_set_location (pow_stmt, gimple_location (stmt));
4493 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4495 if (dump_file && (dump_flags & TDF_DETAILS))
4497 unsigned elt;
4498 repeat_factor_t rf;
4499 fputs ("Building __builtin_pow call for cached product (",
4500 dump_file);
4501 for (elt = j; elt < vec_len; elt++)
4503 rf = &repeat_factor_vec[elt];
4504 print_generic_expr (dump_file, rf->factor, 0);
4505 if (elt < vec_len - 1)
4506 fputs (" * ", dump_file);
4508 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4509 power);
4513 else
4515 /* Otherwise, find the first factor in the repeated factor
4516 vector whose occurrence count is at least 2. If no such
4517 factor exists, there are no builtin_powi opportunities
4518 remaining. */
4519 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4521 if (rf1->count >= 2)
4522 break;
4525 if (j >= vec_len)
4526 break;
4528 power = rf1->count;
4530 if (dump_file && (dump_flags & TDF_DETAILS))
4532 unsigned elt;
4533 repeat_factor_t rf;
4534 fputs ("Building __builtin_pow call for (", dump_file);
4535 for (elt = j; elt < vec_len; elt++)
4537 rf = &repeat_factor_vec[elt];
4538 print_generic_expr (dump_file, rf->factor, 0);
4539 if (elt < vec_len - 1)
4540 fputs (" * ", dump_file);
4542 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4545 reassociate_stats.pows_created++;
4547 /* Visit each element of the vector in reverse order (so that
4548 high-occurrence elements are visited first, and within the
4549 same occurrence count, lower-ranked elements are visited
4550 first). Form a linear product of all elements in this order
4551 whose occurrence count is at least that of element J.
4552 Record the SSA name representing the product of each element
4553 with all subsequent elements in the vector. */
4554 if (j == vec_len - 1)
4555 rf1->repr = rf1->factor;
4556 else
4558 for (ii = vec_len - 2; ii >= (int)j; ii--)
4560 tree op1, op2;
4562 rf1 = &repeat_factor_vec[ii];
4563 rf2 = &repeat_factor_vec[ii + 1];
4565 /* Init the last factor's representative to be itself. */
4566 if (!rf2->repr)
4567 rf2->repr = rf2->factor;
4569 op1 = rf1->factor;
4570 op2 = rf2->repr;
4572 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4573 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4574 op1, op2);
4575 gimple_set_location (mul_stmt, gimple_location (stmt));
4576 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4577 rf1->repr = target_ssa;
4579 /* Don't reprocess the multiply we just introduced. */
4580 gimple_set_visited (mul_stmt, true);
4584 /* Form a call to __builtin_powi for the maximum product
4585 just formed, raised to the power obtained earlier. */
4586 rf1 = &repeat_factor_vec[j];
4587 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4588 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4589 build_int_cst (integer_type_node,
4590 power));
4591 gimple_call_set_lhs (pow_stmt, iter_result);
4592 gimple_set_location (pow_stmt, gimple_location (stmt));
4593 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4596 /* If we previously formed at least one other builtin_powi call,
4597 form the product of this one and those others. */
4598 if (result)
4600 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4601 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4602 result, iter_result);
4603 gimple_set_location (mul_stmt, gimple_location (stmt));
4604 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4605 gimple_set_visited (mul_stmt, true);
4606 result = new_result;
4608 else
4609 result = iter_result;
4611 /* Decrement the occurrence count of each element in the product
4612 by the count found above, and remove this many copies of each
4613 factor from OPS. */
4614 for (i = j; i < vec_len; i++)
4616 unsigned k = power;
4617 unsigned n;
4619 rf1 = &repeat_factor_vec[i];
4620 rf1->count -= power;
4622 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4624 if (oe->op == rf1->factor)
4626 if (oe->count <= k)
4628 ops->ordered_remove (n);
4629 k -= oe->count;
4631 if (k == 0)
4632 break;
4634 else
4636 oe->count -= k;
4637 break;
4644 /* At this point all elements in the repeated factor vector have a
4645 remaining occurrence count of 0 or 1, and those with a count of 1
4646 don't have cached representatives. Re-sort the ops vector and
4647 clean up. */
4648 ops->qsort (sort_by_operand_rank);
4649 repeat_factor_vec.release ();
4651 /* Return the final product computed herein. Note that there may
4652 still be some elements with single occurrence count left in OPS;
4653 those will be handled by the normal reassociation logic. */
4654 return result;
4657 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4659 static void
4660 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4662 tree rhs1;
4664 if (dump_file && (dump_flags & TDF_DETAILS))
4666 fprintf (dump_file, "Transforming ");
4667 print_gimple_stmt (dump_file, stmt, 0, 0);
4670 rhs1 = gimple_assign_rhs1 (stmt);
4671 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4672 update_stmt (stmt);
4673 remove_visited_stmt_chain (rhs1);
4675 if (dump_file && (dump_flags & TDF_DETAILS))
4677 fprintf (dump_file, " into ");
4678 print_gimple_stmt (dump_file, stmt, 0, 0);
4682 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4684 static void
4685 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4686 tree rhs1, tree rhs2)
4688 if (dump_file && (dump_flags & TDF_DETAILS))
4690 fprintf (dump_file, "Transforming ");
4691 print_gimple_stmt (dump_file, stmt, 0, 0);
4694 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4695 update_stmt (gsi_stmt (*gsi));
4696 remove_visited_stmt_chain (rhs1);
4698 if (dump_file && (dump_flags & TDF_DETAILS))
4700 fprintf (dump_file, " into ");
4701 print_gimple_stmt (dump_file, stmt, 0, 0);
4705 /* Reassociate expressions in basic block BB and, recursively, in its
4706 children in the post-dominator tree. */
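/* As a rough end-to-end example with hypothetical names, a chain such as

     t_3 = a_1 + 5;
     t_5 = t_3 + b_2;
     t_6 = t_5 + 7;

   (in a type where reassociation is allowed, with single-use temporaries)
   is linearized into the operand list { a_1, 5, b_2, 7 }, the constants
   are folded to 12, and the chain is rewritten over { a_1, b_2, 12 } in
   rank order.  */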
4708 static void
4709 reassociate_bb (basic_block bb)
4711 gimple_stmt_iterator gsi;
4712 basic_block son;
4713 gimple stmt = last_stmt (bb);
4715 if (stmt && !gimple_visited_p (stmt))
4716 maybe_optimize_range_tests (stmt);
4718 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4720 stmt = gsi_stmt (gsi);
4722 if (is_gimple_assign (stmt)
4723 && !stmt_could_throw_p (stmt))
4725 tree lhs, rhs1, rhs2;
4726 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4728 /* If this is not a gimple binary expression, there is
4729 nothing for us to do with it. */
4730 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4731 continue;
4733 /* If this was part of an already processed statement,
4734 we don't need to touch it again. */
4735 if (gimple_visited_p (stmt))
4737 /* This statement might have become dead because of previous
4738 reassociations. */
4739 if (has_zero_uses (gimple_get_lhs (stmt)))
4741 reassoc_remove_stmt (&gsi);
4742 release_defs (stmt);
4743 /* Removing the statement above may have left the iterator
4744 at the end of the sequence. Reset it to the last statement
4745 of the block; if that is also the end (the block is now
4746 empty), bail out. */
4749 if (gsi_end_p (gsi))
4751 gsi = gsi_last_bb (bb);
4752 if (gsi_end_p (gsi))
4753 break;
4756 continue;
4759 lhs = gimple_assign_lhs (stmt);
4760 rhs1 = gimple_assign_rhs1 (stmt);
4761 rhs2 = gimple_assign_rhs2 (stmt);
4763 /* For operations other than bitwise and min/max we can't associate
4764 all types. Verify that here. */
4765 if (rhs_code != BIT_IOR_EXPR
4766 && rhs_code != BIT_AND_EXPR
4767 && rhs_code != BIT_XOR_EXPR
4768 && rhs_code != MIN_EXPR
4769 && rhs_code != MAX_EXPR
4770 && (!can_reassociate_p (lhs)
4771 || !can_reassociate_p (rhs1)
4772 || !can_reassociate_p (rhs2)))
4773 continue;
4775 if (associative_tree_code (rhs_code))
4777 auto_vec<operand_entry_t> ops;
4778 tree powi_result = NULL_TREE;
4780 /* There may be no immediate uses left by the time we
4781 get here because we may have eliminated them all. */
4782 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4783 continue;
4785 gimple_set_visited (stmt, true);
4786 linearize_expr_tree (&ops, stmt, true, true);
4787 ops.qsort (sort_by_operand_rank);
4788 optimize_ops_list (rhs_code, &ops);
4789 if (undistribute_ops_list (rhs_code, &ops,
4790 loop_containing_stmt (stmt)))
4792 ops.qsort (sort_by_operand_rank);
4793 optimize_ops_list (rhs_code, &ops);
4796 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4797 optimize_range_tests (rhs_code, &ops);
4799 if (first_pass_instance
4800 && rhs_code == MULT_EXPR
4801 && flag_unsafe_math_optimizations)
4802 powi_result = attempt_builtin_powi (stmt, &ops);
4804 /* If the operand vector is now empty, all operands were
4805 consumed by the __builtin_powi optimization. */
4806 if (ops.length () == 0)
4807 transform_stmt_to_copy (&gsi, stmt, powi_result);
4808 else if (ops.length () == 1)
4810 tree last_op = ops.last ()->op;
4812 if (powi_result)
4813 transform_stmt_to_multiply (&gsi, stmt, last_op,
4814 powi_result);
4815 else
4816 transform_stmt_to_copy (&gsi, stmt, last_op);
4818 else
4820 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4821 int ops_num = ops.length ();
4822 int width = get_reassociation_width (ops_num, rhs_code, mode);
4823 tree new_lhs = lhs;
4825 if (dump_file && (dump_flags & TDF_DETAILS))
4826 fprintf (dump_file,
4827 "Width = %d was chosen for reassociation\n", width);
4829 if (width > 1
4830 && ops.length () > 3)
4831 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4832 width, ops);
4833 else
4835 /* When there are three operands left, we want
4836 to make sure the ones that get the double
4837 binary op are chosen wisely. */
4838 int len = ops.length ();
4839 if (len >= 3)
4840 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4842 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4843 powi_result != NULL);
4846 /* If we combined some repeated factors into a
4847 __builtin_powi call, multiply that result by the
4848 reassociated operands. */
4849 if (powi_result)
4851 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4852 tree type = TREE_TYPE (lhs);
4853 tree target_ssa = make_temp_ssa_name (type, NULL,
4854 "reassocpow");
4855 gimple_set_lhs (lhs_stmt, target_ssa);
4856 update_stmt (lhs_stmt);
4857 if (lhs != new_lhs)
4858 target_ssa = new_lhs;
4859 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4860 powi_result, target_ssa);
4861 gimple_set_location (mul_stmt, gimple_location (stmt));
4862 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4868 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4869 son;
4870 son = next_dom_son (CDI_POST_DOMINATORS, son))
4871 reassociate_bb (son);
4874 /* Add jumps around shifts for range tests turned into bit tests.
4875 For each SSA_NAME VAR we have code like:
4876 VAR = ...; // final stmt of range comparison
4877 // bit test here...;
4878 OTHERVAR = ...; // final stmt of the bit test sequence
4879 RES = VAR | OTHERVAR;
4880 Turn the above into:
4881 VAR = ...;
4882 if (VAR != 0)
4883 goto <l3>;
4884 else
4885 goto <l2>;
4886 <l2>:
4887 // bit test here...;
4888 OTHERVAR = ...;
4889 <l3>:
4890 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4892 static void
4893 branch_fixup (void)
4895 tree var;
4896 unsigned int i;
4898 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4900 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4901 gimple use_stmt;
4902 use_operand_p use;
4903 bool ok = single_imm_use (var, &use, &use_stmt);
4904 gcc_assert (ok
4905 && is_gimple_assign (use_stmt)
4906 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4907 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4909 basic_block cond_bb = gimple_bb (def_stmt);
4910 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4911 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4913 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4914 gimple g = gimple_build_cond (NE_EXPR, var,
4915 build_zero_cst (TREE_TYPE (var)),
4916 NULL_TREE, NULL_TREE);
4917 location_t loc = gimple_location (use_stmt);
4918 gimple_set_location (g, loc);
4919 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4921 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4922 etrue->probability = REG_BR_PROB_BASE / 2;
4923 etrue->count = cond_bb->count / 2;
4924 edge efalse = find_edge (cond_bb, then_bb);
4925 efalse->flags = EDGE_FALSE_VALUE;
4926 efalse->probability -= etrue->probability;
4927 efalse->count -= etrue->count;
4928 then_bb->count -= etrue->count;
4930 tree othervar = NULL_TREE;
4931 if (gimple_assign_rhs1 (use_stmt) == var)
4932 othervar = gimple_assign_rhs2 (use_stmt);
4933 else if (gimple_assign_rhs2 (use_stmt) == var)
4934 othervar = gimple_assign_rhs1 (use_stmt);
4935 else
4936 gcc_unreachable ();
4937 tree lhs = gimple_assign_lhs (use_stmt);
4938 gphi *phi = create_phi_node (lhs, merge_bb);
4939 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4940 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4941 gsi = gsi_for_stmt (use_stmt);
4942 gsi_remove (&gsi, true);
4944 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4945 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4947 reassoc_branch_fixups.release ();
4950 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4951 void debug_ops_vector (vec<operand_entry_t> ops);
4953 /* Dump the operand entry vector OPS to FILE. */
4955 void
4956 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4958 operand_entry_t oe;
4959 unsigned int i;
4961 FOR_EACH_VEC_ELT (ops, i, oe)
4963 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4964 print_generic_expr (file, oe->op, 0);
4968 /* Dump the operand entry vector OPS to STDERR. */
4970 DEBUG_FUNCTION void
4971 debug_ops_vector (vec<operand_entry_t> ops)
4973 dump_ops_vector (stderr, ops);
4976 static void
4977 do_reassoc (void)
4979 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4980 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4983 /* Initialize the reassociation pass. */
4985 static void
4986 init_reassoc (void)
4988 int i;
4989 long rank = 2;
4990 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4992 /* Find the loops, so that we can prevent moving calculations in
4993 them. */
4994 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4996 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4998 next_operand_entry_id = 0;
5000 /* Reverse post order (RPO) will give us something where
5001 deeper loops come later. */
5002 pre_and_rev_post_order_compute (NULL, bbs, false);
5003 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5004 operand_rank = new hash_map<tree, long>;
5006 /* Give each default definition a distinct rank. This includes
5007 parameters and the static chain. Walk backwards over all
5008 SSA names so that we get proper rank ordering according
5009 to tree_swap_operands_p. */
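/* For example, in a function taking parameters a and b, the default
   definitions a_1(D) and b_2(D) receive small distinct ranks (3, 4, ...),
   while each basic block later gets bb_rank[bb] = ++rank << 16, so block
   ranks dominate the default-definition ranks when ordering operands.  */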
5010 for (i = num_ssa_names - 1; i > 0; --i)
5012 tree name = ssa_name (i);
5013 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5014 insert_operand_rank (name, ++rank);
5017 /* Set up rank for each BB */
5018 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5019 bb_rank[bbs[i]] = ++rank << 16;
5021 free (bbs);
5022 calculate_dominance_info (CDI_POST_DOMINATORS);
5023 plus_negates = vNULL;
5026 /* Cleanup after the reassociation pass, and print stats if
5027 requested. */
5029 static void
5030 fini_reassoc (void)
5032 statistics_counter_event (cfun, "Linearized",
5033 reassociate_stats.linearized);
5034 statistics_counter_event (cfun, "Constants eliminated",
5035 reassociate_stats.constants_eliminated);
5036 statistics_counter_event (cfun, "Ops eliminated",
5037 reassociate_stats.ops_eliminated);
5038 statistics_counter_event (cfun, "Statements rewritten",
5039 reassociate_stats.rewritten);
5040 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5041 reassociate_stats.pows_encountered);
5042 statistics_counter_event (cfun, "Built-in powi calls created",
5043 reassociate_stats.pows_created);
5045 delete operand_rank;
5046 operand_entry_pool.release ();
5047 free (bb_rank);
5048 plus_negates.release ();
5049 free_dominance_info (CDI_POST_DOMINATORS);
5050 loop_optimizer_finalize ();
5053 /* Gate and execute functions for Reassociation. */
5055 static unsigned int
5056 execute_reassoc (void)
5058 init_reassoc ();
5060 do_reassoc ();
5061 repropagate_negates ();
5062 branch_fixup ();
5064 fini_reassoc ();
5065 return 0;
5068 namespace {
5070 const pass_data pass_data_reassoc =
5072 GIMPLE_PASS, /* type */
5073 "reassoc", /* name */
5074 OPTGROUP_NONE, /* optinfo_flags */
5075 TV_TREE_REASSOC, /* tv_id */
5076 ( PROP_cfg | PROP_ssa ), /* properties_required */
5077 0, /* properties_provided */
5078 0, /* properties_destroyed */
5079 0, /* todo_flags_start */
5080 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5083 class pass_reassoc : public gimple_opt_pass
5085 public:
5086 pass_reassoc (gcc::context *ctxt)
5087 : gimple_opt_pass (pass_data_reassoc, ctxt)
5090 /* opt_pass methods: */
5091 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5092 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5093 virtual unsigned int execute (function *) { return execute_reassoc (); }
5095 }; // class pass_reassoc
5097 } // anon namespace
5099 gimple_opt_pass *
5100 make_pass_reassoc (gcc::context *ctxt)
5102 return new pass_reassoc (ctxt);