2015-06-23 Paolo Carlini <paolo.carlini@oracle.com>
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob3eb098eca5a167e43d61c917a8465ed4205e113c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tm_p.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "tree.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
32 #include "predict.h"
33 #include "hard-reg-set.h"
34 #include "function.h"
35 #include "dominance.h"
36 #include "cfg.h"
37 #include "cfganal.h"
38 #include "basic-block.h"
39 #include "gimple-pretty-print.h"
40 #include "tree-inline.h"
41 #include "tree-ssa-alias.h"
42 #include "internal-fn.h"
43 #include "gimple-fold.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "gimple.h"
47 #include "gimple-iterator.h"
48 #include "gimplify-me.h"
49 #include "gimple-ssa.h"
50 #include "tree-cfg.h"
51 #include "tree-phinodes.h"
52 #include "ssa-iterators.h"
53 #include "stringpool.h"
54 #include "tree-ssanames.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "tree-ssa-loop.h"
57 #include "flags.h"
58 #include "insn-config.h"
59 #include "expmed.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "calls.h"
63 #include "emit-rtl.h"
64 #include "varasm.h"
65 #include "stmt.h"
66 #include "expr.h"
67 #include "tree-dfa.h"
68 #include "tree-ssa.h"
69 #include "tree-iterator.h"
70 #include "tree-pass.h"
71 #include "alloc-pool.h"
72 #include "langhooks.h"
73 #include "cfgloop.h"
74 #include "target.h"
75 #include "params.h"
76 #include "diagnostic-core.h"
77 #include "builtins.h"
78 #include "gimplify.h"
79 #include "insn-codes.h"
80 #include "optabs.h"
82 /* This is a simple global reassociation pass. It is, in part, based
83 on the LLVM pass of the same name (They do some things more/less
84 than we do, in different orders, etc).
86 It consists of five steps:
88 1. Breaking up subtract operations into addition + negate, where
89 it would promote the reassociation of adds.
91 2. Left linearization of the expression trees, so that (A+B)+(C+D)
92 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
93 During linearization, we place the operands of the binary
94 expressions into a vector of operand_entry_t
96 3. Optimization of the operand lists, eliminating things like a +
97 -a, a & a, etc.
99 3a. Combine repeated factors with the same occurrence counts
100 into a __builtin_powi call that will later be optimized into
101 an optimal number of multiplies.
103 4. Rewrite the expression trees we linearized and optimized so
104 they are in proper rank order.
106 5. Repropagate negates, as nothing else will clean it up ATM.
108 A bit of theory on #4, since nobody seems to write anything down
109 about why it makes sense to do it the way they do it:
111 We could do this much nicer theoretically, but don't (for reasons
112 explained after how to do it theoretically nice :P).
114 In order to promote the most redundancy elimination, you want
115 binary expressions whose operands are the same rank (or
116 preferably, the same value) exposed to the redundancy eliminator,
117 for possible elimination.
119 So the way to do this if we really cared, is to build the new op
120 tree from the leaves to the roots, merging as you go, and putting the
121 new op on the end of the worklist, until you are left with one
122 thing on the worklist.
124 IE if you have to rewrite the following set of operands (listed with
125 rank in parentheses), with opcode PLUS_EXPR:
127 a (1), b (1), c (1), d (2), e (2)
130 We start with our merge worklist empty, and the ops list with all of
131 those on it.
133 You want to first merge all leaves of the same rank, as much as
134 possible.
136 So first build a binary op of
138 mergetmp = a + b, and put "mergetmp" on the merge worklist.
140 Because there is no three operand form of PLUS_EXPR, c is not going to
141 be exposed to redundancy elimination as a rank 1 operand.
143 So you might as well throw it on the merge worklist (you could also
144 consider it to now be a rank two operand, and merge it with d and e,
145 but in this case, you then have evicted e from a binary op. So at
146 least in this situation, you can't win.)
148 Then build a binary op of d + e
149 mergetmp2 = d + e
151 and put mergetmp2 on the merge worklist.
153 so merge worklist = {mergetmp, c, mergetmp2}
155 Continue building binary ops of these operations until you have only
156 one operation left on the worklist.
158 So we have
160 build binary op
161 mergetmp3 = mergetmp + c
163 worklist = {mergetmp2, mergetmp3}
165 mergetmp4 = mergetmp2 + mergetmp3
167 worklist = {mergetmp4}
169 because we have one operation left, we can now just set the original
170 statement equal to the result of that operation.
172 This will at least expose a + b and d + e to redundancy elimination
173 as binary operations.
175 For extra points, you can reuse the old statements to build the
176 mergetmps, since you shouldn't run out.
178 So why don't we do this?
180 Because it's expensive, and rarely will help. Most trees we are
181 reassociating have 3 or less ops. If they have 2 ops, they already
182 will be written into a nice single binary op. If you have 3 ops, a
183 single simple check suffices to tell you whether the first two are of the
184 same rank. If so, you know to order it
186 mergetmp = op1 + op2
187 newstmt = mergetmp + op3
189 instead of
190 mergetmp = op2 + op3
191 newstmt = mergetmp + op1
193 If all three are of the same rank, you can't expose them all in a
194 single binary operator anyway, so the above is *still* the best you
195 can do.
197 Thus, this is what we do. When we have three ops left, we check to see
198 what order to put them in, and call it a day. As a nod to vector sum
199 reduction, we check if any of the ops are really a phi node that is a
200 destructive update for the associating op, and keep the destructive
201 update together for vector sum reduction recognition. */
204 /* Statistics */
205 static struct
207 int linearized;
208 int constants_eliminated;
209 int ops_eliminated;
210 int rewritten;
211 int pows_encountered;
212 int pows_created;
213 } reassociate_stats;
215 /* Operator, rank pair. */
216 typedef struct operand_entry
218 unsigned int rank;
219 int id;
220 tree op;
221 unsigned int count;
222 } *operand_entry_t;
224 static pool_allocator<operand_entry> operand_entry_pool ("operand entry pool",
225 30);
227 /* This is used to assign a unique ID to each struct operand_entry
228 so that qsort results are identical on different hosts. */
229 static int next_operand_entry_id;
231 /* Starting rank number for a given basic block, so that we can rank
232 operations using unmovable instructions in that BB based on the bb
233 depth. */
234 static long *bb_rank;
236 /* Operand->rank hashtable. */
237 static hash_map<tree, long> *operand_rank;
239 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
240 all basic blocks the CFG should be adjusted - basic blocks
241 split right after that SSA_NAME's definition statement and before
242 the only use, which must be a bit ior. */
243 static vec<tree> reassoc_branch_fixups;
245 /* Forward decls. */
246 static long get_rank (tree);
247 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
249 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
250 possibly added by gsi_remove. */
252 bool
253 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
255 gimple stmt = gsi_stmt (*gsi);
257 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
258 return gsi_remove (gsi, true);
260 gimple_stmt_iterator prev = *gsi;
261 gsi_prev (&prev);
262 unsigned uid = gimple_uid (stmt);
263 basic_block bb = gimple_bb (stmt);
264 bool ret = gsi_remove (gsi, true);
265 if (!gsi_end_p (prev))
266 gsi_next (&prev);
267 else
268 prev = gsi_start_bb (bb);
269 gimple end_stmt = gsi_stmt (*gsi);
270 while ((stmt = gsi_stmt (prev)) != end_stmt)
272 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
273 gimple_set_uid (stmt, uid);
274 gsi_next (&prev);
276 return ret;
279 /* Bias amount for loop-carried phis. We want this to be larger than
280 the depth of any reassociation tree we can see, but not larger than
281 the rank difference between two blocks. */
282 #define PHI_LOOP_BIAS (1 << 15)
284 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
285 an innermost loop, and the phi has only a single use which is inside
286 the loop, then the rank is the block rank of the loop latch plus an
287 extra bias for the loop-carried dependence. This causes expressions
288 calculated into an accumulator variable to be independent for each
289 iteration of the loop. If STMT is some other phi, the rank is the
290 block rank of its containing block. */
291 static long
292 phi_rank (gimple stmt)
294 basic_block bb = gimple_bb (stmt);
295 struct loop *father = bb->loop_father;
296 tree res;
297 unsigned i;
298 use_operand_p use;
299 gimple use_stmt;
301 /* We only care about real loops (those with a latch). */
302 if (!father->latch)
303 return bb_rank[bb->index];
305 /* Interesting phis must be in headers of innermost loops. */
306 if (bb != father->header
307 || father->inner)
308 return bb_rank[bb->index];
310 /* Ignore virtual SSA_NAMEs. */
311 res = gimple_phi_result (stmt);
312 if (virtual_operand_p (res))
313 return bb_rank[bb->index];
315 /* The phi definition must have a single use, and that use must be
316 within the loop. Otherwise this isn't an accumulator pattern. */
317 if (!single_imm_use (res, &use, &use_stmt)
318 || gimple_bb (use_stmt)->loop_father != father)
319 return bb_rank[bb->index];
321 /* Look for phi arguments from within the loop. If found, bias this phi. */
322 for (i = 0; i < gimple_phi_num_args (stmt); i++)
324 tree arg = gimple_phi_arg_def (stmt, i);
325 if (TREE_CODE (arg) == SSA_NAME
326 && !SSA_NAME_IS_DEFAULT_DEF (arg))
328 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
329 if (gimple_bb (def_stmt)->loop_father == father)
330 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
334 /* Must be an uninteresting phi. */
335 return bb_rank[bb->index];
338 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
339 loop-carried dependence of an innermost loop, return TRUE; else
340 return FALSE. */
341 static bool
342 loop_carried_phi (tree exp)
344 gimple phi_stmt;
345 long block_rank;
347 if (TREE_CODE (exp) != SSA_NAME
348 || SSA_NAME_IS_DEFAULT_DEF (exp))
349 return false;
351 phi_stmt = SSA_NAME_DEF_STMT (exp);
353 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
354 return false;
356 /* Non-loop-carried phis have block rank. Loop-carried phis have
357 an additional bias added in. If this phi doesn't have block rank,
358 it's biased and should not be propagated. */
359 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
361 if (phi_rank (phi_stmt) != block_rank)
362 return true;
364 return false;
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static long
372 propagate_rank (long rank, tree op)
374 long op_rank;
376 if (loop_carried_phi (op))
377 return rank;
379 op_rank = get_rank (op);
381 return MAX (rank, op_rank);
384 /* Look up the operand rank structure for expression E. */
386 static inline long
387 find_operand_rank (tree e)
389 long *slot = operand_rank->get (e);
390 return slot ? *slot : -1;
393 /* Insert {E,RANK} into the operand rank hashtable. */
395 static inline void
396 insert_operand_rank (tree e, long rank)
398 gcc_assert (rank > 0);
399 gcc_assert (!operand_rank->put (e, rank));
402 /* Given an expression E, return the rank of the expression. */
404 static long
405 get_rank (tree e)
407 /* SSA_NAME's have the rank of the expression they are the result
409 For globals and uninitialized values, the rank is 0.
410 For function arguments, use the pre-setup rank.
411 For PHI nodes, stores, asm statements, etc, we use the rank of
412 the BB.
413 For simple operations, the rank is the maximum rank of any of
414 its operands, or the bb_rank, whichever is less.
415 I make no claims that this is optimal, however, it gives good
416 results. */
418 /* We make an exception to the normal ranking system to break
419 dependences of accumulator variables in loops. Suppose we
420 have a simple one-block loop containing:
422 x_1 = phi(x_0, x_2)
423 b = a + x_1
424 c = b + d
425 x_2 = c + e
427 As shown, each iteration of the calculation into x is fully
428 dependent upon the iteration before it. We would prefer to
429 see this in the form:
431 x_1 = phi(x_0, x_2)
432 b = a + d
433 c = b + e
434 x_2 = c + x_1
436 If the loop is unrolled, the calculations of b and c from
437 different iterations can be interleaved.
439 To obtain this result during reassociation, we bias the rank
440 of the phi definition x_1 upward, when it is recognized as an
441 accumulator pattern. The artificial rank causes it to be
442 added last, providing the desired independence. */
444 if (TREE_CODE (e) == SSA_NAME)
446 ssa_op_iter iter;
447 gimple stmt;
448 long rank;
449 tree op;
451 if (SSA_NAME_IS_DEFAULT_DEF (e))
452 return find_operand_rank (e);
454 stmt = SSA_NAME_DEF_STMT (e);
455 if (gimple_code (stmt) == GIMPLE_PHI)
456 return phi_rank (stmt);
458 if (!is_gimple_assign (stmt))
459 return bb_rank[gimple_bb (stmt)->index];
461 /* If we already have a rank for this expression, use that. */
462 rank = find_operand_rank (e);
463 if (rank != -1)
464 return rank;
466 /* Otherwise, find the maximum rank for the operands. As an
467 exception, remove the bias from loop-carried phis when propagating
468 the rank so that dependent operations are not also biased. */
469 /* Simply walk over all SSA uses - this takes advatage of the
470 fact that non-SSA operands are is_gimple_min_invariant and
471 thus have rank 0. */
472 rank = 0;
473 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
474 rank = propagate_rank (rank, op);
476 if (dump_file && (dump_flags & TDF_DETAILS))
478 fprintf (dump_file, "Rank for ");
479 print_generic_expr (dump_file, e, 0);
480 fprintf (dump_file, " is %ld\n", (rank + 1));
483 /* Note the rank in the hashtable so we don't recompute it. */
484 insert_operand_rank (e, (rank + 1));
485 return (rank + 1);
488 /* Constants, globals, etc., are rank 0 */
489 return 0;
493 /* We want integer ones to end up last no matter what, since they are
494 the ones we can do the most with. */
495 #define INTEGER_CONST_TYPE 1 << 3
496 #define FLOAT_CONST_TYPE 1 << 2
497 #define OTHER_CONST_TYPE 1 << 1
499 /* Classify an invariant tree into integer, float, or other, so that
500 we can sort them to be near other constants of the same type. */
501 static inline int
502 constant_type (tree t)
504 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
505 return INTEGER_CONST_TYPE;
506 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
507 return FLOAT_CONST_TYPE;
508 else
509 return OTHER_CONST_TYPE;
512 /* qsort comparison function to sort operand entries PA and PB by rank
513 so that the sorted array is ordered by rank in decreasing order. */
514 static int
515 sort_by_operand_rank (const void *pa, const void *pb)
517 const operand_entry_t oea = *(const operand_entry_t *)pa;
518 const operand_entry_t oeb = *(const operand_entry_t *)pb;
520 /* It's nicer for optimize_expression if constants that are likely
521 to fold when added/multiplied//whatever are put next to each
522 other. Since all constants have rank 0, order them by type. */
523 if (oeb->rank == 0 && oea->rank == 0)
525 if (constant_type (oeb->op) != constant_type (oea->op))
526 return constant_type (oeb->op) - constant_type (oea->op);
527 else
528 /* To make sorting result stable, we use unique IDs to determine
529 order. */
530 return oeb->id - oea->id;
533 /* Lastly, make sure the versions that are the same go next to each
534 other. */
535 if ((oeb->rank - oea->rank == 0)
536 && TREE_CODE (oea->op) == SSA_NAME
537 && TREE_CODE (oeb->op) == SSA_NAME)
539 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
540 versions of removed SSA_NAMEs, so if possible, prefer to sort
541 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
542 See PR60418. */
543 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
544 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
545 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
547 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
548 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
549 basic_block bba = gimple_bb (stmta);
550 basic_block bbb = gimple_bb (stmtb);
551 if (bbb != bba)
553 if (bb_rank[bbb->index] != bb_rank[bba->index])
554 return bb_rank[bbb->index] - bb_rank[bba->index];
556 else
558 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
559 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
560 if (da != db)
561 return da ? 1 : -1;
565 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
566 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
567 else
568 return oeb->id - oea->id;
571 if (oeb->rank != oea->rank)
572 return oeb->rank - oea->rank;
573 else
574 return oeb->id - oea->id;
577 /* Add an operand entry to *OPS for the tree operand OP. */
579 static void
580 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
582 operand_entry_t oe = operand_entry_pool.allocate ();
584 oe->op = op;
585 oe->rank = get_rank (op);
586 oe->id = next_operand_entry_id++;
587 oe->count = 1;
588 ops->safe_push (oe);
591 /* Add an operand entry to *OPS for the tree operand OP with repeat
592 count REPEAT. */
594 static void
595 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
596 HOST_WIDE_INT repeat)
598 operand_entry_t oe = operand_entry_pool.allocate ();
600 oe->op = op;
601 oe->rank = get_rank (op);
602 oe->id = next_operand_entry_id++;
603 oe->count = repeat;
604 ops->safe_push (oe);
606 reassociate_stats.pows_encountered++;
609 /* Return true if STMT is reassociable operation containing a binary
610 operation with tree code CODE, and is inside LOOP. */
612 static bool
613 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
615 basic_block bb = gimple_bb (stmt);
617 if (gimple_bb (stmt) == NULL)
618 return false;
620 if (!flow_bb_inside_loop_p (loop, bb))
621 return false;
623 if (is_gimple_assign (stmt)
624 && gimple_assign_rhs_code (stmt) == code
625 && has_single_use (gimple_assign_lhs (stmt)))
626 return true;
628 return false;
632 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
633 operand of the negate operation. Otherwise, return NULL. */
635 static tree
636 get_unary_op (tree name, enum tree_code opcode)
638 gimple stmt = SSA_NAME_DEF_STMT (name);
640 if (!is_gimple_assign (stmt))
641 return NULL_TREE;
643 if (gimple_assign_rhs_code (stmt) == opcode)
644 return gimple_assign_rhs1 (stmt);
645 return NULL_TREE;
648 /* If CURR and LAST are a pair of ops that OPCODE allows us to
649 eliminate through equivalences, do so, remove them from OPS, and
650 return true. Otherwise, return false. */
652 static bool
653 eliminate_duplicate_pair (enum tree_code opcode,
654 vec<operand_entry_t> *ops,
655 bool *all_done,
656 unsigned int i,
657 operand_entry_t curr,
658 operand_entry_t last)
661 /* If we have two of the same op, and the opcode is & |, min, or max,
662 we can eliminate one of them.
663 If we have two of the same op, and the opcode is ^, we can
664 eliminate both of them. */
666 if (last && last->op == curr->op)
668 switch (opcode)
670 case MAX_EXPR:
671 case MIN_EXPR:
672 case BIT_IOR_EXPR:
673 case BIT_AND_EXPR:
674 if (dump_file && (dump_flags & TDF_DETAILS))
676 fprintf (dump_file, "Equivalence: ");
677 print_generic_expr (dump_file, curr->op, 0);
678 fprintf (dump_file, " [&|minmax] ");
679 print_generic_expr (dump_file, last->op, 0);
680 fprintf (dump_file, " -> ");
681 print_generic_stmt (dump_file, last->op, 0);
684 ops->ordered_remove (i);
685 reassociate_stats.ops_eliminated ++;
687 return true;
689 case BIT_XOR_EXPR:
690 if (dump_file && (dump_flags & TDF_DETAILS))
692 fprintf (dump_file, "Equivalence: ");
693 print_generic_expr (dump_file, curr->op, 0);
694 fprintf (dump_file, " ^ ");
695 print_generic_expr (dump_file, last->op, 0);
696 fprintf (dump_file, " -> nothing\n");
699 reassociate_stats.ops_eliminated += 2;
701 if (ops->length () == 2)
703 ops->create (0);
704 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
705 *all_done = true;
707 else
709 ops->ordered_remove (i-1);
710 ops->ordered_remove (i-1);
713 return true;
715 default:
716 break;
719 return false;
722 static vec<tree> plus_negates;
724 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
725 expression, look in OPS for a corresponding positive operation to cancel
726 it out. If we find one, remove the other from OPS, replace
727 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
728 return false. */
730 static bool
731 eliminate_plus_minus_pair (enum tree_code opcode,
732 vec<operand_entry_t> *ops,
733 unsigned int currindex,
734 operand_entry_t curr)
736 tree negateop;
737 tree notop;
738 unsigned int i;
739 operand_entry_t oe;
741 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
742 return false;
744 negateop = get_unary_op (curr->op, NEGATE_EXPR);
745 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
746 if (negateop == NULL_TREE && notop == NULL_TREE)
747 return false;
749 /* Any non-negated version will have a rank that is one less than
750 the current rank. So once we hit those ranks, if we don't find
751 one, we can stop. */
753 for (i = currindex + 1;
754 ops->iterate (i, &oe)
755 && oe->rank >= curr->rank - 1 ;
756 i++)
758 if (oe->op == negateop)
761 if (dump_file && (dump_flags & TDF_DETAILS))
763 fprintf (dump_file, "Equivalence: ");
764 print_generic_expr (dump_file, negateop, 0);
765 fprintf (dump_file, " + -");
766 print_generic_expr (dump_file, oe->op, 0);
767 fprintf (dump_file, " -> 0\n");
770 ops->ordered_remove (i);
771 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
772 ops->ordered_remove (currindex);
773 reassociate_stats.ops_eliminated ++;
775 return true;
777 else if (oe->op == notop)
779 tree op_type = TREE_TYPE (oe->op);
781 if (dump_file && (dump_flags & TDF_DETAILS))
783 fprintf (dump_file, "Equivalence: ");
784 print_generic_expr (dump_file, notop, 0);
785 fprintf (dump_file, " + ~");
786 print_generic_expr (dump_file, oe->op, 0);
787 fprintf (dump_file, " -> -1\n");
790 ops->ordered_remove (i);
791 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
792 ops->ordered_remove (currindex);
793 reassociate_stats.ops_eliminated ++;
795 return true;
799 /* CURR->OP is a negate expr in a plus expr: save it for later
800 inspection in repropagate_negates(). */
801 if (negateop != NULL_TREE)
802 plus_negates.safe_push (curr->op);
804 return false;
807 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
808 bitwise not expression, look in OPS for a corresponding operand to
809 cancel it out. If we find one, remove the other from OPS, replace
810 OPS[CURRINDEX] with 0, and return true. Otherwise, return
811 false. */
813 static bool
814 eliminate_not_pairs (enum tree_code opcode,
815 vec<operand_entry_t> *ops,
816 unsigned int currindex,
817 operand_entry_t curr)
819 tree notop;
820 unsigned int i;
821 operand_entry_t oe;
823 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
824 || TREE_CODE (curr->op) != SSA_NAME)
825 return false;
827 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
828 if (notop == NULL_TREE)
829 return false;
831 /* Any non-not version will have a rank that is one less than
832 the current rank. So once we hit those ranks, if we don't find
833 one, we can stop. */
835 for (i = currindex + 1;
836 ops->iterate (i, &oe)
837 && oe->rank >= curr->rank - 1;
838 i++)
840 if (oe->op == notop)
842 if (dump_file && (dump_flags & TDF_DETAILS))
844 fprintf (dump_file, "Equivalence: ");
845 print_generic_expr (dump_file, notop, 0);
846 if (opcode == BIT_AND_EXPR)
847 fprintf (dump_file, " & ~");
848 else if (opcode == BIT_IOR_EXPR)
849 fprintf (dump_file, " | ~");
850 print_generic_expr (dump_file, oe->op, 0);
851 if (opcode == BIT_AND_EXPR)
852 fprintf (dump_file, " -> 0\n");
853 else if (opcode == BIT_IOR_EXPR)
854 fprintf (dump_file, " -> -1\n");
857 if (opcode == BIT_AND_EXPR)
858 oe->op = build_zero_cst (TREE_TYPE (oe->op));
859 else if (opcode == BIT_IOR_EXPR)
860 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
862 reassociate_stats.ops_eliminated += ops->length () - 1;
863 ops->truncate (0);
864 ops->quick_push (oe);
865 return true;
869 return false;
872 /* Use constant value that may be present in OPS to try to eliminate
873 operands. Note that this function is only really used when we've
874 eliminated ops for other reasons, or merged constants. Across
875 single statements, fold already does all of this, plus more. There
876 is little point in duplicating logic, so I've only included the
877 identities that I could ever construct testcases to trigger. */
879 static void
880 eliminate_using_constants (enum tree_code opcode,
881 vec<operand_entry_t> *ops)
883 operand_entry_t oelast = ops->last ();
884 tree type = TREE_TYPE (oelast->op);
886 if (oelast->rank == 0
887 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
889 switch (opcode)
891 case BIT_AND_EXPR:
892 if (integer_zerop (oelast->op))
894 if (ops->length () != 1)
896 if (dump_file && (dump_flags & TDF_DETAILS))
897 fprintf (dump_file, "Found & 0, removing all other ops\n");
899 reassociate_stats.ops_eliminated += ops->length () - 1;
901 ops->truncate (0);
902 ops->quick_push (oelast);
903 return;
906 else if (integer_all_onesp (oelast->op))
908 if (ops->length () != 1)
910 if (dump_file && (dump_flags & TDF_DETAILS))
911 fprintf (dump_file, "Found & -1, removing\n");
912 ops->pop ();
913 reassociate_stats.ops_eliminated++;
916 break;
917 case BIT_IOR_EXPR:
918 if (integer_all_onesp (oelast->op))
920 if (ops->length () != 1)
922 if (dump_file && (dump_flags & TDF_DETAILS))
923 fprintf (dump_file, "Found | -1, removing all other ops\n");
925 reassociate_stats.ops_eliminated += ops->length () - 1;
927 ops->truncate (0);
928 ops->quick_push (oelast);
929 return;
932 else if (integer_zerop (oelast->op))
934 if (ops->length () != 1)
936 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "Found | 0, removing\n");
938 ops->pop ();
939 reassociate_stats.ops_eliminated++;
942 break;
943 case MULT_EXPR:
944 if (integer_zerop (oelast->op)
945 || (FLOAT_TYPE_P (type)
946 && !HONOR_NANS (type)
947 && !HONOR_SIGNED_ZEROS (type)
948 && real_zerop (oelast->op)))
950 if (ops->length () != 1)
952 if (dump_file && (dump_flags & TDF_DETAILS))
953 fprintf (dump_file, "Found * 0, removing all other ops\n");
955 reassociate_stats.ops_eliminated += ops->length () - 1;
956 ops->truncate (1);
957 ops->quick_push (oelast);
958 return;
961 else if (integer_onep (oelast->op)
962 || (FLOAT_TYPE_P (type)
963 && !HONOR_SNANS (type)
964 && real_onep (oelast->op)))
966 if (ops->length () != 1)
968 if (dump_file && (dump_flags & TDF_DETAILS))
969 fprintf (dump_file, "Found * 1, removing\n");
970 ops->pop ();
971 reassociate_stats.ops_eliminated++;
972 return;
975 break;
976 case BIT_XOR_EXPR:
977 case PLUS_EXPR:
978 case MINUS_EXPR:
979 if (integer_zerop (oelast->op)
980 || (FLOAT_TYPE_P (type)
981 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
982 && fold_real_zero_addition_p (type, oelast->op,
983 opcode == MINUS_EXPR)))
985 if (ops->length () != 1)
987 if (dump_file && (dump_flags & TDF_DETAILS))
988 fprintf (dump_file, "Found [|^+] 0, removing\n");
989 ops->pop ();
990 reassociate_stats.ops_eliminated++;
991 return;
994 break;
995 default:
996 break;
1002 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1003 bool, bool);
1005 /* Structure for tracking and counting operands. */
1006 typedef struct oecount_s {
1007 int cnt;
1008 int id;
1009 enum tree_code oecode;
1010 tree op;
1011 } oecount;
1014 /* The heap for the oecount hashtable and the sorted list of operands. */
1015 static vec<oecount> cvec;
1018 /* Oecount hashtable helpers. */
1020 struct oecount_hasher
1022 typedef int value_type;
1023 typedef int compare_type;
1024 static inline hashval_t hash (const value_type &);
1025 static inline bool equal (const value_type &, const compare_type &);
1026 static bool is_deleted (int &v) { return v == 1; }
1027 static void mark_deleted (int &e) { e = 1; }
1028 static bool is_empty (int &v) { return v == 0; }
1029 static void mark_empty (int &e) { e = 0; }
1030 static void remove (int &) {}
1033 /* Hash function for oecount. */
1035 inline hashval_t
1036 oecount_hasher::hash (const value_type &p)
1038 const oecount *c = &cvec[p - 42];
1039 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1042 /* Comparison function for oecount. */
1044 inline bool
1045 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1047 const oecount *c1 = &cvec[p1 - 42];
1048 const oecount *c2 = &cvec[p2 - 42];
1049 return (c1->oecode == c2->oecode
1050 && c1->op == c2->op);
1053 /* Comparison function for qsort sorting oecount elements by count. */
1055 static int
1056 oecount_cmp (const void *p1, const void *p2)
1058 const oecount *c1 = (const oecount *)p1;
1059 const oecount *c2 = (const oecount *)p2;
1060 if (c1->cnt != c2->cnt)
1061 return c1->cnt - c2->cnt;
1062 else
1063 /* If counts are identical, use unique IDs to stabilize qsort. */
1064 return c1->id - c2->id;
1067 /* Return TRUE iff STMT represents a builtin call that raises OP
1068 to some exponent. */
1070 static bool
1071 stmt_is_power_of_op (gimple stmt, tree op)
1073 tree fndecl;
1075 if (!is_gimple_call (stmt))
1076 return false;
1078 fndecl = gimple_call_fndecl (stmt);
1080 if (!fndecl
1081 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1082 return false;
1084 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1086 CASE_FLT_FN (BUILT_IN_POW):
1087 CASE_FLT_FN (BUILT_IN_POWI):
1088 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1090 default:
1091 return false;
1095 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1096 in place and return the result. Assumes that stmt_is_power_of_op
1097 was previously called for STMT and returned TRUE. */
1099 static HOST_WIDE_INT
1100 decrement_power (gimple stmt)
1102 REAL_VALUE_TYPE c, cint;
1103 HOST_WIDE_INT power;
1104 tree arg1;
1106 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1108 CASE_FLT_FN (BUILT_IN_POW):
1109 arg1 = gimple_call_arg (stmt, 1);
1110 c = TREE_REAL_CST (arg1);
1111 power = real_to_integer (&c) - 1;
1112 real_from_integer (&cint, VOIDmode, power, SIGNED);
1113 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1114 return power;
1116 CASE_FLT_FN (BUILT_IN_POWI):
1117 arg1 = gimple_call_arg (stmt, 1);
1118 power = TREE_INT_CST_LOW (arg1) - 1;
1119 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1120 return power;
1122 default:
1123 gcc_unreachable ();
1127 /* Find the single immediate use of STMT's LHS, and replace it
1128 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1129 replace *DEF with OP as well. */
1131 static void
1132 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1134 tree lhs;
1135 gimple use_stmt;
1136 use_operand_p use;
1137 gimple_stmt_iterator gsi;
1139 if (is_gimple_call (stmt))
1140 lhs = gimple_call_lhs (stmt);
1141 else
1142 lhs = gimple_assign_lhs (stmt);
1144 gcc_assert (has_single_use (lhs));
1145 single_imm_use (lhs, &use, &use_stmt);
1146 if (lhs == *def)
1147 *def = op;
1148 SET_USE (use, op);
1149 if (TREE_CODE (op) != SSA_NAME)
1150 update_stmt (use_stmt);
1151 gsi = gsi_for_stmt (stmt);
1152 unlink_stmt_vdef (stmt);
1153 reassoc_remove_stmt (&gsi);
1154 release_defs (stmt);
1157 /* Walks the linear chain with result *DEF searching for an operation
1158 with operand OP and code OPCODE removing that from the chain. *DEF
1159 is updated if there is only one operand but no operation left. */
1161 static void
1162 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1164 gimple stmt = SSA_NAME_DEF_STMT (*def);
1168 tree name;
1170 if (opcode == MULT_EXPR
1171 && stmt_is_power_of_op (stmt, op))
1173 if (decrement_power (stmt) == 1)
1174 propagate_op_to_single_use (op, stmt, def);
1175 return;
1178 name = gimple_assign_rhs1 (stmt);
1180 /* If this is the operation we look for and one of the operands
1181 is ours simply propagate the other operand into the stmts
1182 single use. */
1183 if (gimple_assign_rhs_code (stmt) == opcode
1184 && (name == op
1185 || gimple_assign_rhs2 (stmt) == op))
1187 if (name == op)
1188 name = gimple_assign_rhs2 (stmt);
1189 propagate_op_to_single_use (name, stmt, def);
1190 return;
1193 /* We might have a multiply of two __builtin_pow* calls, and
1194 the operand might be hiding in the rightmost one. */
1195 if (opcode == MULT_EXPR
1196 && gimple_assign_rhs_code (stmt) == opcode
1197 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1198 && has_single_use (gimple_assign_rhs2 (stmt)))
1200 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1201 if (stmt_is_power_of_op (stmt2, op))
1203 if (decrement_power (stmt2) == 1)
1204 propagate_op_to_single_use (op, stmt2, def);
1205 return;
1209 /* Continue walking the chain. */
1210 gcc_assert (name != op
1211 && TREE_CODE (name) == SSA_NAME);
1212 stmt = SSA_NAME_DEF_STMT (name);
1214 while (1);
1217 /* Returns true if statement S1 dominates statement S2. Like
1218 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1220 static bool
1221 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1223 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1225 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1226 SSA_NAME. Assume it lives at the beginning of function and
1227 thus dominates everything. */
1228 if (!bb1 || s1 == s2)
1229 return true;
1231 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1232 if (!bb2)
1233 return false;
1235 if (bb1 == bb2)
1237 /* PHIs in the same basic block are assumed to be
1238 executed all in parallel, if only one stmt is a PHI,
1239 it dominates the other stmt in the same basic block. */
1240 if (gimple_code (s1) == GIMPLE_PHI)
1241 return true;
1243 if (gimple_code (s2) == GIMPLE_PHI)
1244 return false;
1246 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1248 if (gimple_uid (s1) < gimple_uid (s2))
1249 return true;
1251 if (gimple_uid (s1) > gimple_uid (s2))
1252 return false;
1254 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1255 unsigned int uid = gimple_uid (s1);
1256 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1258 gimple s = gsi_stmt (gsi);
1259 if (gimple_uid (s) != uid)
1260 break;
1261 if (s == s2)
1262 return true;
1265 return false;
1268 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1271 /* Insert STMT after INSERT_POINT. */
1273 static void
1274 insert_stmt_after (gimple stmt, gimple insert_point)
1276 gimple_stmt_iterator gsi;
1277 basic_block bb;
1279 if (gimple_code (insert_point) == GIMPLE_PHI)
1280 bb = gimple_bb (insert_point);
1281 else if (!stmt_ends_bb_p (insert_point))
1283 gsi = gsi_for_stmt (insert_point);
1284 gimple_set_uid (stmt, gimple_uid (insert_point));
1285 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1286 return;
1288 else
1289 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1290 thus if it must end a basic block, it should be a call that can
1291 throw, or some assignment that can throw. If it throws, the LHS
1292 of it will not be initialized though, so only valid places using
1293 the SSA_NAME should be dominated by the fallthru edge. */
1294 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1295 gsi = gsi_after_labels (bb);
1296 if (gsi_end_p (gsi))
1298 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1299 gimple_set_uid (stmt,
1300 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1302 else
1303 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1304 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1307 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1308 the result. Places the statement after the definition of either
1309 OP1 or OP2. Returns the new statement. */
1311 static gimple
1312 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1314 gimple op1def = NULL, op2def = NULL;
1315 gimple_stmt_iterator gsi;
1316 tree op;
1317 gassign *sum;
1319 /* Create the addition statement. */
1320 op = make_ssa_name (type);
1321 sum = gimple_build_assign (op, opcode, op1, op2);
1323 /* Find an insertion place and insert. */
1324 if (TREE_CODE (op1) == SSA_NAME)
1325 op1def = SSA_NAME_DEF_STMT (op1);
1326 if (TREE_CODE (op2) == SSA_NAME)
1327 op2def = SSA_NAME_DEF_STMT (op2);
1328 if ((!op1def || gimple_nop_p (op1def))
1329 && (!op2def || gimple_nop_p (op2def)))
1331 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1332 if (gsi_end_p (gsi))
1334 gimple_stmt_iterator gsi2
1335 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1336 gimple_set_uid (sum,
1337 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1339 else
1340 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1341 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1343 else
1345 gimple insert_point;
1346 if ((!op1def || gimple_nop_p (op1def))
1347 || (op2def && !gimple_nop_p (op2def)
1348 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1349 insert_point = op2def;
1350 else
1351 insert_point = op1def;
1352 insert_stmt_after (sum, insert_point);
1354 update_stmt (sum);
1356 return sum;
1359 /* Perform un-distribution of divisions and multiplications.
1360 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1361 to (A + B) / X for real X.
1363 The algorithm is organized as follows.
1365 - First we walk the addition chain *OPS looking for summands that
1366 are defined by a multiplication or a real division. This results
1367 in the candidates bitmap with relevant indices into *OPS.
1369 - Second we build the chains of multiplications or divisions for
1370 these candidates, counting the number of occurrences of (operand, code)
1371 pairs in all of the candidates chains.
1373 - Third we sort the (operand, code) pairs by number of occurrence and
1374 process them starting with the pair with the most uses.
1376 * For each such pair we walk the candidates again to build a
1377 second candidate bitmap noting all multiplication/division chains
1378 that have at least one occurrence of (operand, code).
1380 * We build an alternate addition chain only covering these
1381 candidates with one (operand, code) operation removed from their
1382 multiplication/division chain.
1384 * The first candidate gets replaced by the alternate addition chain
1385 multiplied/divided by the operand.
1387 * All candidate chains get disabled for further processing and
1388 processing of (operand, code) pairs continues.
1390 The alternate addition chains built are re-processed by the main
1391 reassociation algorithm which allows optimizing a * x * y + b * y * x
1392 to (a + b ) * x * y in one invocation of the reassociation pass. */
1394 static bool
1395 undistribute_ops_list (enum tree_code opcode,
1396 vec<operand_entry_t> *ops, struct loop *loop)
1398 unsigned int length = ops->length ();
1399 operand_entry_t oe1;
1400 unsigned i, j;
1401 sbitmap candidates, candidates2;
1402 unsigned nr_candidates, nr_candidates2;
1403 sbitmap_iterator sbi0;
1404 vec<operand_entry_t> *subops;
1405 bool changed = false;
1406 int next_oecount_id = 0;
1408 if (length <= 1
1409 || opcode != PLUS_EXPR)
1410 return false;
1412 /* Build a list of candidates to process. */
1413 candidates = sbitmap_alloc (length);
1414 bitmap_clear (candidates);
1415 nr_candidates = 0;
1416 FOR_EACH_VEC_ELT (*ops, i, oe1)
1418 enum tree_code dcode;
1419 gimple oe1def;
1421 if (TREE_CODE (oe1->op) != SSA_NAME)
1422 continue;
1423 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1424 if (!is_gimple_assign (oe1def))
1425 continue;
1426 dcode = gimple_assign_rhs_code (oe1def);
1427 if ((dcode != MULT_EXPR
1428 && dcode != RDIV_EXPR)
1429 || !is_reassociable_op (oe1def, dcode, loop))
1430 continue;
1432 bitmap_set_bit (candidates, i);
1433 nr_candidates++;
1436 if (nr_candidates < 2)
1438 sbitmap_free (candidates);
1439 return false;
1442 if (dump_file && (dump_flags & TDF_DETAILS))
1444 fprintf (dump_file, "searching for un-distribute opportunities ");
1445 print_generic_expr (dump_file,
1446 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1447 fprintf (dump_file, " %d\n", nr_candidates);
1450 /* Build linearized sub-operand lists and the counting table. */
1451 cvec.create (0);
1453 hash_table<oecount_hasher> ctable (15);
1455 /* ??? Macro arguments cannot have multi-argument template types in
1456 them. This typedef is needed to workaround that limitation. */
1457 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1458 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1459 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1461 gimple oedef;
1462 enum tree_code oecode;
1463 unsigned j;
1465 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1466 oecode = gimple_assign_rhs_code (oedef);
1467 linearize_expr_tree (&subops[i], oedef,
1468 associative_tree_code (oecode), false);
1470 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1472 oecount c;
1473 int *slot;
1474 int idx;
1475 c.oecode = oecode;
1476 c.cnt = 1;
1477 c.id = next_oecount_id++;
1478 c.op = oe1->op;
1479 cvec.safe_push (c);
1480 idx = cvec.length () + 41;
1481 slot = ctable.find_slot (idx, INSERT);
1482 if (!*slot)
1484 *slot = idx;
1486 else
1488 cvec.pop ();
1489 cvec[*slot - 42].cnt++;
1494 /* Sort the counting table. */
1495 cvec.qsort (oecount_cmp);
1497 if (dump_file && (dump_flags & TDF_DETAILS))
1499 oecount *c;
1500 fprintf (dump_file, "Candidates:\n");
1501 FOR_EACH_VEC_ELT (cvec, j, c)
1503 fprintf (dump_file, " %u %s: ", c->cnt,
1504 c->oecode == MULT_EXPR
1505 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1506 print_generic_expr (dump_file, c->op, 0);
1507 fprintf (dump_file, "\n");
1511 /* Process the (operand, code) pairs in order of most occurrence. */
1512 candidates2 = sbitmap_alloc (length);
1513 while (!cvec.is_empty ())
1515 oecount *c = &cvec.last ();
1516 if (c->cnt < 2)
1517 break;
1519 /* Now collect the operands in the outer chain that contain
1520 the common operand in their inner chain. */
1521 bitmap_clear (candidates2);
1522 nr_candidates2 = 0;
1523 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1525 gimple oedef;
1526 enum tree_code oecode;
1527 unsigned j;
1528 tree op = (*ops)[i]->op;
1530 /* If we undistributed in this chain already this may be
1531 a constant. */
1532 if (TREE_CODE (op) != SSA_NAME)
1533 continue;
1535 oedef = SSA_NAME_DEF_STMT (op);
1536 oecode = gimple_assign_rhs_code (oedef);
1537 if (oecode != c->oecode)
1538 continue;
1540 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1542 if (oe1->op == c->op)
1544 bitmap_set_bit (candidates2, i);
1545 ++nr_candidates2;
1546 break;
1551 if (nr_candidates2 >= 2)
1553 operand_entry_t oe1, oe2;
1554 gimple prod;
1555 int first = bitmap_first_set_bit (candidates2);
1557 /* Build the new addition chain. */
1558 oe1 = (*ops)[first];
1559 if (dump_file && (dump_flags & TDF_DETAILS))
1561 fprintf (dump_file, "Building (");
1562 print_generic_expr (dump_file, oe1->op, 0);
1564 zero_one_operation (&oe1->op, c->oecode, c->op);
1565 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1567 gimple sum;
1568 oe2 = (*ops)[i];
1569 if (dump_file && (dump_flags & TDF_DETAILS))
1571 fprintf (dump_file, " + ");
1572 print_generic_expr (dump_file, oe2->op, 0);
1574 zero_one_operation (&oe2->op, c->oecode, c->op);
1575 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1576 oe1->op, oe2->op, opcode);
1577 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1578 oe2->rank = 0;
1579 oe1->op = gimple_get_lhs (sum);
1582 /* Apply the multiplication/division. */
1583 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1584 oe1->op, c->op, c->oecode);
1585 if (dump_file && (dump_flags & TDF_DETAILS))
1587 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1588 print_generic_expr (dump_file, c->op, 0);
1589 fprintf (dump_file, "\n");
1592 /* Record it in the addition chain and disable further
1593 undistribution with this op. */
1594 oe1->op = gimple_assign_lhs (prod);
1595 oe1->rank = get_rank (oe1->op);
1596 subops[first].release ();
1598 changed = true;
1601 cvec.pop ();
1604 for (i = 0; i < ops->length (); ++i)
1605 subops[i].release ();
1606 free (subops);
1607 cvec.release ();
1608 sbitmap_free (candidates);
1609 sbitmap_free (candidates2);
1611 return changed;
1614 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1615 expression, examine the other OPS to see if any of them are comparisons
1616 of the same values, which we may be able to combine or eliminate.
1617 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1619 static bool
1620 eliminate_redundant_comparison (enum tree_code opcode,
1621 vec<operand_entry_t> *ops,
1622 unsigned int currindex,
1623 operand_entry_t curr)
1625 tree op1, op2;
1626 enum tree_code lcode, rcode;
1627 gimple def1, def2;
1628 int i;
1629 operand_entry_t oe;
1631 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1632 return false;
1634 /* Check that CURR is a comparison. */
1635 if (TREE_CODE (curr->op) != SSA_NAME)
1636 return false;
1637 def1 = SSA_NAME_DEF_STMT (curr->op);
1638 if (!is_gimple_assign (def1))
1639 return false;
1640 lcode = gimple_assign_rhs_code (def1);
1641 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1642 return false;
1643 op1 = gimple_assign_rhs1 (def1);
1644 op2 = gimple_assign_rhs2 (def1);
1646 /* Now look for a similar comparison in the remaining OPS. */
1647 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1649 tree t;
1651 if (TREE_CODE (oe->op) != SSA_NAME)
1652 continue;
1653 def2 = SSA_NAME_DEF_STMT (oe->op);
1654 if (!is_gimple_assign (def2))
1655 continue;
1656 rcode = gimple_assign_rhs_code (def2);
1657 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1658 continue;
1660 /* If we got here, we have a match. See if we can combine the
1661 two comparisons. */
1662 if (opcode == BIT_IOR_EXPR)
1663 t = maybe_fold_or_comparisons (lcode, op1, op2,
1664 rcode, gimple_assign_rhs1 (def2),
1665 gimple_assign_rhs2 (def2));
1666 else
1667 t = maybe_fold_and_comparisons (lcode, op1, op2,
1668 rcode, gimple_assign_rhs1 (def2),
1669 gimple_assign_rhs2 (def2));
1670 if (!t)
1671 continue;
1673 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1674 always give us a boolean_type_node value back. If the original
1675 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1676 we need to convert. */
1677 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1678 t = fold_convert (TREE_TYPE (curr->op), t);
1680 if (TREE_CODE (t) != INTEGER_CST
1681 && !operand_equal_p (t, curr->op, 0))
1683 enum tree_code subcode;
1684 tree newop1, newop2;
1685 if (!COMPARISON_CLASS_P (t))
1686 continue;
1687 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1688 STRIP_USELESS_TYPE_CONVERSION (newop1);
1689 STRIP_USELESS_TYPE_CONVERSION (newop2);
1690 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1691 continue;
1694 if (dump_file && (dump_flags & TDF_DETAILS))
1696 fprintf (dump_file, "Equivalence: ");
1697 print_generic_expr (dump_file, curr->op, 0);
1698 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1699 print_generic_expr (dump_file, oe->op, 0);
1700 fprintf (dump_file, " -> ");
1701 print_generic_expr (dump_file, t, 0);
1702 fprintf (dump_file, "\n");
1705 /* Now we can delete oe, as it has been subsumed by the new combined
1706 expression t. */
1707 ops->ordered_remove (i);
1708 reassociate_stats.ops_eliminated ++;
1710 /* If t is the same as curr->op, we're done. Otherwise we must
1711 replace curr->op with t. Special case is if we got a constant
1712 back, in which case we add it to the end instead of in place of
1713 the current entry. */
1714 if (TREE_CODE (t) == INTEGER_CST)
1716 ops->ordered_remove (currindex);
1717 add_to_ops_vec (ops, t);
1719 else if (!operand_equal_p (t, curr->op, 0))
1721 gimple sum;
1722 enum tree_code subcode;
1723 tree newop1;
1724 tree newop2;
1725 gcc_assert (COMPARISON_CLASS_P (t));
1726 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1727 STRIP_USELESS_TYPE_CONVERSION (newop1);
1728 STRIP_USELESS_TYPE_CONVERSION (newop2);
1729 gcc_checking_assert (is_gimple_val (newop1)
1730 && is_gimple_val (newop2));
1731 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1732 curr->op = gimple_get_lhs (sum);
1734 return true;
1737 return false;
1740 /* Perform various identities and other optimizations on the list of
1741 operand entries, stored in OPS. The tree code for the binary
1742 operation between all the operands is OPCODE. */
1744 static void
1745 optimize_ops_list (enum tree_code opcode,
1746 vec<operand_entry_t> *ops)
1748 unsigned int length = ops->length ();
1749 unsigned int i;
1750 operand_entry_t oe;
1751 operand_entry_t oelast = NULL;
1752 bool iterate = false;
1754 if (length == 1)
1755 return;
1757 oelast = ops->last ();
1759 /* If the last two are constants, pop the constants off, merge them
1760 and try the next two. */
1761 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1763 operand_entry_t oelm1 = (*ops)[length - 2];
1765 if (oelm1->rank == 0
1766 && is_gimple_min_invariant (oelm1->op)
1767 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1768 TREE_TYPE (oelast->op)))
1770 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1771 oelm1->op, oelast->op);
1773 if (folded && is_gimple_min_invariant (folded))
1775 if (dump_file && (dump_flags & TDF_DETAILS))
1776 fprintf (dump_file, "Merging constants\n");
1778 ops->pop ();
1779 ops->pop ();
1781 add_to_ops_vec (ops, folded);
1782 reassociate_stats.constants_eliminated++;
1784 optimize_ops_list (opcode, ops);
1785 return;
1790 eliminate_using_constants (opcode, ops);
1791 oelast = NULL;
1793 for (i = 0; ops->iterate (i, &oe);)
1795 bool done = false;
1797 if (eliminate_not_pairs (opcode, ops, i, oe))
1798 return;
1799 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1800 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1801 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1803 if (done)
1804 return;
1805 iterate = true;
1806 oelast = NULL;
1807 continue;
1809 oelast = oe;
1810 i++;
1813 length = ops->length ();
1814 oelast = ops->last ();
1816 if (iterate)
1817 optimize_ops_list (opcode, ops);
1820 /* The following functions are subroutines to optimize_range_tests and allow
1821 it to try to change a logical combination of comparisons into a range
1822 test.
1824 For example, both
1825 X == 2 || X == 5 || X == 3 || X == 4
1827 X >= 2 && X <= 5
1828 are converted to
1829 (unsigned) (X - 2) <= 3
1831 For more information see comments above fold_test_range in fold-const.c,
1832 this implementation is for GIMPLE. */
1834 struct range_entry
1836 tree exp;
1837 tree low;
1838 tree high;
1839 bool in_p;
1840 bool strict_overflow_p;
1841 unsigned int idx, next;
1844 /* This is similar to make_range in fold-const.c, but on top of
1845 GIMPLE instead of trees. If EXP is non-NULL, it should be
1846 an SSA_NAME and STMT argument is ignored, otherwise STMT
1847 argument should be a GIMPLE_COND. */
1849 static void
1850 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1852 int in_p;
1853 tree low, high;
1854 bool is_bool, strict_overflow_p;
1856 r->exp = NULL_TREE;
1857 r->in_p = false;
1858 r->strict_overflow_p = false;
1859 r->low = NULL_TREE;
1860 r->high = NULL_TREE;
1861 if (exp != NULL_TREE
1862 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1863 return;
1865 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1866 and see if we can refine the range. Some of the cases below may not
1867 happen, but it doesn't seem worth worrying about this. We "continue"
1868 the outer loop when we've changed something; otherwise we "break"
1869 the switch, which will "break" the while. */
1870 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1871 high = low;
1872 in_p = 0;
1873 strict_overflow_p = false;
1874 is_bool = false;
1875 if (exp == NULL_TREE)
1876 is_bool = true;
1877 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1879 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1880 is_bool = true;
1881 else
1882 return;
1884 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1885 is_bool = true;
1887 while (1)
1889 enum tree_code code;
1890 tree arg0, arg1, exp_type;
1891 tree nexp;
1892 location_t loc;
1894 if (exp != NULL_TREE)
1896 if (TREE_CODE (exp) != SSA_NAME
1897 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1898 break;
1900 stmt = SSA_NAME_DEF_STMT (exp);
1901 if (!is_gimple_assign (stmt))
1902 break;
1904 code = gimple_assign_rhs_code (stmt);
1905 arg0 = gimple_assign_rhs1 (stmt);
1906 arg1 = gimple_assign_rhs2 (stmt);
1907 exp_type = TREE_TYPE (exp);
1909 else
1911 code = gimple_cond_code (stmt);
1912 arg0 = gimple_cond_lhs (stmt);
1913 arg1 = gimple_cond_rhs (stmt);
1914 exp_type = boolean_type_node;
1917 if (TREE_CODE (arg0) != SSA_NAME)
1918 break;
1919 loc = gimple_location (stmt);
1920 switch (code)
1922 case BIT_NOT_EXPR:
1923 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1924 /* Ensure the range is either +[-,0], +[0,0],
1925 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1926 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1927 or similar expression of unconditional true or
1928 false, it should not be negated. */
1929 && ((high && integer_zerop (high))
1930 || (low && integer_onep (low))))
1932 in_p = !in_p;
1933 exp = arg0;
1934 continue;
1936 break;
1937 case SSA_NAME:
1938 exp = arg0;
1939 continue;
1940 CASE_CONVERT:
1941 if (is_bool)
1942 goto do_default;
1943 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1945 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1946 is_bool = true;
1947 else
1948 return;
1950 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1951 is_bool = true;
1952 goto do_default;
1953 case EQ_EXPR:
1954 case NE_EXPR:
1955 case LT_EXPR:
1956 case LE_EXPR:
1957 case GE_EXPR:
1958 case GT_EXPR:
1959 is_bool = true;
1960 /* FALLTHRU */
1961 default:
1962 if (!is_bool)
1963 return;
1964 do_default:
1965 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1966 &low, &high, &in_p,
1967 &strict_overflow_p);
1968 if (nexp != NULL_TREE)
1970 exp = nexp;
1971 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1972 continue;
1974 break;
1976 break;
1978 if (is_bool)
1980 r->exp = exp;
1981 r->in_p = in_p;
1982 r->low = low;
1983 r->high = high;
1984 r->strict_overflow_p = strict_overflow_p;
1988 /* Comparison function for qsort. Sort entries
1989 without SSA_NAME exp first, then with SSA_NAMEs sorted
1990 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1991 by increasing ->low and if ->low is the same, by increasing
1992 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1993 maximum. */
1995 static int
1996 range_entry_cmp (const void *a, const void *b)
1998 const struct range_entry *p = (const struct range_entry *) a;
1999 const struct range_entry *q = (const struct range_entry *) b;
2001 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2003 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2005 /* Group range_entries for the same SSA_NAME together. */
2006 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2007 return -1;
2008 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2009 return 1;
2010 /* If ->low is different, NULL low goes first, then by
2011 ascending low. */
2012 if (p->low != NULL_TREE)
2014 if (q->low != NULL_TREE)
2016 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2017 p->low, q->low);
2018 if (tem && integer_onep (tem))
2019 return -1;
2020 tem = fold_binary (GT_EXPR, boolean_type_node,
2021 p->low, q->low);
2022 if (tem && integer_onep (tem))
2023 return 1;
2025 else
2026 return 1;
2028 else if (q->low != NULL_TREE)
2029 return -1;
2030 /* If ->high is different, NULL high goes last, before that by
2031 ascending high. */
2032 if (p->high != NULL_TREE)
2034 if (q->high != NULL_TREE)
2036 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2037 p->high, q->high);
2038 if (tem && integer_onep (tem))
2039 return -1;
2040 tem = fold_binary (GT_EXPR, boolean_type_node,
2041 p->high, q->high);
2042 if (tem && integer_onep (tem))
2043 return 1;
2045 else
2046 return -1;
2048 else if (q->high != NULL_TREE)
2049 return 1;
2050 /* If both ranges are the same, sort below by ascending idx. */
2052 else
2053 return 1;
2055 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2056 return -1;
2058 if (p->idx < q->idx)
2059 return -1;
2060 else
2062 gcc_checking_assert (p->idx > q->idx);
2063 return 1;
2067 /* Helper routine of optimize_range_test.
2068 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2069 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2070 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2071 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2072 an array of COUNT pointers to other ranges. Return
2073 true if the range merge has been successful.
2074 If OPCODE is ERROR_MARK, this is called from within
2075 maybe_optimize_range_tests and is performing inter-bb range optimization.
2076 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2077 oe->rank. */
2079 static bool
2080 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2081 struct range_entry **otherrangep,
2082 unsigned int count, enum tree_code opcode,
2083 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2084 bool in_p, tree low, tree high, bool strict_overflow_p)
2086 operand_entry_t oe = (*ops)[range->idx];
2087 tree op = oe->op;
2088 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2089 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2090 location_t loc = gimple_location (stmt);
2091 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2092 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2093 in_p, low, high);
2094 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2095 gimple_stmt_iterator gsi;
2096 unsigned int i;
2098 if (tem == NULL_TREE)
2099 return false;
2101 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2102 warning_at (loc, OPT_Wstrict_overflow,
2103 "assuming signed overflow does not occur "
2104 "when simplifying range test");
2106 if (dump_file && (dump_flags & TDF_DETAILS))
2108 struct range_entry *r;
2109 fprintf (dump_file, "Optimizing range tests ");
2110 print_generic_expr (dump_file, range->exp, 0);
2111 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2112 print_generic_expr (dump_file, range->low, 0);
2113 fprintf (dump_file, ", ");
2114 print_generic_expr (dump_file, range->high, 0);
2115 fprintf (dump_file, "]");
2116 for (i = 0; i < count; i++)
2118 if (otherrange)
2119 r = otherrange + i;
2120 else
2121 r = otherrangep[i];
2122 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2123 print_generic_expr (dump_file, r->low, 0);
2124 fprintf (dump_file, ", ");
2125 print_generic_expr (dump_file, r->high, 0);
2126 fprintf (dump_file, "]");
2128 fprintf (dump_file, "\n into ");
2129 print_generic_expr (dump_file, tem, 0);
2130 fprintf (dump_file, "\n");
2133 if (opcode == BIT_IOR_EXPR
2134 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2135 tem = invert_truthvalue_loc (loc, tem);
2137 tem = fold_convert_loc (loc, optype, tem);
2138 gsi = gsi_for_stmt (stmt);
2139 unsigned int uid = gimple_uid (stmt);
2140 /* In rare cases range->exp can be equal to lhs of stmt.
2141 In that case we have to insert after the stmt rather then before
2142 it. If stmt is a PHI, insert it at the start of the basic block. */
2143 if (op != range->exp)
2145 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2146 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2147 GSI_SAME_STMT);
2148 gsi_prev (&gsi);
2150 else if (gimple_code (stmt) != GIMPLE_PHI)
2152 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2153 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2154 GSI_CONTINUE_LINKING);
2156 else
2158 gsi = gsi_after_labels (gimple_bb (stmt));
2159 if (!gsi_end_p (gsi))
2160 uid = gimple_uid (gsi_stmt (gsi));
2161 else
2163 gsi = gsi_start_bb (gimple_bb (stmt));
2164 uid = 1;
2165 while (!gsi_end_p (gsi))
2167 uid = gimple_uid (gsi_stmt (gsi));
2168 gsi_next (&gsi);
2171 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2172 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2173 GSI_SAME_STMT);
2174 if (gsi_end_p (gsi))
2175 gsi = gsi_last_bb (gimple_bb (stmt));
2176 else
2177 gsi_prev (&gsi);
2179 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2180 if (gimple_uid (gsi_stmt (gsi)))
2181 break;
2182 else
2183 gimple_set_uid (gsi_stmt (gsi), uid);
2185 oe->op = tem;
2186 range->exp = exp;
2187 range->low = low;
2188 range->high = high;
2189 range->in_p = in_p;
2190 range->strict_overflow_p = false;
2192 for (i = 0; i < count; i++)
2194 if (otherrange)
2195 range = otherrange + i;
2196 else
2197 range = otherrangep[i];
2198 oe = (*ops)[range->idx];
2199 /* Now change all the other range test immediate uses, so that
2200 those tests will be optimized away. */
2201 if (opcode == ERROR_MARK)
2203 if (oe->op)
2204 oe->op = build_int_cst (TREE_TYPE (oe->op),
2205 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2206 else
2207 oe->op = (oe->rank == BIT_IOR_EXPR
2208 ? boolean_false_node : boolean_true_node);
2210 else
2211 oe->op = error_mark_node;
2212 range->exp = NULL_TREE;
2214 return true;
2217 /* Optimize X == CST1 || X == CST2
2218 if popcount (CST1 ^ CST2) == 1 into
2219 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2220 Similarly for ranges. E.g.
2221 X != 2 && X != 3 && X != 10 && X != 11
2222 will be transformed by the previous optimization into
2223 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2224 and this loop can transform that into
2225 !(((X & ~8) - 2U) <= 1U). */
2227 static bool
2228 optimize_range_tests_xor (enum tree_code opcode, tree type,
2229 tree lowi, tree lowj, tree highi, tree highj,
2230 vec<operand_entry_t> *ops,
2231 struct range_entry *rangei,
2232 struct range_entry *rangej)
2234 tree lowxor, highxor, tem, exp;
2235 /* Check lowi ^ lowj == highi ^ highj and
2236 popcount (lowi ^ lowj) == 1. */
2237 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2238 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2239 return false;
2240 if (!integer_pow2p (lowxor))
2241 return false;
2242 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2243 if (!tree_int_cst_equal (lowxor, highxor))
2244 return false;
2246 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2247 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2248 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2249 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2250 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2251 NULL, rangei->in_p, lowj, highj,
2252 rangei->strict_overflow_p
2253 || rangej->strict_overflow_p))
2254 return true;
2255 return false;
2258 /* Optimize X == CST1 || X == CST2
2259 if popcount (CST2 - CST1) == 1 into
2260 ((X - CST1) & ~(CST2 - CST1)) == 0.
2261 Similarly for ranges. E.g.
2262 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2263 || X == 75 || X == 45
2264 will be transformed by the previous optimization into
2265 (X - 43U) <= 3U || (X - 75U) <= 3U
2266 and this loop can transform that into
2267 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2268 static bool
2269 optimize_range_tests_diff (enum tree_code opcode, tree type,
2270 tree lowi, tree lowj, tree highi, tree highj,
2271 vec<operand_entry_t> *ops,
2272 struct range_entry *rangei,
2273 struct range_entry *rangej)
2275 tree tem1, tem2, mask;
2276 /* Check highi - lowi == highj - lowj. */
2277 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2278 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2279 return false;
2280 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2281 if (!tree_int_cst_equal (tem1, tem2))
2282 return false;
2283 /* Check popcount (lowj - lowi) == 1. */
2284 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2285 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2286 return false;
2287 if (!integer_pow2p (tem1))
2288 return false;
2290 type = unsigned_type_for (type);
2291 tem1 = fold_convert (type, tem1);
2292 tem2 = fold_convert (type, tem2);
2293 lowi = fold_convert (type, lowi);
2294 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2295 tem1 = fold_binary (MINUS_EXPR, type,
2296 fold_convert (type, rangei->exp), lowi);
2297 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2298 lowj = build_int_cst (type, 0);
2299 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2300 NULL, rangei->in_p, lowj, tem2,
2301 rangei->strict_overflow_p
2302 || rangej->strict_overflow_p))
2303 return true;
2304 return false;
2307 /* It does some common checks for function optimize_range_tests_xor and
2308 optimize_range_tests_diff.
2309 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2310 Else it calls optimize_range_tests_diff. */
2312 static bool
2313 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2314 bool optimize_xor, vec<operand_entry_t> *ops,
2315 struct range_entry *ranges)
2317 int i, j;
2318 bool any_changes = false;
2319 for (i = first; i < length; i++)
2321 tree lowi, highi, lowj, highj, type, tem;
2323 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2324 continue;
2325 type = TREE_TYPE (ranges[i].exp);
2326 if (!INTEGRAL_TYPE_P (type))
2327 continue;
2328 lowi = ranges[i].low;
2329 if (lowi == NULL_TREE)
2330 lowi = TYPE_MIN_VALUE (type);
2331 highi = ranges[i].high;
2332 if (highi == NULL_TREE)
2333 continue;
2334 for (j = i + 1; j < length && j < i + 64; j++)
2336 bool changes;
2337 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2338 continue;
2339 lowj = ranges[j].low;
2340 if (lowj == NULL_TREE)
2341 continue;
2342 highj = ranges[j].high;
2343 if (highj == NULL_TREE)
2344 highj = TYPE_MAX_VALUE (type);
2345 /* Check lowj > highi. */
2346 tem = fold_binary (GT_EXPR, boolean_type_node,
2347 lowj, highi);
2348 if (tem == NULL_TREE || !integer_onep (tem))
2349 continue;
2350 if (optimize_xor)
2351 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2352 highi, highj, ops,
2353 ranges + i, ranges + j);
2354 else
2355 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2356 highi, highj, ops,
2357 ranges + i, ranges + j);
2358 if (changes)
2360 any_changes = true;
2361 break;
2365 return any_changes;
2368 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2369 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2370 EXP on success, NULL otherwise. */
2372 static tree
2373 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2374 wide_int *mask, tree *totallowp)
2376 tree tem = int_const_binop (MINUS_EXPR, high, low);
2377 if (tem == NULL_TREE
2378 || TREE_CODE (tem) != INTEGER_CST
2379 || TREE_OVERFLOW (tem)
2380 || tree_int_cst_sgn (tem) == -1
2381 || compare_tree_int (tem, prec) != -1)
2382 return NULL_TREE;
2384 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2385 *mask = wi::shifted_mask (0, max, false, prec);
2386 if (TREE_CODE (exp) == BIT_AND_EXPR
2387 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2389 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2390 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2391 if (wi::popcount (msk) == 1
2392 && wi::ltu_p (msk, prec - max))
2394 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2395 max += msk.to_uhwi ();
2396 exp = TREE_OPERAND (exp, 0);
2397 if (integer_zerop (low)
2398 && TREE_CODE (exp) == PLUS_EXPR
2399 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2401 tree ret = TREE_OPERAND (exp, 0);
2402 STRIP_NOPS (ret);
2403 widest_int bias
2404 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2405 TYPE_PRECISION (TREE_TYPE (low))));
2406 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2407 if (totallowp)
2409 *totallowp = tbias;
2410 return ret;
2412 else if (!tree_int_cst_lt (totallow, tbias))
2413 return NULL_TREE;
2414 bias = wi::to_widest (tbias);
2415 bias -= wi::to_widest (totallow);
2416 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2418 *mask = wi::lshift (*mask, bias);
2419 return ret;
2424 if (totallowp)
2425 return exp;
2426 if (!tree_int_cst_lt (totallow, low))
2427 return exp;
2428 tem = int_const_binop (MINUS_EXPR, low, totallow);
2429 if (tem == NULL_TREE
2430 || TREE_CODE (tem) != INTEGER_CST
2431 || TREE_OVERFLOW (tem)
2432 || compare_tree_int (tem, prec - max) == 1)
2433 return NULL_TREE;
2435 *mask = wi::lshift (*mask, wi::to_widest (tem));
2436 return exp;
2439 /* Attempt to optimize small range tests using bit test.
2440 E.g.
2441 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2442 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2443 has been by earlier optimizations optimized into:
2444 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2445 As all the 43 through 82 range is less than 64 numbers,
2446 for 64-bit word targets optimize that into:
2447 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2449 static bool
2450 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2451 vec<operand_entry_t> *ops,
2452 struct range_entry *ranges)
2454 int i, j;
2455 bool any_changes = false;
2456 int prec = GET_MODE_BITSIZE (word_mode);
2457 auto_vec<struct range_entry *, 64> candidates;
2459 for (i = first; i < length - 2; i++)
2461 tree lowi, highi, lowj, highj, type;
2463 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2464 continue;
2465 type = TREE_TYPE (ranges[i].exp);
2466 if (!INTEGRAL_TYPE_P (type))
2467 continue;
2468 lowi = ranges[i].low;
2469 if (lowi == NULL_TREE)
2470 lowi = TYPE_MIN_VALUE (type);
2471 highi = ranges[i].high;
2472 if (highi == NULL_TREE)
2473 continue;
2474 wide_int mask;
2475 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2476 highi, &mask, &lowi);
2477 if (exp == NULL_TREE)
2478 continue;
2479 bool strict_overflow_p = ranges[i].strict_overflow_p;
2480 candidates.truncate (0);
2481 int end = MIN (i + 64, length);
2482 for (j = i + 1; j < end; j++)
2484 tree exp2;
2485 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2486 continue;
2487 if (ranges[j].exp == exp)
2489 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2491 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2492 if (exp2 == exp)
2494 else if (TREE_CODE (exp2) == PLUS_EXPR)
2496 exp2 = TREE_OPERAND (exp2, 0);
2497 STRIP_NOPS (exp2);
2498 if (exp2 != exp)
2499 continue;
2501 else
2502 continue;
2504 else
2505 continue;
2506 lowj = ranges[j].low;
2507 if (lowj == NULL_TREE)
2508 continue;
2509 highj = ranges[j].high;
2510 if (highj == NULL_TREE)
2511 highj = TYPE_MAX_VALUE (type);
2512 wide_int mask2;
2513 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2514 highj, &mask2, NULL);
2515 if (exp2 != exp)
2516 continue;
2517 mask |= mask2;
2518 strict_overflow_p |= ranges[j].strict_overflow_p;
2519 candidates.safe_push (&ranges[j]);
2522 /* If we need otherwise 3 or more comparisons, use a bit test. */
2523 if (candidates.length () >= 2)
2525 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2526 wi::to_widest (lowi)
2527 + prec - 1 - wi::clz (mask));
2528 operand_entry_t oe = (*ops)[ranges[i].idx];
2529 tree op = oe->op;
2530 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2531 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2532 location_t loc = gimple_location (stmt);
2533 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2535 /* See if it isn't cheaper to pretend the minimum value of the
2536 range is 0, if maximum value is small enough.
2537 We can avoid then subtraction of the minimum value, but the
2538 mask constant could be perhaps more expensive. */
2539 if (compare_tree_int (lowi, 0) > 0
2540 && compare_tree_int (high, prec) < 0)
2542 int cost_diff;
2543 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2544 rtx reg = gen_raw_REG (word_mode, 10000);
2545 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2546 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2547 GEN_INT (-m)), speed_p);
2548 rtx r = immed_wide_int_const (mask, word_mode);
2549 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2550 speed_p);
2551 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2552 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2553 speed_p);
2554 if (cost_diff > 0)
2556 mask = wi::lshift (mask, m);
2557 lowi = build_zero_cst (TREE_TYPE (lowi));
2561 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2562 false, lowi, high);
2563 if (tem == NULL_TREE || is_gimple_val (tem))
2564 continue;
2565 tree etype = unsigned_type_for (TREE_TYPE (exp));
2566 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2567 fold_convert_loc (loc, etype, exp),
2568 fold_convert_loc (loc, etype, lowi));
2569 exp = fold_convert_loc (loc, integer_type_node, exp);
2570 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2571 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2572 build_int_cst (word_type, 1), exp);
2573 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2574 wide_int_to_tree (word_type, mask));
2575 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2576 build_zero_cst (word_type));
2577 if (is_gimple_val (exp))
2578 continue;
2580 /* The shift might have undefined behavior if TEM is true,
2581 but reassociate_bb isn't prepared to have basic blocks
2582 split when it is running. So, temporarily emit a code
2583 with BIT_IOR_EXPR instead of &&, and fix it up in
2584 branch_fixup. */
2585 gimple_seq seq;
2586 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2587 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2588 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2589 gimple_seq seq2;
2590 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2591 gimple_seq_add_seq_without_update (&seq, seq2);
2592 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2593 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2594 gimple g = gimple_build_assign (make_ssa_name (optype),
2595 BIT_IOR_EXPR, tem, exp);
2596 gimple_set_location (g, loc);
2597 gimple_seq_add_stmt_without_update (&seq, g);
2598 exp = gimple_assign_lhs (g);
2599 tree val = build_zero_cst (optype);
2600 if (update_range_test (&ranges[i], NULL, candidates.address (),
2601 candidates.length (), opcode, ops, exp,
2602 seq, false, val, val, strict_overflow_p))
2604 any_changes = true;
2605 reassoc_branch_fixups.safe_push (tem);
2607 else
2608 gimple_seq_discard (seq);
2611 return any_changes;
2614 /* Optimize range tests, similarly how fold_range_test optimizes
2615 it on trees. The tree code for the binary
2616 operation between all the operands is OPCODE.
2617 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2618 maybe_optimize_range_tests for inter-bb range optimization.
2619 In that case if oe->op is NULL, oe->id is bb->index whose
2620 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2621 the actual opcode. */
2623 static bool
2624 optimize_range_tests (enum tree_code opcode,
2625 vec<operand_entry_t> *ops)
2627 unsigned int length = ops->length (), i, j, first;
2628 operand_entry_t oe;
2629 struct range_entry *ranges;
2630 bool any_changes = false;
2632 if (length == 1)
2633 return false;
2635 ranges = XNEWVEC (struct range_entry, length);
2636 for (i = 0; i < length; i++)
2638 oe = (*ops)[i];
2639 ranges[i].idx = i;
2640 init_range_entry (ranges + i, oe->op,
2641 oe->op ? NULL :
2642 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2643 /* For | invert it now, we will invert it again before emitting
2644 the optimized expression. */
2645 if (opcode == BIT_IOR_EXPR
2646 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2647 ranges[i].in_p = !ranges[i].in_p;
2650 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2651 for (i = 0; i < length; i++)
2652 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2653 break;
2655 /* Try to merge ranges. */
2656 for (first = i; i < length; i++)
2658 tree low = ranges[i].low;
2659 tree high = ranges[i].high;
2660 int in_p = ranges[i].in_p;
2661 bool strict_overflow_p = ranges[i].strict_overflow_p;
2662 int update_fail_count = 0;
2664 for (j = i + 1; j < length; j++)
2666 if (ranges[i].exp != ranges[j].exp)
2667 break;
2668 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2669 ranges[j].in_p, ranges[j].low, ranges[j].high))
2670 break;
2671 strict_overflow_p |= ranges[j].strict_overflow_p;
2674 if (j == i + 1)
2675 continue;
2677 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2678 opcode, ops, ranges[i].exp, NULL, in_p,
2679 low, high, strict_overflow_p))
2681 i = j - 1;
2682 any_changes = true;
2684 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2685 while update_range_test would fail. */
2686 else if (update_fail_count == 64)
2687 i = j - 1;
2688 else
2689 ++update_fail_count;
2692 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2693 ops, ranges);
2695 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2696 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2697 ops, ranges);
2698 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2699 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2700 ops, ranges);
2702 if (any_changes && opcode != ERROR_MARK)
2704 j = 0;
2705 FOR_EACH_VEC_ELT (*ops, i, oe)
2707 if (oe->op == error_mark_node)
2708 continue;
2709 else if (i != j)
2710 (*ops)[j] = oe;
2711 j++;
2713 ops->truncate (j);
2716 XDELETEVEC (ranges);
2717 return any_changes;
2720 /* Return true if STMT is a cast like:
2721 <bb N>:
2723 _123 = (int) _234;
2725 <bb M>:
2726 # _345 = PHI <_123(N), 1(...), 1(...)>
2727 where _234 has bool type, _123 has single use and
2728 bb N has a single successor M. This is commonly used in
2729 the last block of a range test. */
2731 static bool
2732 final_range_test_p (gimple stmt)
2734 basic_block bb, rhs_bb;
2735 edge e;
2736 tree lhs, rhs;
2737 use_operand_p use_p;
2738 gimple use_stmt;
2740 if (!gimple_assign_cast_p (stmt))
2741 return false;
2742 bb = gimple_bb (stmt);
2743 if (!single_succ_p (bb))
2744 return false;
2745 e = single_succ_edge (bb);
2746 if (e->flags & EDGE_COMPLEX)
2747 return false;
2749 lhs = gimple_assign_lhs (stmt);
2750 rhs = gimple_assign_rhs1 (stmt);
2751 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2752 || TREE_CODE (rhs) != SSA_NAME
2753 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2754 return false;
2756 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2757 if (!single_imm_use (lhs, &use_p, &use_stmt))
2758 return false;
2760 if (gimple_code (use_stmt) != GIMPLE_PHI
2761 || gimple_bb (use_stmt) != e->dest)
2762 return false;
2764 /* And that the rhs is defined in the same loop. */
2765 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2766 if (rhs_bb == NULL
2767 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2768 return false;
2770 return true;
2773 /* Return true if BB is suitable basic block for inter-bb range test
2774 optimization. If BACKWARD is true, BB should be the only predecessor
2775 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2776 or compared with to find a common basic block to which all conditions
2777 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2778 be the only predecessor of BB. */
2780 static bool
2781 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2782 bool backward)
2784 edge_iterator ei, ei2;
2785 edge e, e2;
2786 gimple stmt;
2787 gphi_iterator gsi;
2788 bool other_edge_seen = false;
2789 bool is_cond;
2791 if (test_bb == bb)
2792 return false;
2793 /* Check last stmt first. */
2794 stmt = last_stmt (bb);
2795 if (stmt == NULL
2796 || (gimple_code (stmt) != GIMPLE_COND
2797 && (backward || !final_range_test_p (stmt)))
2798 || gimple_visited_p (stmt)
2799 || stmt_could_throw_p (stmt)
2800 || *other_bb == bb)
2801 return false;
2802 is_cond = gimple_code (stmt) == GIMPLE_COND;
2803 if (is_cond)
2805 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2806 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2807 to *OTHER_BB (if not set yet, try to find it out). */
2808 if (EDGE_COUNT (bb->succs) != 2)
2809 return false;
2810 FOR_EACH_EDGE (e, ei, bb->succs)
2812 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2813 return false;
2814 if (e->dest == test_bb)
2816 if (backward)
2817 continue;
2818 else
2819 return false;
2821 if (e->dest == bb)
2822 return false;
2823 if (*other_bb == NULL)
2825 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2826 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2827 return false;
2828 else if (e->dest == e2->dest)
2829 *other_bb = e->dest;
2830 if (*other_bb == NULL)
2831 return false;
2833 if (e->dest == *other_bb)
2834 other_edge_seen = true;
2835 else if (backward)
2836 return false;
2838 if (*other_bb == NULL || !other_edge_seen)
2839 return false;
2841 else if (single_succ (bb) != *other_bb)
2842 return false;
2844 /* Now check all PHIs of *OTHER_BB. */
2845 e = find_edge (bb, *other_bb);
2846 e2 = find_edge (test_bb, *other_bb);
2847 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2849 gphi *phi = gsi.phi ();
2850 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2851 corresponding to BB and TEST_BB predecessor must be the same. */
2852 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2853 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2855 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2856 one of the PHIs should have the lhs of the last stmt in
2857 that block as PHI arg and that PHI should have 0 or 1
2858 corresponding to it in all other range test basic blocks
2859 considered. */
2860 if (!is_cond)
2862 if (gimple_phi_arg_def (phi, e->dest_idx)
2863 == gimple_assign_lhs (stmt)
2864 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2865 || integer_onep (gimple_phi_arg_def (phi,
2866 e2->dest_idx))))
2867 continue;
2869 else
2871 gimple test_last = last_stmt (test_bb);
2872 if (gimple_code (test_last) != GIMPLE_COND
2873 && gimple_phi_arg_def (phi, e2->dest_idx)
2874 == gimple_assign_lhs (test_last)
2875 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2876 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2877 continue;
2880 return false;
2883 return true;
2886 /* Return true if BB doesn't have side-effects that would disallow
2887 range test optimization, all SSA_NAMEs set in the bb are consumed
2888 in the bb and there are no PHIs. */
2890 static bool
2891 no_side_effect_bb (basic_block bb)
2893 gimple_stmt_iterator gsi;
2894 gimple last;
2896 if (!gimple_seq_empty_p (phi_nodes (bb)))
2897 return false;
2898 last = last_stmt (bb);
2899 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2901 gimple stmt = gsi_stmt (gsi);
2902 tree lhs;
2903 imm_use_iterator imm_iter;
2904 use_operand_p use_p;
2906 if (is_gimple_debug (stmt))
2907 continue;
2908 if (gimple_has_side_effects (stmt))
2909 return false;
2910 if (stmt == last)
2911 return true;
2912 if (!is_gimple_assign (stmt))
2913 return false;
2914 lhs = gimple_assign_lhs (stmt);
2915 if (TREE_CODE (lhs) != SSA_NAME)
2916 return false;
2917 if (gimple_assign_rhs_could_trap_p (stmt))
2918 return false;
2919 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2921 gimple use_stmt = USE_STMT (use_p);
2922 if (is_gimple_debug (use_stmt))
2923 continue;
2924 if (gimple_bb (use_stmt) != bb)
2925 return false;
2928 return false;
2931 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2932 return true and fill in *OPS recursively. */
2934 static bool
2935 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2936 struct loop *loop)
2938 gimple stmt = SSA_NAME_DEF_STMT (var);
2939 tree rhs[2];
2940 int i;
2942 if (!is_reassociable_op (stmt, code, loop))
2943 return false;
2945 rhs[0] = gimple_assign_rhs1 (stmt);
2946 rhs[1] = gimple_assign_rhs2 (stmt);
2947 gimple_set_visited (stmt, true);
2948 for (i = 0; i < 2; i++)
2949 if (TREE_CODE (rhs[i]) == SSA_NAME
2950 && !get_ops (rhs[i], code, ops, loop)
2951 && has_single_use (rhs[i]))
2953 operand_entry_t oe = operand_entry_pool.allocate ();
2955 oe->op = rhs[i];
2956 oe->rank = code;
2957 oe->id = 0;
2958 oe->count = 1;
2959 ops->safe_push (oe);
2961 return true;
2964 /* Find the ops that were added by get_ops starting from VAR, see if
2965 they were changed during update_range_test and if yes, create new
2966 stmts. */
2968 static tree
2969 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2970 unsigned int *pidx, struct loop *loop)
2972 gimple stmt = SSA_NAME_DEF_STMT (var);
2973 tree rhs[4];
2974 int i;
2976 if (!is_reassociable_op (stmt, code, loop))
2977 return NULL;
2979 rhs[0] = gimple_assign_rhs1 (stmt);
2980 rhs[1] = gimple_assign_rhs2 (stmt);
2981 rhs[2] = rhs[0];
2982 rhs[3] = rhs[1];
2983 for (i = 0; i < 2; i++)
2984 if (TREE_CODE (rhs[i]) == SSA_NAME)
2986 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2987 if (rhs[2 + i] == NULL_TREE)
2989 if (has_single_use (rhs[i]))
2990 rhs[2 + i] = ops[(*pidx)++]->op;
2991 else
2992 rhs[2 + i] = rhs[i];
2995 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2996 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2998 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2999 var = make_ssa_name (TREE_TYPE (var));
3000 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3001 rhs[2], rhs[3]);
3002 gimple_set_uid (g, gimple_uid (stmt));
3003 gimple_set_visited (g, true);
3004 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3006 return var;
3009 /* Structure to track the initial value passed to get_ops and
3010 the range in the ops vector for each basic block. */
3012 struct inter_bb_range_test_entry
3014 tree op;
3015 unsigned int first_idx, last_idx;
3018 /* Inter-bb range test optimization. */
3020 static void
3021 maybe_optimize_range_tests (gimple stmt)
3023 basic_block first_bb = gimple_bb (stmt);
3024 basic_block last_bb = first_bb;
3025 basic_block other_bb = NULL;
3026 basic_block bb;
3027 edge_iterator ei;
3028 edge e;
3029 auto_vec<operand_entry_t> ops;
3030 auto_vec<inter_bb_range_test_entry> bbinfo;
3031 bool any_changes = false;
3033 /* Consider only basic blocks that end with GIMPLE_COND or
3034 a cast statement satisfying final_range_test_p. All
3035 but the last bb in the first_bb .. last_bb range
3036 should end with GIMPLE_COND. */
3037 if (gimple_code (stmt) == GIMPLE_COND)
3039 if (EDGE_COUNT (first_bb->succs) != 2)
3040 return;
3042 else if (final_range_test_p (stmt))
3043 other_bb = single_succ (first_bb);
3044 else
3045 return;
3047 if (stmt_could_throw_p (stmt))
3048 return;
3050 /* As relative ordering of post-dominator sons isn't fixed,
3051 maybe_optimize_range_tests can be called first on any
3052 bb in the range we want to optimize. So, start searching
3053 backwards, if first_bb can be set to a predecessor. */
3054 while (single_pred_p (first_bb))
3056 basic_block pred_bb = single_pred (first_bb);
3057 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3058 break;
3059 if (!no_side_effect_bb (first_bb))
3060 break;
3061 first_bb = pred_bb;
3063 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3064 Before starting forward search in last_bb successors, find
3065 out the other_bb. */
3066 if (first_bb == last_bb)
3068 other_bb = NULL;
3069 /* As non-GIMPLE_COND last stmt always terminates the range,
3070 if forward search didn't discover anything, just give up. */
3071 if (gimple_code (stmt) != GIMPLE_COND)
3072 return;
3073 /* Look at both successors. Either it ends with a GIMPLE_COND
3074 and satisfies suitable_cond_bb, or ends with a cast and
3075 other_bb is that cast's successor. */
3076 FOR_EACH_EDGE (e, ei, first_bb->succs)
3077 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3078 || e->dest == first_bb)
3079 return;
3080 else if (single_pred_p (e->dest))
3082 stmt = last_stmt (e->dest);
3083 if (stmt
3084 && gimple_code (stmt) == GIMPLE_COND
3085 && EDGE_COUNT (e->dest->succs) == 2)
3087 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3088 break;
3089 else
3090 other_bb = NULL;
3092 else if (stmt
3093 && final_range_test_p (stmt)
3094 && find_edge (first_bb, single_succ (e->dest)))
3096 other_bb = single_succ (e->dest);
3097 if (other_bb == first_bb)
3098 other_bb = NULL;
3101 if (other_bb == NULL)
3102 return;
3104 /* Now do the forward search, moving last_bb to successor bbs
3105 that aren't other_bb. */
3106 while (EDGE_COUNT (last_bb->succs) == 2)
3108 FOR_EACH_EDGE (e, ei, last_bb->succs)
3109 if (e->dest != other_bb)
3110 break;
3111 if (e == NULL)
3112 break;
3113 if (!single_pred_p (e->dest))
3114 break;
3115 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3116 break;
3117 if (!no_side_effect_bb (e->dest))
3118 break;
3119 last_bb = e->dest;
3121 if (first_bb == last_bb)
3122 return;
3123 /* Here basic blocks first_bb through last_bb's predecessor
3124 end with GIMPLE_COND, all of them have one of the edges to
3125 other_bb and another to another block in the range,
3126 all blocks except first_bb don't have side-effects and
3127 last_bb ends with either GIMPLE_COND, or cast satisfying
3128 final_range_test_p. */
3129 for (bb = last_bb; ; bb = single_pred (bb))
3131 enum tree_code code;
3132 tree lhs, rhs;
3133 inter_bb_range_test_entry bb_ent;
3135 bb_ent.op = NULL_TREE;
3136 bb_ent.first_idx = ops.length ();
3137 bb_ent.last_idx = bb_ent.first_idx;
3138 e = find_edge (bb, other_bb);
3139 stmt = last_stmt (bb);
3140 gimple_set_visited (stmt, true);
3141 if (gimple_code (stmt) != GIMPLE_COND)
3143 use_operand_p use_p;
3144 gimple phi;
3145 edge e2;
3146 unsigned int d;
3148 lhs = gimple_assign_lhs (stmt);
3149 rhs = gimple_assign_rhs1 (stmt);
3150 gcc_assert (bb == last_bb);
3152 /* stmt is
3153 _123 = (int) _234;
3155 followed by:
3156 <bb M>:
3157 # _345 = PHI <_123(N), 1(...), 1(...)>
3159 or 0 instead of 1. If it is 0, the _234
3160 range test is anded together with all the
3161 other range tests, if it is 1, it is ored with
3162 them. */
3163 single_imm_use (lhs, &use_p, &phi);
3164 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3165 e2 = find_edge (first_bb, other_bb);
3166 d = e2->dest_idx;
3167 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3168 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3169 code = BIT_AND_EXPR;
3170 else
3172 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3173 code = BIT_IOR_EXPR;
3176 /* If _234 SSA_NAME_DEF_STMT is
3177 _234 = _567 | _789;
3178 (or &, corresponding to 1/0 in the phi arguments,
3179 push into ops the individual range test arguments
3180 of the bitwise or resp. and, recursively. */
3181 if (!get_ops (rhs, code, &ops,
3182 loop_containing_stmt (stmt))
3183 && has_single_use (rhs))
3185 /* Otherwise, push the _234 range test itself. */
3186 operand_entry_t oe = operand_entry_pool.allocate ();
3188 oe->op = rhs;
3189 oe->rank = code;
3190 oe->id = 0;
3191 oe->count = 1;
3192 ops.safe_push (oe);
3193 bb_ent.last_idx++;
3195 else
3196 bb_ent.last_idx = ops.length ();
3197 bb_ent.op = rhs;
3198 bbinfo.safe_push (bb_ent);
3199 continue;
3201 /* Otherwise stmt is GIMPLE_COND. */
3202 code = gimple_cond_code (stmt);
3203 lhs = gimple_cond_lhs (stmt);
3204 rhs = gimple_cond_rhs (stmt);
3205 if (TREE_CODE (lhs) == SSA_NAME
3206 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3207 && ((code != EQ_EXPR && code != NE_EXPR)
3208 || rhs != boolean_false_node
3209 /* Either push into ops the individual bitwise
3210 or resp. and operands, depending on which
3211 edge is other_bb. */
3212 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3213 ^ (code == EQ_EXPR))
3214 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3215 loop_containing_stmt (stmt))))
3217 /* Or push the GIMPLE_COND stmt itself. */
3218 operand_entry_t oe = operand_entry_pool.allocate ();
3220 oe->op = NULL;
3221 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3222 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3223 /* oe->op = NULL signs that there is no SSA_NAME
3224 for the range test, and oe->id instead is the
3225 basic block number, at which's end the GIMPLE_COND
3226 is. */
3227 oe->id = bb->index;
3228 oe->count = 1;
3229 ops.safe_push (oe);
3230 bb_ent.op = NULL;
3231 bb_ent.last_idx++;
3233 else if (ops.length () > bb_ent.first_idx)
3235 bb_ent.op = lhs;
3236 bb_ent.last_idx = ops.length ();
3238 bbinfo.safe_push (bb_ent);
3239 if (bb == first_bb)
3240 break;
3242 if (ops.length () > 1)
3243 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3244 if (any_changes)
3246 unsigned int idx;
3247 /* update_ops relies on has_single_use predicates returning the
3248 same values as it did during get_ops earlier. Additionally it
3249 never removes statements, only adds new ones and it should walk
3250 from the single imm use and check the predicate already before
3251 making those changes.
3252 On the other side, the handling of GIMPLE_COND directly can turn
3253 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3254 it needs to be done in a separate loop afterwards. */
3255 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3257 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3258 && bbinfo[idx].op != NULL_TREE)
3260 tree new_op;
3262 stmt = last_stmt (bb);
3263 new_op = update_ops (bbinfo[idx].op,
3264 (enum tree_code)
3265 ops[bbinfo[idx].first_idx]->rank,
3266 ops, &bbinfo[idx].first_idx,
3267 loop_containing_stmt (stmt));
3268 if (new_op == NULL_TREE)
3270 gcc_assert (bb == last_bb);
3271 new_op = ops[bbinfo[idx].first_idx++]->op;
3273 if (bbinfo[idx].op != new_op)
3275 imm_use_iterator iter;
3276 use_operand_p use_p;
3277 gimple use_stmt, cast_stmt = NULL;
3279 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3280 if (is_gimple_debug (use_stmt))
3281 continue;
3282 else if (gimple_code (use_stmt) == GIMPLE_COND
3283 || gimple_code (use_stmt) == GIMPLE_PHI)
3284 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3285 SET_USE (use_p, new_op);
3286 else if (gimple_assign_cast_p (use_stmt))
3287 cast_stmt = use_stmt;
3288 else
3289 gcc_unreachable ();
3290 if (cast_stmt)
3292 gcc_assert (bb == last_bb);
3293 tree lhs = gimple_assign_lhs (cast_stmt);
3294 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3295 enum tree_code rhs_code
3296 = gimple_assign_rhs_code (cast_stmt);
3297 gassign *g;
3298 if (is_gimple_min_invariant (new_op))
3300 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3301 g = gimple_build_assign (new_lhs, new_op);
3303 else
3304 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3305 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3306 gimple_set_uid (g, gimple_uid (cast_stmt));
3307 gimple_set_visited (g, true);
3308 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3309 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3310 if (is_gimple_debug (use_stmt))
3311 continue;
3312 else if (gimple_code (use_stmt) == GIMPLE_COND
3313 || gimple_code (use_stmt) == GIMPLE_PHI)
3314 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3315 SET_USE (use_p, new_lhs);
3316 else
3317 gcc_unreachable ();
3321 if (bb == first_bb)
3322 break;
3324 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3326 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3327 && bbinfo[idx].op == NULL_TREE
3328 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3330 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3331 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3332 gimple_cond_make_false (cond_stmt);
3333 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3334 gimple_cond_make_true (cond_stmt);
3335 else
3337 gimple_cond_set_code (cond_stmt, NE_EXPR);
3338 gimple_cond_set_lhs (cond_stmt,
3339 ops[bbinfo[idx].first_idx]->op);
3340 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3342 update_stmt (cond_stmt);
3344 if (bb == first_bb)
3345 break;
3350 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3351 of STMT in it's operands. This is also known as a "destructive
3352 update" operation. */
3354 static bool
3355 is_phi_for_stmt (gimple stmt, tree operand)
3357 gimple def_stmt;
3358 gphi *def_phi;
3359 tree lhs;
3360 use_operand_p arg_p;
3361 ssa_op_iter i;
3363 if (TREE_CODE (operand) != SSA_NAME)
3364 return false;
3366 lhs = gimple_assign_lhs (stmt);
3368 def_stmt = SSA_NAME_DEF_STMT (operand);
3369 def_phi = dyn_cast <gphi *> (def_stmt);
3370 if (!def_phi)
3371 return false;
3373 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3374 if (lhs == USE_FROM_PTR (arg_p))
3375 return true;
3376 return false;
3379 /* Remove def stmt of VAR if VAR has zero uses and recurse
3380 on rhs1 operand if so. */
3382 static void
3383 remove_visited_stmt_chain (tree var)
3385 gimple stmt;
3386 gimple_stmt_iterator gsi;
3388 while (1)
3390 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3391 return;
3392 stmt = SSA_NAME_DEF_STMT (var);
3393 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3395 var = gimple_assign_rhs1 (stmt);
3396 gsi = gsi_for_stmt (stmt);
3397 reassoc_remove_stmt (&gsi);
3398 release_defs (stmt);
3400 else
3401 return;
3405 /* This function checks three consequtive operands in
3406 passed operands vector OPS starting from OPINDEX and
3407 swaps two operands if it is profitable for binary operation
3408 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3410 We pair ops with the same rank if possible.
3412 The alternative we try is to see if STMT is a destructive
3413 update style statement, which is like:
3414 b = phi (a, ...)
3415 a = c + b;
3416 In that case, we want to use the destructive update form to
3417 expose the possible vectorizer sum reduction opportunity.
3418 In that case, the third operand will be the phi node. This
3419 check is not performed if STMT is null.
3421 We could, of course, try to be better as noted above, and do a
3422 lot of work to try to find these opportunities in >3 operand
3423 cases, but it is unlikely to be worth it. */
3425 static void
3426 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3427 unsigned int opindex, gimple stmt)
3429 operand_entry_t oe1, oe2, oe3;
3431 oe1 = ops[opindex];
3432 oe2 = ops[opindex + 1];
3433 oe3 = ops[opindex + 2];
3435 if ((oe1->rank == oe2->rank
3436 && oe2->rank != oe3->rank)
3437 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3438 && !is_phi_for_stmt (stmt, oe1->op)
3439 && !is_phi_for_stmt (stmt, oe2->op)))
3441 struct operand_entry temp = *oe3;
3442 oe3->op = oe1->op;
3443 oe3->rank = oe1->rank;
3444 oe1->op = temp.op;
3445 oe1->rank= temp.rank;
3447 else if ((oe1->rank == oe3->rank
3448 && oe2->rank != oe3->rank)
3449 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3450 && !is_phi_for_stmt (stmt, oe1->op)
3451 && !is_phi_for_stmt (stmt, oe3->op)))
3453 struct operand_entry temp = *oe2;
3454 oe2->op = oe1->op;
3455 oe2->rank = oe1->rank;
3456 oe1->op = temp.op;
3457 oe1->rank = temp.rank;
3461 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3462 two definitions, otherwise return STMT. */
3464 static inline gimple
3465 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3467 if (TREE_CODE (rhs1) == SSA_NAME
3468 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3469 stmt = SSA_NAME_DEF_STMT (rhs1);
3470 if (TREE_CODE (rhs2) == SSA_NAME
3471 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3472 stmt = SSA_NAME_DEF_STMT (rhs2);
3473 return stmt;
3476 /* Recursively rewrite our linearized statements so that the operators
3477 match those in OPS[OPINDEX], putting the computation in rank
3478 order. Return new lhs. */
3480 static tree
3481 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3482 vec<operand_entry_t> ops, bool changed)
3484 tree rhs1 = gimple_assign_rhs1 (stmt);
3485 tree rhs2 = gimple_assign_rhs2 (stmt);
3486 tree lhs = gimple_assign_lhs (stmt);
3487 operand_entry_t oe;
3489 /* The final recursion case for this function is that you have
3490 exactly two operations left.
3491 If we had exactly one op in the entire list to start with, we
3492 would have never called this function, and the tail recursion
3493 rewrites them one at a time. */
3494 if (opindex + 2 == ops.length ())
3496 operand_entry_t oe1, oe2;
3498 oe1 = ops[opindex];
3499 oe2 = ops[opindex + 1];
3501 if (rhs1 != oe1->op || rhs2 != oe2->op)
3503 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3504 unsigned int uid = gimple_uid (stmt);
3506 if (dump_file && (dump_flags & TDF_DETAILS))
3508 fprintf (dump_file, "Transforming ");
3509 print_gimple_stmt (dump_file, stmt, 0, 0);
3512 /* Even when changed is false, reassociation could have e.g. removed
3513 some redundant operations, so unless we are just swapping the
3514 arguments or unless there is no change at all (then we just
3515 return lhs), force creation of a new SSA_NAME. */
3516 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3518 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3519 lhs = make_ssa_name (TREE_TYPE (lhs));
3520 stmt
3521 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3522 oe1->op, oe2->op);
3523 gimple_set_uid (stmt, uid);
3524 gimple_set_visited (stmt, true);
3525 if (insert_point == gsi_stmt (gsi))
3526 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3527 else
3528 insert_stmt_after (stmt, insert_point);
3530 else
3532 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3533 == stmt);
3534 gimple_assign_set_rhs1 (stmt, oe1->op);
3535 gimple_assign_set_rhs2 (stmt, oe2->op);
3536 update_stmt (stmt);
3539 if (rhs1 != oe1->op && rhs1 != oe2->op)
3540 remove_visited_stmt_chain (rhs1);
3542 if (dump_file && (dump_flags & TDF_DETAILS))
3544 fprintf (dump_file, " into ");
3545 print_gimple_stmt (dump_file, stmt, 0, 0);
3548 return lhs;
3551 /* If we hit here, we should have 3 or more ops left. */
3552 gcc_assert (opindex + 2 < ops.length ());
3554 /* Rewrite the next operator. */
3555 oe = ops[opindex];
3557 /* Recurse on the LHS of the binary operator, which is guaranteed to
3558 be the non-leaf side. */
3559 tree new_rhs1
3560 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3561 changed || oe->op != rhs2);
3563 if (oe->op != rhs2 || new_rhs1 != rhs1)
3565 if (dump_file && (dump_flags & TDF_DETAILS))
3567 fprintf (dump_file, "Transforming ");
3568 print_gimple_stmt (dump_file, stmt, 0, 0);
3571 /* If changed is false, this is either opindex == 0
3572 or all outer rhs2's were equal to corresponding oe->op,
3573 and powi_result is NULL.
3574 That means lhs is equivalent before and after reassociation.
3575 Otherwise ensure the old lhs SSA_NAME is not reused and
3576 create a new stmt as well, so that any debug stmts will be
3577 properly adjusted. */
3578 if (changed)
3580 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3581 unsigned int uid = gimple_uid (stmt);
3582 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3584 lhs = make_ssa_name (TREE_TYPE (lhs));
3585 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3586 new_rhs1, oe->op);
3587 gimple_set_uid (stmt, uid);
3588 gimple_set_visited (stmt, true);
3589 if (insert_point == gsi_stmt (gsi))
3590 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3591 else
3592 insert_stmt_after (stmt, insert_point);
3594 else
3596 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3597 == stmt);
3598 gimple_assign_set_rhs1 (stmt, new_rhs1);
3599 gimple_assign_set_rhs2 (stmt, oe->op);
3600 update_stmt (stmt);
3603 if (dump_file && (dump_flags & TDF_DETAILS))
3605 fprintf (dump_file, " into ");
3606 print_gimple_stmt (dump_file, stmt, 0, 0);
3609 return lhs;
3612 /* Find out how many cycles we need to compute statements chain.
3613 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3614 maximum number of independent statements we may execute per cycle. */
3616 static int
3617 get_required_cycles (int ops_num, int cpu_width)
3619 int res;
3620 int elog;
3621 unsigned int rest;
3623 /* While we have more than 2 * cpu_width operands
3624 we may reduce number of operands by cpu_width
3625 per cycle. */
3626 res = ops_num / (2 * cpu_width);
3628 /* Remained operands count may be reduced twice per cycle
3629 until we have only one operand. */
3630 rest = (unsigned)(ops_num - res * cpu_width);
3631 elog = exact_log2 (rest);
3632 if (elog >= 0)
3633 res += elog;
3634 else
3635 res += floor_log2 (rest) + 1;
3637 return res;
3640 /* Returns an optimal number of registers to use for computation of
3641 given statements. */
3643 static int
3644 get_reassociation_width (int ops_num, enum tree_code opc,
3645 machine_mode mode)
3647 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3648 int width;
3649 int width_min;
3650 int cycles_best;
3652 if (param_width > 0)
3653 width = param_width;
3654 else
3655 width = targetm.sched.reassociation_width (opc, mode);
3657 if (width == 1)
3658 return width;
3660 /* Get the minimal time required for sequence computation. */
3661 cycles_best = get_required_cycles (ops_num, width);
3663 /* Check if we may use less width and still compute sequence for
3664 the same time. It will allow us to reduce registers usage.
3665 get_required_cycles is monotonically increasing with lower width
3666 so we can perform a binary search for the minimal width that still
3667 results in the optimal cycle count. */
3668 width_min = 1;
3669 while (width > width_min)
3671 int width_mid = (width + width_min) / 2;
3673 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3674 width = width_mid;
3675 else if (width_min < width_mid)
3676 width_min = width_mid;
3677 else
3678 break;
3681 return width;
3684 /* Recursively rewrite our linearized statements so that the operators
3685 match those in OPS[OPINDEX], putting the computation in rank
3686 order and trying to allow operations to be executed in
3687 parallel. */
3689 static void
3690 rewrite_expr_tree_parallel (gassign *stmt, int width,
3691 vec<operand_entry_t> ops)
3693 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3694 int op_num = ops.length ();
3695 int stmt_num = op_num - 1;
3696 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3697 int op_index = op_num - 1;
3698 int stmt_index = 0;
3699 int ready_stmts_end = 0;
3700 int i = 0;
3701 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3703 /* We start expression rewriting from the top statements.
3704 So, in this loop we create a full list of statements
3705 we will work with. */
3706 stmts[stmt_num - 1] = stmt;
3707 for (i = stmt_num - 2; i >= 0; i--)
3708 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3710 for (i = 0; i < stmt_num; i++)
3712 tree op1, op2;
3714 /* Determine whether we should use results of
3715 already handled statements or not. */
3716 if (ready_stmts_end == 0
3717 && (i - stmt_index >= width || op_index < 1))
3718 ready_stmts_end = i;
3720 /* Now we choose operands for the next statement. Non zero
3721 value in ready_stmts_end means here that we should use
3722 the result of already generated statements as new operand. */
3723 if (ready_stmts_end > 0)
3725 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3726 if (ready_stmts_end > stmt_index)
3727 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3728 else if (op_index >= 0)
3729 op2 = ops[op_index--]->op;
3730 else
3732 gcc_assert (stmt_index < i);
3733 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3736 if (stmt_index >= ready_stmts_end)
3737 ready_stmts_end = 0;
3739 else
3741 if (op_index > 1)
3742 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3743 op2 = ops[op_index--]->op;
3744 op1 = ops[op_index--]->op;
3747 /* If we emit the last statement then we should put
3748 operands into the last statement. It will also
3749 break the loop. */
3750 if (op_index < 0 && stmt_index == i)
3751 i = stmt_num - 1;
3753 if (dump_file && (dump_flags & TDF_DETAILS))
3755 fprintf (dump_file, "Transforming ");
3756 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3759 /* We keep original statement only for the last one. All
3760 others are recreated. */
3761 if (i == stmt_num - 1)
3763 gimple_assign_set_rhs1 (stmts[i], op1);
3764 gimple_assign_set_rhs2 (stmts[i], op2);
3765 update_stmt (stmts[i]);
3767 else
3768 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3770 if (dump_file && (dump_flags & TDF_DETAILS))
3772 fprintf (dump_file, " into ");
3773 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3777 remove_visited_stmt_chain (last_rhs1);
3780 /* Transform STMT, which is really (A +B) + (C + D) into the left
3781 linear form, ((A+B)+C)+D.
3782 Recurse on D if necessary. */
3784 static void
3785 linearize_expr (gimple stmt)
3787 gimple_stmt_iterator gsi;
3788 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3789 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3790 gimple oldbinrhs = binrhs;
3791 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3792 gimple newbinrhs = NULL;
3793 struct loop *loop = loop_containing_stmt (stmt);
3794 tree lhs = gimple_assign_lhs (stmt);
3796 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3797 && is_reassociable_op (binrhs, rhscode, loop));
3799 gsi = gsi_for_stmt (stmt);
3801 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3802 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3803 gimple_assign_rhs_code (binrhs),
3804 gimple_assign_lhs (binlhs),
3805 gimple_assign_rhs2 (binrhs));
3806 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3807 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3808 gimple_set_uid (binrhs, gimple_uid (stmt));
3810 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3811 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3813 if (dump_file && (dump_flags & TDF_DETAILS))
3815 fprintf (dump_file, "Linearized: ");
3816 print_gimple_stmt (dump_file, stmt, 0, 0);
3819 reassociate_stats.linearized++;
3820 update_stmt (stmt);
3822 gsi = gsi_for_stmt (oldbinrhs);
3823 reassoc_remove_stmt (&gsi);
3824 release_defs (oldbinrhs);
3826 gimple_set_visited (stmt, true);
3827 gimple_set_visited (binlhs, true);
3828 gimple_set_visited (binrhs, true);
3830 /* Tail recurse on the new rhs if it still needs reassociation. */
3831 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3832 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3833 want to change the algorithm while converting to tuples. */
3834 linearize_expr (stmt);
3837 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3838 it. Otherwise, return NULL. */
3840 static gimple
3841 get_single_immediate_use (tree lhs)
3843 use_operand_p immuse;
3844 gimple immusestmt;
3846 if (TREE_CODE (lhs) == SSA_NAME
3847 && single_imm_use (lhs, &immuse, &immusestmt)
3848 && is_gimple_assign (immusestmt))
3849 return immusestmt;
3851 return NULL;
3854 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3855 representing the negated value. Insertions of any necessary
3856 instructions go before GSI.
3857 This function is recursive in that, if you hand it "a_5" as the
3858 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3859 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3861 static tree
3862 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3864 gimple negatedefstmt = NULL;
3865 tree resultofnegate;
3866 gimple_stmt_iterator gsi;
3867 unsigned int uid;
3869 /* If we are trying to negate a name, defined by an add, negate the
3870 add operands instead. */
3871 if (TREE_CODE (tonegate) == SSA_NAME)
3872 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3873 if (TREE_CODE (tonegate) == SSA_NAME
3874 && is_gimple_assign (negatedefstmt)
3875 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3876 && has_single_use (gimple_assign_lhs (negatedefstmt))
3877 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3879 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3880 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3881 tree lhs = gimple_assign_lhs (negatedefstmt);
3882 gimple g;
3884 gsi = gsi_for_stmt (negatedefstmt);
3885 rhs1 = negate_value (rhs1, &gsi);
3887 gsi = gsi_for_stmt (negatedefstmt);
3888 rhs2 = negate_value (rhs2, &gsi);
3890 gsi = gsi_for_stmt (negatedefstmt);
3891 lhs = make_ssa_name (TREE_TYPE (lhs));
3892 gimple_set_visited (negatedefstmt, true);
3893 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3894 gimple_set_uid (g, gimple_uid (negatedefstmt));
3895 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3896 return lhs;
3899 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3900 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3901 NULL_TREE, true, GSI_SAME_STMT);
3902 gsi = *gsip;
3903 uid = gimple_uid (gsi_stmt (gsi));
3904 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3906 gimple stmt = gsi_stmt (gsi);
3907 if (gimple_uid (stmt) != 0)
3908 break;
3909 gimple_set_uid (stmt, uid);
3911 return resultofnegate;
3914 /* Return true if we should break up the subtract in STMT into an add
3915 with negate. This is true when we the subtract operands are really
3916 adds, or the subtract itself is used in an add expression. In
3917 either case, breaking up the subtract into an add with negate
3918 exposes the adds to reassociation. */
3920 static bool
3921 should_break_up_subtract (gimple stmt)
3923 tree lhs = gimple_assign_lhs (stmt);
3924 tree binlhs = gimple_assign_rhs1 (stmt);
3925 tree binrhs = gimple_assign_rhs2 (stmt);
3926 gimple immusestmt;
3927 struct loop *loop = loop_containing_stmt (stmt);
3929 if (TREE_CODE (binlhs) == SSA_NAME
3930 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3931 return true;
3933 if (TREE_CODE (binrhs) == SSA_NAME
3934 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3935 return true;
3937 if (TREE_CODE (lhs) == SSA_NAME
3938 && (immusestmt = get_single_immediate_use (lhs))
3939 && is_gimple_assign (immusestmt)
3940 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3941 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3942 return true;
3943 return false;
3946 /* Transform STMT from A - B into A + -B. */
3948 static void
3949 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3951 tree rhs1 = gimple_assign_rhs1 (stmt);
3952 tree rhs2 = gimple_assign_rhs2 (stmt);
3954 if (dump_file && (dump_flags & TDF_DETAILS))
3956 fprintf (dump_file, "Breaking up subtract ");
3957 print_gimple_stmt (dump_file, stmt, 0, 0);
3960 rhs2 = negate_value (rhs2, gsip);
3961 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3962 update_stmt (stmt);
3965 /* Determine whether STMT is a builtin call that raises an SSA name
3966 to an integer power and has only one use. If so, and this is early
3967 reassociation and unsafe math optimizations are permitted, place
3968 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3969 If any of these conditions does not hold, return FALSE. */
3971 static bool
3972 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3974 tree fndecl, arg1;
3975 REAL_VALUE_TYPE c, cint;
3977 if (!first_pass_instance
3978 || !flag_unsafe_math_optimizations
3979 || !is_gimple_call (stmt)
3980 || !has_single_use (gimple_call_lhs (stmt)))
3981 return false;
3983 fndecl = gimple_call_fndecl (stmt);
3985 if (!fndecl
3986 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3987 return false;
3989 switch (DECL_FUNCTION_CODE (fndecl))
3991 CASE_FLT_FN (BUILT_IN_POW):
3992 if (flag_errno_math)
3993 return false;
3995 *base = gimple_call_arg (stmt, 0);
3996 arg1 = gimple_call_arg (stmt, 1);
3998 if (TREE_CODE (arg1) != REAL_CST)
3999 return false;
4001 c = TREE_REAL_CST (arg1);
4003 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4004 return false;
4006 *exponent = real_to_integer (&c);
4007 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4008 if (!real_identical (&c, &cint))
4009 return false;
4011 break;
4013 CASE_FLT_FN (BUILT_IN_POWI):
4014 *base = gimple_call_arg (stmt, 0);
4015 arg1 = gimple_call_arg (stmt, 1);
4017 if (!tree_fits_shwi_p (arg1))
4018 return false;
4020 *exponent = tree_to_shwi (arg1);
4021 break;
4023 default:
4024 return false;
4027 /* Expanding negative exponents is generally unproductive, so we don't
4028 complicate matters with those. Exponents of zero and one should
4029 have been handled by expression folding. */
4030 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4031 return false;
4033 return true;
4036 /* Recursively linearize a binary expression that is the RHS of STMT.
4037 Place the operands of the expression tree in the vector named OPS. */
4039 static void
4040 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4041 bool is_associative, bool set_visited)
4043 tree binlhs = gimple_assign_rhs1 (stmt);
4044 tree binrhs = gimple_assign_rhs2 (stmt);
4045 gimple binlhsdef = NULL, binrhsdef = NULL;
4046 bool binlhsisreassoc = false;
4047 bool binrhsisreassoc = false;
4048 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4049 struct loop *loop = loop_containing_stmt (stmt);
4050 tree base = NULL_TREE;
4051 HOST_WIDE_INT exponent = 0;
4053 if (set_visited)
4054 gimple_set_visited (stmt, true);
4056 if (TREE_CODE (binlhs) == SSA_NAME)
4058 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4059 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4060 && !stmt_could_throw_p (binlhsdef));
4063 if (TREE_CODE (binrhs) == SSA_NAME)
4065 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4066 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4067 && !stmt_could_throw_p (binrhsdef));
4070 /* If the LHS is not reassociable, but the RHS is, we need to swap
4071 them. If neither is reassociable, there is nothing we can do, so
4072 just put them in the ops vector. If the LHS is reassociable,
4073 linearize it. If both are reassociable, then linearize the RHS
4074 and the LHS. */
4076 if (!binlhsisreassoc)
4078 /* If this is not a associative operation like division, give up. */
4079 if (!is_associative)
4081 add_to_ops_vec (ops, binrhs);
4082 return;
4085 if (!binrhsisreassoc)
4087 if (rhscode == MULT_EXPR
4088 && TREE_CODE (binrhs) == SSA_NAME
4089 && acceptable_pow_call (binrhsdef, &base, &exponent))
4091 add_repeat_to_ops_vec (ops, base, exponent);
4092 gimple_set_visited (binrhsdef, true);
4094 else
4095 add_to_ops_vec (ops, binrhs);
4097 if (rhscode == MULT_EXPR
4098 && TREE_CODE (binlhs) == SSA_NAME
4099 && acceptable_pow_call (binlhsdef, &base, &exponent))
4101 add_repeat_to_ops_vec (ops, base, exponent);
4102 gimple_set_visited (binlhsdef, true);
4104 else
4105 add_to_ops_vec (ops, binlhs);
4107 return;
4110 if (dump_file && (dump_flags & TDF_DETAILS))
4112 fprintf (dump_file, "swapping operands of ");
4113 print_gimple_stmt (dump_file, stmt, 0, 0);
4116 swap_ssa_operands (stmt,
4117 gimple_assign_rhs1_ptr (stmt),
4118 gimple_assign_rhs2_ptr (stmt));
4119 update_stmt (stmt);
4121 if (dump_file && (dump_flags & TDF_DETAILS))
4123 fprintf (dump_file, " is now ");
4124 print_gimple_stmt (dump_file, stmt, 0, 0);
4127 /* We want to make it so the lhs is always the reassociative op,
4128 so swap. */
4129 std::swap (binlhs, binrhs);
4131 else if (binrhsisreassoc)
4133 linearize_expr (stmt);
4134 binlhs = gimple_assign_rhs1 (stmt);
4135 binrhs = gimple_assign_rhs2 (stmt);
4138 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4139 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4140 rhscode, loop));
4141 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4142 is_associative, set_visited);
4144 if (rhscode == MULT_EXPR
4145 && TREE_CODE (binrhs) == SSA_NAME
4146 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4148 add_repeat_to_ops_vec (ops, base, exponent);
4149 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4151 else
4152 add_to_ops_vec (ops, binrhs);
4155 /* Repropagate the negates back into subtracts, since no other pass
4156 currently does it. */
4158 static void
4159 repropagate_negates (void)
4161 unsigned int i = 0;
4162 tree negate;
4164 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4166 gimple user = get_single_immediate_use (negate);
4168 if (!user || !is_gimple_assign (user))
4169 continue;
4171 /* The negate operand can be either operand of a PLUS_EXPR
4172 (it can be the LHS if the RHS is a constant for example).
4174 Force the negate operand to the RHS of the PLUS_EXPR, then
4175 transform the PLUS_EXPR into a MINUS_EXPR. */
4176 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4178 /* If the negated operand appears on the LHS of the
4179 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4180 to force the negated operand to the RHS of the PLUS_EXPR. */
4181 if (gimple_assign_rhs1 (user) == negate)
4183 swap_ssa_operands (user,
4184 gimple_assign_rhs1_ptr (user),
4185 gimple_assign_rhs2_ptr (user));
4188 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4189 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4190 if (gimple_assign_rhs2 (user) == negate)
4192 tree rhs1 = gimple_assign_rhs1 (user);
4193 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4194 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4195 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4196 update_stmt (user);
4199 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4201 if (gimple_assign_rhs1 (user) == negate)
4203 /* We have
4204 x = -a
4205 y = x - b
4206 which we transform into
4207 x = a + b
4208 y = -x .
4209 This pushes down the negate which we possibly can merge
4210 into some other operation, hence insert it into the
4211 plus_negates vector. */
4212 gimple feed = SSA_NAME_DEF_STMT (negate);
4213 tree a = gimple_assign_rhs1 (feed);
4214 tree b = gimple_assign_rhs2 (user);
4215 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4216 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4217 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4218 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4219 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4220 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4221 user = gsi_stmt (gsi2);
4222 update_stmt (user);
4223 reassoc_remove_stmt (&gsi);
4224 release_defs (feed);
4225 plus_negates.safe_push (gimple_assign_lhs (user));
4227 else
4229 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4230 rid of one operation. */
4231 gimple feed = SSA_NAME_DEF_STMT (negate);
4232 tree a = gimple_assign_rhs1 (feed);
4233 tree rhs1 = gimple_assign_rhs1 (user);
4234 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4235 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4236 update_stmt (gsi_stmt (gsi));
4242 /* Returns true if OP is of a type for which we can do reassociation.
4243 That is for integral or non-saturating fixed-point types, and for
4244 floating point type when associative-math is enabled. */
4246 static bool
4247 can_reassociate_p (tree op)
4249 tree type = TREE_TYPE (op);
4250 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4251 || NON_SAT_FIXED_POINT_TYPE_P (type)
4252 || (flag_associative_math && FLOAT_TYPE_P (type)))
4253 return true;
4254 return false;
4257 /* Break up subtract operations in block BB.
4259 We do this top down because we don't know whether the subtract is
4260 part of a possible chain of reassociation except at the top.
4262 IE given
4263 d = f + g
4264 c = a + e
4265 b = c - d
4266 q = b - r
4267 k = t - q
4269 we want to break up k = t - q, but we won't until we've transformed q
4270 = b - r, which won't be broken up until we transform b = c - d.
4272 En passant, clear the GIMPLE visited flag on every statement
4273 and set UIDs within each basic block. */
4275 static void
4276 break_up_subtract_bb (basic_block bb)
4278 gimple_stmt_iterator gsi;
4279 basic_block son;
4280 unsigned int uid = 1;
4282 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4284 gimple stmt = gsi_stmt (gsi);
4285 gimple_set_visited (stmt, false);
4286 gimple_set_uid (stmt, uid++);
4288 if (!is_gimple_assign (stmt)
4289 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4290 continue;
4292 /* Look for simple gimple subtract operations. */
4293 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4295 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4296 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4297 continue;
4299 /* Check for a subtract used only in an addition. If this
4300 is the case, transform it into add of a negate for better
4301 reassociation. IE transform C = A-B into C = A + -B if C
4302 is only used in an addition. */
4303 if (should_break_up_subtract (stmt))
4304 break_up_subtract (stmt, &gsi);
4306 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4307 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4308 plus_negates.safe_push (gimple_assign_lhs (stmt));
4310 for (son = first_dom_son (CDI_DOMINATORS, bb);
4311 son;
4312 son = next_dom_son (CDI_DOMINATORS, son))
4313 break_up_subtract_bb (son);
4316 /* Used for repeated factor analysis. */
4317 struct repeat_factor_d
4319 /* An SSA name that occurs in a multiply chain. */
4320 tree factor;
4322 /* Cached rank of the factor. */
4323 unsigned rank;
4325 /* Number of occurrences of the factor in the chain. */
4326 HOST_WIDE_INT count;
4328 /* An SSA name representing the product of this factor and
4329 all factors appearing later in the repeated factor vector. */
4330 tree repr;
4333 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4334 typedef const struct repeat_factor_d *const_repeat_factor_t;
4337 static vec<repeat_factor> repeat_factor_vec;
4339 /* Used for sorting the repeat factor vector. Sort primarily by
4340 ascending occurrence count, secondarily by descending rank. */
4342 static int
4343 compare_repeat_factors (const void *x1, const void *x2)
4345 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4346 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4348 if (rf1->count != rf2->count)
4349 return rf1->count - rf2->count;
4351 return rf2->rank - rf1->rank;
4354 /* Look for repeated operands in OPS in the multiply tree rooted at
4355 STMT. Replace them with an optimal sequence of multiplies and powi
4356 builtin calls, and remove the used operands from OPS. Return an
4357 SSA name representing the value of the replacement sequence. */
4359 static tree
4360 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4362 unsigned i, j, vec_len;
4363 int ii;
4364 operand_entry_t oe;
4365 repeat_factor_t rf1, rf2;
4366 repeat_factor rfnew;
4367 tree result = NULL_TREE;
4368 tree target_ssa, iter_result;
4369 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4370 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4371 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4372 gimple mul_stmt, pow_stmt;
4374 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4375 target. */
4376 if (!powi_fndecl)
4377 return NULL_TREE;
4379 /* Allocate the repeated factor vector. */
4380 repeat_factor_vec.create (10);
4382 /* Scan the OPS vector for all SSA names in the product and build
4383 up a vector of occurrence counts for each factor. */
4384 FOR_EACH_VEC_ELT (*ops, i, oe)
4386 if (TREE_CODE (oe->op) == SSA_NAME)
4388 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4390 if (rf1->factor == oe->op)
4392 rf1->count += oe->count;
4393 break;
4397 if (j >= repeat_factor_vec.length ())
4399 rfnew.factor = oe->op;
4400 rfnew.rank = oe->rank;
4401 rfnew.count = oe->count;
4402 rfnew.repr = NULL_TREE;
4403 repeat_factor_vec.safe_push (rfnew);
4408 /* Sort the repeated factor vector by (a) increasing occurrence count,
4409 and (b) decreasing rank. */
4410 repeat_factor_vec.qsort (compare_repeat_factors);
4412 /* It is generally best to combine as many base factors as possible
4413 into a product before applying __builtin_powi to the result.
4414 However, the sort order chosen for the repeated factor vector
4415 allows us to cache partial results for the product of the base
4416 factors for subsequent use. When we already have a cached partial
4417 result from a previous iteration, it is best to make use of it
4418 before looking for another __builtin_pow opportunity.
4420 As an example, consider x * x * y * y * y * z * z * z * z.
4421 We want to first compose the product x * y * z, raise it to the
4422 second power, then multiply this by y * z, and finally multiply
4423 by z. This can be done in 5 multiplies provided we cache y * z
4424 for use in both expressions:
4426 t1 = y * z
4427 t2 = t1 * x
4428 t3 = t2 * t2
4429 t4 = t1 * t3
4430 result = t4 * z
4432 If we instead ignored the cached y * z and first multiplied by
4433 the __builtin_pow opportunity z * z, we would get the inferior:
4435 t1 = y * z
4436 t2 = t1 * x
4437 t3 = t2 * t2
4438 t4 = z * z
4439 t5 = t3 * t4
4440 result = t5 * y */
4442 vec_len = repeat_factor_vec.length ();
4444 /* Repeatedly look for opportunities to create a builtin_powi call. */
4445 while (true)
4447 HOST_WIDE_INT power;
4449 /* First look for the largest cached product of factors from
4450 preceding iterations. If found, create a builtin_powi for
4451 it if the minimum occurrence count for its factors is at
4452 least 2, or just use this cached product as our next
4453 multiplicand if the minimum occurrence count is 1. */
4454 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4456 if (rf1->repr && rf1->count > 0)
4457 break;
4460 if (j < vec_len)
4462 power = rf1->count;
4464 if (power == 1)
4466 iter_result = rf1->repr;
4468 if (dump_file && (dump_flags & TDF_DETAILS))
4470 unsigned elt;
4471 repeat_factor_t rf;
4472 fputs ("Multiplying by cached product ", dump_file);
4473 for (elt = j; elt < vec_len; elt++)
4475 rf = &repeat_factor_vec[elt];
4476 print_generic_expr (dump_file, rf->factor, 0);
4477 if (elt < vec_len - 1)
4478 fputs (" * ", dump_file);
4480 fputs ("\n", dump_file);
4483 else
4485 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4486 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4487 build_int_cst (integer_type_node,
4488 power));
4489 gimple_call_set_lhs (pow_stmt, iter_result);
4490 gimple_set_location (pow_stmt, gimple_location (stmt));
4491 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4493 if (dump_file && (dump_flags & TDF_DETAILS))
4495 unsigned elt;
4496 repeat_factor_t rf;
4497 fputs ("Building __builtin_pow call for cached product (",
4498 dump_file);
4499 for (elt = j; elt < vec_len; elt++)
4501 rf = &repeat_factor_vec[elt];
4502 print_generic_expr (dump_file, rf->factor, 0);
4503 if (elt < vec_len - 1)
4504 fputs (" * ", dump_file);
4506 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4507 power);
4511 else
4513 /* Otherwise, find the first factor in the repeated factor
4514 vector whose occurrence count is at least 2. If no such
4515 factor exists, there are no builtin_powi opportunities
4516 remaining. */
4517 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4519 if (rf1->count >= 2)
4520 break;
4523 if (j >= vec_len)
4524 break;
4526 power = rf1->count;
4528 if (dump_file && (dump_flags & TDF_DETAILS))
4530 unsigned elt;
4531 repeat_factor_t rf;
4532 fputs ("Building __builtin_pow call for (", dump_file);
4533 for (elt = j; elt < vec_len; elt++)
4535 rf = &repeat_factor_vec[elt];
4536 print_generic_expr (dump_file, rf->factor, 0);
4537 if (elt < vec_len - 1)
4538 fputs (" * ", dump_file);
4540 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4543 reassociate_stats.pows_created++;
4545 /* Visit each element of the vector in reverse order (so that
4546 high-occurrence elements are visited first, and within the
4547 same occurrence count, lower-ranked elements are visited
4548 first). Form a linear product of all elements in this order
4549 whose occurrencce count is at least that of element J.
4550 Record the SSA name representing the product of each element
4551 with all subsequent elements in the vector. */
4552 if (j == vec_len - 1)
4553 rf1->repr = rf1->factor;
4554 else
4556 for (ii = vec_len - 2; ii >= (int)j; ii--)
4558 tree op1, op2;
4560 rf1 = &repeat_factor_vec[ii];
4561 rf2 = &repeat_factor_vec[ii + 1];
4563 /* Init the last factor's representative to be itself. */
4564 if (!rf2->repr)
4565 rf2->repr = rf2->factor;
4567 op1 = rf1->factor;
4568 op2 = rf2->repr;
4570 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4571 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4572 op1, op2);
4573 gimple_set_location (mul_stmt, gimple_location (stmt));
4574 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4575 rf1->repr = target_ssa;
4577 /* Don't reprocess the multiply we just introduced. */
4578 gimple_set_visited (mul_stmt, true);
4582 /* Form a call to __builtin_powi for the maximum product
4583 just formed, raised to the power obtained earlier. */
4584 rf1 = &repeat_factor_vec[j];
4585 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4586 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4587 build_int_cst (integer_type_node,
4588 power));
4589 gimple_call_set_lhs (pow_stmt, iter_result);
4590 gimple_set_location (pow_stmt, gimple_location (stmt));
4591 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4594 /* If we previously formed at least one other builtin_powi call,
4595 form the product of this one and those others. */
4596 if (result)
4598 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4599 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4600 result, iter_result);
4601 gimple_set_location (mul_stmt, gimple_location (stmt));
4602 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4603 gimple_set_visited (mul_stmt, true);
4604 result = new_result;
4606 else
4607 result = iter_result;
4609 /* Decrement the occurrence count of each element in the product
4610 by the count found above, and remove this many copies of each
4611 factor from OPS. */
4612 for (i = j; i < vec_len; i++)
4614 unsigned k = power;
4615 unsigned n;
4617 rf1 = &repeat_factor_vec[i];
4618 rf1->count -= power;
4620 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4622 if (oe->op == rf1->factor)
4624 if (oe->count <= k)
4626 ops->ordered_remove (n);
4627 k -= oe->count;
4629 if (k == 0)
4630 break;
4632 else
4634 oe->count -= k;
4635 break;
4642 /* At this point all elements in the repeated factor vector have a
4643 remaining occurrence count of 0 or 1, and those with a count of 1
4644 don't have cached representatives. Re-sort the ops vector and
4645 clean up. */
4646 ops->qsort (sort_by_operand_rank);
4647 repeat_factor_vec.release ();
4649 /* Return the final product computed herein. Note that there may
4650 still be some elements with single occurrence count left in OPS;
4651 those will be handled by the normal reassociation logic. */
4652 return result;
4655 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4657 static void
4658 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4660 tree rhs1;
4662 if (dump_file && (dump_flags & TDF_DETAILS))
4664 fprintf (dump_file, "Transforming ");
4665 print_gimple_stmt (dump_file, stmt, 0, 0);
4668 rhs1 = gimple_assign_rhs1 (stmt);
4669 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4670 update_stmt (stmt);
4671 remove_visited_stmt_chain (rhs1);
4673 if (dump_file && (dump_flags & TDF_DETAILS))
4675 fprintf (dump_file, " into ");
4676 print_gimple_stmt (dump_file, stmt, 0, 0);
4680 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4682 static void
4683 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4684 tree rhs1, tree rhs2)
4686 if (dump_file && (dump_flags & TDF_DETAILS))
4688 fprintf (dump_file, "Transforming ");
4689 print_gimple_stmt (dump_file, stmt, 0, 0);
4692 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4693 update_stmt (gsi_stmt (*gsi));
4694 remove_visited_stmt_chain (rhs1);
4696 if (dump_file && (dump_flags & TDF_DETAILS))
4698 fprintf (dump_file, " into ");
4699 print_gimple_stmt (dump_file, stmt, 0, 0);
4703 /* Reassociate expressions in basic block BB and its post-dominator as
4704 children. */
4706 static void
4707 reassociate_bb (basic_block bb)
4709 gimple_stmt_iterator gsi;
4710 basic_block son;
4711 gimple stmt = last_stmt (bb);
4713 if (stmt && !gimple_visited_p (stmt))
4714 maybe_optimize_range_tests (stmt);
4716 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4718 stmt = gsi_stmt (gsi);
4720 if (is_gimple_assign (stmt)
4721 && !stmt_could_throw_p (stmt))
4723 tree lhs, rhs1, rhs2;
4724 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4726 /* If this is not a gimple binary expression, there is
4727 nothing for us to do with it. */
4728 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4729 continue;
4731 /* If this was part of an already processed statement,
4732 we don't need to touch it again. */
4733 if (gimple_visited_p (stmt))
4735 /* This statement might have become dead because of previous
4736 reassociations. */
4737 if (has_zero_uses (gimple_get_lhs (stmt)))
4739 reassoc_remove_stmt (&gsi);
4740 release_defs (stmt);
4741 /* We might end up removing the last stmt above which
4742 places the iterator to the end of the sequence.
4743 Reset it to the last stmt in this case which might
4744 be the end of the sequence as well if we removed
4745 the last statement of the sequence. In which case
4746 we need to bail out. */
4747 if (gsi_end_p (gsi))
4749 gsi = gsi_last_bb (bb);
4750 if (gsi_end_p (gsi))
4751 break;
4754 continue;
4757 lhs = gimple_assign_lhs (stmt);
4758 rhs1 = gimple_assign_rhs1 (stmt);
4759 rhs2 = gimple_assign_rhs2 (stmt);
4761 /* For non-bit or min/max operations we can't associate
4762 all types. Verify that here. */
4763 if (rhs_code != BIT_IOR_EXPR
4764 && rhs_code != BIT_AND_EXPR
4765 && rhs_code != BIT_XOR_EXPR
4766 && rhs_code != MIN_EXPR
4767 && rhs_code != MAX_EXPR
4768 && (!can_reassociate_p (lhs)
4769 || !can_reassociate_p (rhs1)
4770 || !can_reassociate_p (rhs2)))
4771 continue;
4773 if (associative_tree_code (rhs_code))
4775 auto_vec<operand_entry_t> ops;
4776 tree powi_result = NULL_TREE;
4778 /* There may be no immediate uses left by the time we
4779 get here because we may have eliminated them all. */
4780 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4781 continue;
4783 gimple_set_visited (stmt, true);
4784 linearize_expr_tree (&ops, stmt, true, true);
4785 ops.qsort (sort_by_operand_rank);
4786 optimize_ops_list (rhs_code, &ops);
4787 if (undistribute_ops_list (rhs_code, &ops,
4788 loop_containing_stmt (stmt)))
4790 ops.qsort (sort_by_operand_rank);
4791 optimize_ops_list (rhs_code, &ops);
4794 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4795 optimize_range_tests (rhs_code, &ops);
4797 if (first_pass_instance
4798 && rhs_code == MULT_EXPR
4799 && flag_unsafe_math_optimizations)
4800 powi_result = attempt_builtin_powi (stmt, &ops);
4802 /* If the operand vector is now empty, all operands were
4803 consumed by the __builtin_powi optimization. */
4804 if (ops.length () == 0)
4805 transform_stmt_to_copy (&gsi, stmt, powi_result);
4806 else if (ops.length () == 1)
4808 tree last_op = ops.last ()->op;
4810 if (powi_result)
4811 transform_stmt_to_multiply (&gsi, stmt, last_op,
4812 powi_result);
4813 else
4814 transform_stmt_to_copy (&gsi, stmt, last_op);
4816 else
4818 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4819 int ops_num = ops.length ();
4820 int width = get_reassociation_width (ops_num, rhs_code, mode);
4821 tree new_lhs = lhs;
4823 if (dump_file && (dump_flags & TDF_DETAILS))
4824 fprintf (dump_file,
4825 "Width = %d was chosen for reassociation\n", width);
4827 if (width > 1
4828 && ops.length () > 3)
4829 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4830 width, ops);
4831 else
4833 /* When there are three operands left, we want
4834 to make sure the ones that get the double
4835 binary op are chosen wisely. */
4836 int len = ops.length ();
4837 if (len >= 3)
4838 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4840 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4841 powi_result != NULL);
4844 /* If we combined some repeated factors into a
4845 __builtin_powi call, multiply that result by the
4846 reassociated operands. */
4847 if (powi_result)
4849 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4850 tree type = TREE_TYPE (lhs);
4851 tree target_ssa = make_temp_ssa_name (type, NULL,
4852 "reassocpow");
4853 gimple_set_lhs (lhs_stmt, target_ssa);
4854 update_stmt (lhs_stmt);
4855 if (lhs != new_lhs)
4856 target_ssa = new_lhs;
4857 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4858 powi_result, target_ssa);
4859 gimple_set_location (mul_stmt, gimple_location (stmt));
4860 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4866 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4867 son;
4868 son = next_dom_son (CDI_POST_DOMINATORS, son))
4869 reassociate_bb (son);
4872 /* Add jumps around shifts for range tests turned into bit tests.
4873 For each SSA_NAME VAR we have code like:
4874 VAR = ...; // final stmt of range comparison
4875 // bit test here...;
4876 OTHERVAR = ...; // final stmt of the bit test sequence
4877 RES = VAR | OTHERVAR;
4878 Turn the above into:
4879 VAR = ...;
4880 if (VAR != 0)
4881 goto <l3>;
4882 else
4883 goto <l2>;
4884 <l2>:
4885 // bit test here...;
4886 OTHERVAR = ...;
4887 <l3>:
4888 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4890 static void
4891 branch_fixup (void)
4893 tree var;
4894 unsigned int i;
4896 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4898 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4899 gimple use_stmt;
4900 use_operand_p use;
4901 bool ok = single_imm_use (var, &use, &use_stmt);
4902 gcc_assert (ok
4903 && is_gimple_assign (use_stmt)
4904 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4905 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4907 basic_block cond_bb = gimple_bb (def_stmt);
4908 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4909 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4911 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4912 gimple g = gimple_build_cond (NE_EXPR, var,
4913 build_zero_cst (TREE_TYPE (var)),
4914 NULL_TREE, NULL_TREE);
4915 location_t loc = gimple_location (use_stmt);
4916 gimple_set_location (g, loc);
4917 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4919 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4920 etrue->probability = REG_BR_PROB_BASE / 2;
4921 etrue->count = cond_bb->count / 2;
4922 edge efalse = find_edge (cond_bb, then_bb);
4923 efalse->flags = EDGE_FALSE_VALUE;
4924 efalse->probability -= etrue->probability;
4925 efalse->count -= etrue->count;
4926 then_bb->count -= etrue->count;
4928 tree othervar = NULL_TREE;
4929 if (gimple_assign_rhs1 (use_stmt) == var)
4930 othervar = gimple_assign_rhs2 (use_stmt);
4931 else if (gimple_assign_rhs2 (use_stmt) == var)
4932 othervar = gimple_assign_rhs1 (use_stmt);
4933 else
4934 gcc_unreachable ();
4935 tree lhs = gimple_assign_lhs (use_stmt);
4936 gphi *phi = create_phi_node (lhs, merge_bb);
4937 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4938 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4939 gsi = gsi_for_stmt (use_stmt);
4940 gsi_remove (&gsi, true);
4942 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4943 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4945 reassoc_branch_fixups.release ();
4948 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4949 void debug_ops_vector (vec<operand_entry_t> ops);
4951 /* Dump the operand entry vector OPS to FILE. */
4953 void
4954 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4956 operand_entry_t oe;
4957 unsigned int i;
4959 FOR_EACH_VEC_ELT (ops, i, oe)
4961 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4962 print_generic_expr (file, oe->op, 0);
4966 /* Dump the operand entry vector OPS to STDERR. */
4968 DEBUG_FUNCTION void
4969 debug_ops_vector (vec<operand_entry_t> ops)
4971 dump_ops_vector (stderr, ops);
4974 static void
4975 do_reassoc (void)
4977 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4978 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4981 /* Initialize the reassociation pass. */
4983 static void
4984 init_reassoc (void)
4986 int i;
4987 long rank = 2;
4988 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4990 /* Find the loops, so that we can prevent moving calculations in
4991 them. */
4992 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4994 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4996 next_operand_entry_id = 0;
4998 /* Reverse RPO (Reverse Post Order) will give us something where
4999 deeper loops come later. */
5000 pre_and_rev_post_order_compute (NULL, bbs, false);
5001 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5002 operand_rank = new hash_map<tree, long>;
5004 /* Give each default definition a distinct rank. This includes
5005 parameters and the static chain. Walk backwards over all
5006 SSA names so that we get proper rank ordering according
5007 to tree_swap_operands_p. */
5008 for (i = num_ssa_names - 1; i > 0; --i)
5010 tree name = ssa_name (i);
5011 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5012 insert_operand_rank (name, ++rank);
5015 /* Set up rank for each BB */
5016 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5017 bb_rank[bbs[i]] = ++rank << 16;
5019 free (bbs);
5020 calculate_dominance_info (CDI_POST_DOMINATORS);
5021 plus_negates = vNULL;
5024 /* Cleanup after the reassociation pass, and print stats if
5025 requested. */
5027 static void
5028 fini_reassoc (void)
5030 statistics_counter_event (cfun, "Linearized",
5031 reassociate_stats.linearized);
5032 statistics_counter_event (cfun, "Constants eliminated",
5033 reassociate_stats.constants_eliminated);
5034 statistics_counter_event (cfun, "Ops eliminated",
5035 reassociate_stats.ops_eliminated);
5036 statistics_counter_event (cfun, "Statements rewritten",
5037 reassociate_stats.rewritten);
5038 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5039 reassociate_stats.pows_encountered);
5040 statistics_counter_event (cfun, "Built-in powi calls created",
5041 reassociate_stats.pows_created);
5043 delete operand_rank;
5044 operand_entry_pool.release ();
5045 free (bb_rank);
5046 plus_negates.release ();
5047 free_dominance_info (CDI_POST_DOMINATORS);
5048 loop_optimizer_finalize ();
5051 /* Gate and execute functions for Reassociation. */
5053 static unsigned int
5054 execute_reassoc (void)
5056 init_reassoc ();
5058 do_reassoc ();
5059 repropagate_negates ();
5060 branch_fixup ();
5062 fini_reassoc ();
5063 return 0;
5066 namespace {
5068 const pass_data pass_data_reassoc =
5070 GIMPLE_PASS, /* type */
5071 "reassoc", /* name */
5072 OPTGROUP_NONE, /* optinfo_flags */
5073 TV_TREE_REASSOC, /* tv_id */
5074 ( PROP_cfg | PROP_ssa ), /* properties_required */
5075 0, /* properties_provided */
5076 0, /* properties_destroyed */
5077 0, /* todo_flags_start */
5078 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5081 class pass_reassoc : public gimple_opt_pass
5083 public:
5084 pass_reassoc (gcc::context *ctxt)
5085 : gimple_opt_pass (pass_data_reassoc, ctxt)
5088 /* opt_pass methods: */
5089 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5090 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5091 virtual unsigned int execute (function *) { return execute_reassoc (); }
5093 }; // class pass_reassoc
5095 } // anon namespace
5097 gimple_opt_pass *
5098 make_pass_reassoc (gcc::context *ctxt)
5100 return new pass_reassoc (ctxt);