[AArch64][14/14] Reuse target_option_current_node when passing pragma string to targe...
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blobefb813c3efaf9d2b2d16349b5cf3ba23b7f04b0d
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "cfghooks.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "rtl.h"
29 #include "ssa.h"
30 #include "tm_p.h"
31 #include "alias.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "cfganal.h"
35 #include "gimple-pretty-print.h"
36 #include "tree-inline.h"
37 #include "internal-fn.h"
38 #include "gimple-fold.h"
39 #include "tree-eh.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
42 #include "tree-cfg.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "flags.h"
46 #include "insn-config.h"
47 #include "expmed.h"
48 #include "dojump.h"
49 #include "explow.h"
50 #include "calls.h"
51 #include "emit-rtl.h"
52 #include "varasm.h"
53 #include "stmt.h"
54 #include "expr.h"
55 #include "tree-dfa.h"
56 #include "tree-ssa.h"
57 #include "tree-iterator.h"
58 #include "tree-pass.h"
59 #include "alloc-pool.h"
60 #include "langhooks.h"
61 #include "cfgloop.h"
62 #include "target.h"
63 #include "params.h"
64 #include "diagnostic-core.h"
65 #include "builtins.h"
66 #include "gimplify.h"
67 #include "insn-codes.h"
68 #include "optabs.h"
70 /* This is a simple global reassociation pass. It is, in part, based
71 on the LLVM pass of the same name (They do some things more/less
72 than we do, in different orders, etc).
74 It consists of five steps:
76 1. Breaking up subtract operations into addition + negate, where
77 it would promote the reassociation of adds.
79 2. Left linearization of the expression trees, so that (A+B)+(C+D)
80 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
81 During linearization, we place the operands of the binary
82 expressions into a vector of operand_entry_t
84 3. Optimization of the operand lists, eliminating things like a +
85 -a, a & a, etc.
87 3a. Combine repeated factors with the same occurrence counts
88 into a __builtin_powi call that will later be optimized into
89 an optimal number of multiplies.
91 4. Rewrite the expression trees we linearized and optimized so
92 they are in proper rank order.
94 5. Repropagate negates, as nothing else will clean it up ATM.
96 A bit of theory on #4, since nobody seems to write anything down
97 about why it makes sense to do it the way they do it:
99 We could do this much nicer theoretically, but don't (for reasons
100 explained after how to do it theoretically nice :P).
102 In order to promote the most redundancy elimination, you want
103 binary expressions whose operands are the same rank (or
104 preferably, the same value) exposed to the redundancy eliminator,
105 for possible elimination.
107 So the way to do this if we really cared, is to build the new op
108 tree from the leaves to the roots, merging as you go, and putting the
109 new op on the end of the worklist, until you are left with one
110 thing on the worklist.
112 IE if you have to rewrite the following set of operands (listed with
113 rank in parentheses), with opcode PLUS_EXPR:
115 a (1), b (1), c (1), d (2), e (2)
118 We start with our merge worklist empty, and the ops list with all of
119 those on it.
121 You want to first merge all leaves of the same rank, as much as
122 possible.
124 So first build a binary op of
126 mergetmp = a + b, and put "mergetmp" on the merge worklist.
128 Because there is no three operand form of PLUS_EXPR, c is not going to
129 be exposed to redundancy elimination as a rank 1 operand.
131 So you might as well throw it on the merge worklist (you could also
132 consider it to now be a rank two operand, and merge it with d and e,
133 but in this case, you then have evicted e from a binary op. So at
134 least in this situation, you can't win.)
136 Then build a binary op of d + e
137 mergetmp2 = d + e
139 and put mergetmp2 on the merge worklist.
141 so merge worklist = {mergetmp, c, mergetmp2}
143 Continue building binary ops of these operations until you have only
144 one operation left on the worklist.
146 So we have
148 build binary op
149 mergetmp3 = mergetmp + c
151 worklist = {mergetmp2, mergetmp3}
153 mergetmp4 = mergetmp2 + mergetmp3
155 worklist = {mergetmp4}
157 because we have one operation left, we can now just set the original
158 statement equal to the result of that operation.
160 This will at least expose a + b and d + e to redundancy elimination
161 as binary operations.
163 For extra points, you can reuse the old statements to build the
164 mergetmps, since you shouldn't run out.
166 So why don't we do this?
168 Because it's expensive, and rarely will help. Most trees we are
169 reassociating have 3 or less ops. If they have 2 ops, they already
170 will be written into a nice single binary op. If you have 3 ops, a
171 single simple check suffices to tell you whether the first two are of the
172 same rank. If so, you know to order it
174 mergetmp = op1 + op2
175 newstmt = mergetmp + op3
177 instead of
178 mergetmp = op2 + op3
179 newstmt = mergetmp + op1
181 If all three are of the same rank, you can't expose them all in a
182 single binary operator anyway, so the above is *still* the best you
183 can do.
185 Thus, this is what we do. When we have three ops left, we check to see
186 what order to put them in, and call it a day. As a nod to vector sum
187 reduction, we check if any of the ops are really a phi node that is a
188 destructive update for the associating op, and keep the destructive
189 update together for vector sum reduction recognition. */
/* Statistics gathered over one run of the pass, dumped at the end.  */
static struct
{
  int linearized;		/* Expression trees flattened.  */
  int constants_eliminated;	/* Constants folded away from op lists.  */
  int ops_eliminated;		/* Operands removed via identities.  */
  int rewritten;		/* Trees rewritten into rank order.  */
  int pows_encountered;		/* __builtin_pow* calls seen.  */
  int pows_created;		/* __builtin_powi calls synthesized.  */
} reassociate_stats;
/* Operator, rank pair.  One entry per operand of a linearized
   expression chain.  */
typedef struct operand_entry
{
  unsigned int rank;	/* Rank from get_rank; higher sorts earlier.  */
  int id;		/* Unique id used to stabilize qsort.  */
  tree op;		/* The operand itself.  */
  unsigned int count;	/* Repeat count (for repeated-factor handling).  */
} *operand_entry_t;

static object_allocator<operand_entry> operand_entry_pool ("operand entry pool",
							   30);

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;
/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  Indexed by bb->index.  */
static long *bb_rank;

/* Operand->rank hashtable; memoizes get_rank results.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  Returns gsi_remove's result.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  /* PHIs never produce replacement debug stmts; nothing to renumber.  */
  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  /* Remember where we were before the removal so we can walk any
     debug stmts gsi_remove inserts in STMT's place.  */
  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  /* Step to the first stmt that may be a freshly inserted debug stmt:
     either the one after PREV, or the start of the block if STMT was
     first.  */
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  /* Everything between PREV and the stmt now at *GSI must be a new
     debug stmt (uid 0); give each the removed stmt's uid so later
     uid-based dominance checks stay monotonic.  */
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
326 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
327 loop-carried dependence of an innermost loop, return TRUE; else
328 return FALSE. */
329 static bool
330 loop_carried_phi (tree exp)
332 gimple phi_stmt;
333 long block_rank;
335 if (TREE_CODE (exp) != SSA_NAME
336 || SSA_NAME_IS_DEFAULT_DEF (exp))
337 return false;
339 phi_stmt = SSA_NAME_DEF_STMT (exp);
341 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
342 return false;
344 /* Non-loop-carried phis have block rank. Loop-carried phis have
345 an additional bias added in. If this phi doesn't have block rank,
346 it's biased and should not be propagated. */
347 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
349 if (phi_rank (phi_stmt) != block_rank)
350 return true;
352 return false;
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  /* Keeping RANK unchanged here is what keeps the loop-carried phi's
     artificially high rank from leaking into its dependents.  */
  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  Returns -1
   if E has no cached rank yet.  */

static inline long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}
/* Insert {E,RANK} into the operand rank hashtable.  E must not have
   been inserted before (put returning true would mean an existing
   entry was overwritten).  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  /* NOTE(review): the hash_map insertion is a side effect inside
     gcc_assert; this relies on gcc_assert always evaluating its
     argument (as system.h's release definition does) — verify if
     that definition ever changes.  */
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple stmt;
      long rank;
      tree op;

      /* Parameters etc. get their rank from the pre-seeded table.  */
      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  The classification values are
   distinct bits so they order integer > float > other when compared.
   Parenthesized so the macros expand safely inside any expression.  */
#define INTEGER_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied//whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oeb->op) - constant_type (oea->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
	  && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
	  && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	{
	  gimple stmta = SSA_NAME_DEF_STMT (oea->op);
	  gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
	  basic_block bba = gimple_bb (stmta);
	  basic_block bbb = gimple_bb (stmtb);
	  if (bbb != bba)
	    {
	      /* Different blocks: order by block rank when it differs;
		 otherwise fall through to the version tie-break.  */
	      if (bb_rank[bbb->index] != bb_rank[bba->index])
		return bb_rank[bbb->index] - bb_rank[bba->index];
	    }
	  else
	    {
	      /* Same block: order by uid-based dominance when it is
		 decisive in one direction only.  */
	      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
	      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
	      if (da != db)
		return da ? 1 : -1;
	    }
	}

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
	return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  /* Unique id keeps qsort deterministic across hosts.  */
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}
/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  Used for repeated factors that may later become a
   __builtin_powi call.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
597 /* Return true if STMT is reassociable operation containing a binary
598 operation with tree code CODE, and is inside LOOP. */
600 static bool
601 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
603 basic_block bb = gimple_bb (stmt);
605 if (gimple_bb (stmt) == NULL)
606 return false;
608 if (!flow_bb_inside_loop_p (loop, bb))
609 return false;
611 if (is_gimple_assign (stmt)
612 && gimple_assign_rhs_code (stmt) == code
613 && has_single_use (gimple_assign_lhs (stmt)))
614 return true;
616 return false;
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  I is CURR's index in OPS;
   *ALL_DONE is set when the whole list collapsed to a constant.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry_t> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry_t curr,
			  operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  /* x OP x == x for these codes: drop one copy.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op, 0);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  /* x ^ x == 0: drop both copies.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      /* The whole expression became 0; restart the vector with
		 a single zero constant and tell the caller to stop.  */
	      ops->create (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      /* Remove both entries; after the first removal CURR has
		 shifted down to index i-1 as well.  */
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
/* Negates seen inside PLUS_EXPR chains, saved for repropagate_negates.  */
static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry_t> *ops,
			   unsigned int currindex,
			   operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
	{
	  /* x + (-x) -> 0.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop, 0);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (oe->op == notop)
	{
	  /* x + ~x -> -1.  */
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}
/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry_t> *ops,
		     unsigned int currindex,
		     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  /* x & ~x -> 0, x | ~x -> -1: the whole list collapses to the
	     absorbing constant.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  /* Constants always sort last (rank 0), so only the final entry can
     be the foldable constant.  */
  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      /* x & 0 absorbs everything.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      /* x & -1 is the identity: drop the constant.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      /* x | -1 absorbs everything.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      /* x | 0 is the identity: drop the constant.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  /* NOTE(review): truncate (1) keeps ops[0] before pushing
		     the zero, unlike the analogous & 0 / | -1 cases which
		     truncate (0).  The result is still zero after folding,
		     but the kept operand and the stats count look
		     inconsistent — verify against upstream.  */
		  ops->truncate (1);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      /* x * 1 is the identity: drop the constant.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      /* x [|^+-] 0 is the identity: drop the constant.  */
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
				 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;		/* Number of occurrences.  */
  int id;		/* Unique id to stabilize qsort.  */
  enum tree_code oecode;	/* Operation the operand appears under.  */
  tree op;		/* The operand itself.  */
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
/* Oecount hashtable helpers.  The hashtable stores int keys that are
   indexes into CVEC biased by 42, so valid keys never collide with the
   empty (0) and deleted (1) values of int_hash.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  Hashes the (op, oecode) pair behind the
   biased index P.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  Two entries match when both the
   operation code and the operand pointer agree.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return (c1->oecode == c2->oecode
	  && c1->op == c2->op);
}
1034 /* Comparison function for qsort sorting oecount elements by count. */
1036 static int
1037 oecount_cmp (const void *p1, const void *p2)
1039 const oecount *c1 = (const oecount *)p1;
1040 const oecount *c2 = (const oecount *)p2;
1041 if (c1->cnt != c2->cnt)
1042 return c1->cnt - c2->cnt;
1043 else
1044 /* If counts are identical, use unique IDs to stabilize qsort. */
1045 return c1->id - c2->id;
1048 /* Return TRUE iff STMT represents a builtin call that raises OP
1049 to some exponent. */
1051 static bool
1052 stmt_is_power_of_op (gimple stmt, tree op)
1054 tree fndecl;
1056 if (!is_gimple_call (stmt))
1057 return false;
1059 fndecl = gimple_call_fndecl (stmt);
1061 if (!fndecl
1062 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1063 return false;
1065 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1067 CASE_FLT_FN (BUILT_IN_POW):
1068 CASE_FLT_FN (BUILT_IN_POWI):
1069 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1071 default:
1072 return false;
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      /* pow's exponent is a REAL_CST; round-trip through integer to
	 subtract one, then rebuild the real constant.  */
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      /* powi's exponent is already an INTEGER_CST.  */
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  /* STMT may be either a builtin-pow call or an assignment here.  */
  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  /* Constants don't trigger operand rescans, so update explicitly.  */
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
1138 /* Walks the linear chain with result *DEF searching for an operation
1139 with operand OP and code OPCODE removing that from the chain. *DEF
1140 is updated if there is only one operand but no operation left. */
1142 static void
1143 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1145 gimple stmt = SSA_NAME_DEF_STMT (*def);
1149 tree name;
1151 if (opcode == MULT_EXPR
1152 && stmt_is_power_of_op (stmt, op))
1154 if (decrement_power (stmt) == 1)
1155 propagate_op_to_single_use (op, stmt, def);
1156 return;
1159 name = gimple_assign_rhs1 (stmt);
1161 /* If this is the operation we look for and one of the operands
1162 is ours simply propagate the other operand into the stmts
1163 single use. */
1164 if (gimple_assign_rhs_code (stmt) == opcode
1165 && (name == op
1166 || gimple_assign_rhs2 (stmt) == op))
1168 if (name == op)
1169 name = gimple_assign_rhs2 (stmt);
1170 propagate_op_to_single_use (name, stmt, def);
1171 return;
1174 /* We might have a multiply of two __builtin_pow* calls, and
1175 the operand might be hiding in the rightmost one. */
1176 if (opcode == MULT_EXPR
1177 && gimple_assign_rhs_code (stmt) == opcode
1178 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1179 && has_single_use (gimple_assign_rhs2 (stmt)))
1181 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1182 if (stmt_is_power_of_op (stmt2, op))
1184 if (decrement_power (stmt2) == 1)
1185 propagate_op_to_single_use (op, stmt2, def);
1186 return;
1190 /* Continue walking the chain. */
1191 gcc_assert (name != op
1192 && TREE_CODE (name) == SSA_NAME);
1193 stmt = SSA_NAME_DEF_STMT (name);
1195 while (1);
1198 /* Returns true if statement S1 dominates statement S2. Like
1199 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1201 static bool
1202 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1204 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1206 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1207 SSA_NAME. Assume it lives at the beginning of function and
1208 thus dominates everything. */
1209 if (!bb1 || s1 == s2)
1210 return true;
1212 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1213 if (!bb2)
1214 return false;
1216 if (bb1 == bb2)
1218 /* PHIs in the same basic block are assumed to be
1219 executed all in parallel, if only one stmt is a PHI,
1220 it dominates the other stmt in the same basic block. */
1221 if (gimple_code (s1) == GIMPLE_PHI)
1222 return true;
1224 if (gimple_code (s2) == GIMPLE_PHI)
1225 return false;
1227 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1229 if (gimple_uid (s1) < gimple_uid (s2))
1230 return true;
1232 if (gimple_uid (s1) > gimple_uid (s2))
1233 return false;
1235 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1236 unsigned int uid = gimple_uid (s1);
1237 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1239 gimple s = gsi_stmt (gsi);
1240 if (gimple_uid (s) != uid)
1241 break;
1242 if (s == s2)
1243 return true;
1246 return false;
1249 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1252 /* Insert STMT after INSERT_POINT. */
1254 static void
1255 insert_stmt_after (gimple stmt, gimple insert_point)
1257 gimple_stmt_iterator gsi;
1258 basic_block bb;
1260 if (gimple_code (insert_point) == GIMPLE_PHI)
1261 bb = gimple_bb (insert_point);
1262 else if (!stmt_ends_bb_p (insert_point))
1264 gsi = gsi_for_stmt (insert_point);
1265 gimple_set_uid (stmt, gimple_uid (insert_point));
1266 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1267 return;
1269 else
1270 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1271 thus if it must end a basic block, it should be a call that can
1272 throw, or some assignment that can throw. If it throws, the LHS
1273 of it will not be initialized though, so only valid places using
1274 the SSA_NAME should be dominated by the fallthru edge. */
1275 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1276 gsi = gsi_after_labels (bb);
1277 if (gsi_end_p (gsi))
1279 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1280 gimple_set_uid (stmt,
1281 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1283 else
1284 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1285 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1288 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1289 the result. Places the statement after the definition of either
1290 OP1 or OP2. Returns the new statement. */
1292 static gimple
1293 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1295 gimple op1def = NULL, op2def = NULL;
1296 gimple_stmt_iterator gsi;
1297 tree op;
1298 gassign *sum;
1300 /* Create the addition statement. */
1301 op = make_ssa_name (type);
1302 sum = gimple_build_assign (op, opcode, op1, op2);
1304 /* Find an insertion place and insert. */
1305 if (TREE_CODE (op1) == SSA_NAME)
1306 op1def = SSA_NAME_DEF_STMT (op1);
1307 if (TREE_CODE (op2) == SSA_NAME)
1308 op2def = SSA_NAME_DEF_STMT (op2);
1309 if ((!op1def || gimple_nop_p (op1def))
1310 && (!op2def || gimple_nop_p (op2def)))
1312 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1313 if (gsi_end_p (gsi))
1315 gimple_stmt_iterator gsi2
1316 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1317 gimple_set_uid (sum,
1318 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1320 else
1321 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1322 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1324 else
1326 gimple insert_point;
1327 if ((!op1def || gimple_nop_p (op1def))
1328 || (op2def && !gimple_nop_p (op2def)
1329 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1330 insert_point = op2def;
1331 else
1332 insert_point = op1def;
1333 insert_stmt_after (sum, insert_point);
1335 update_stmt (sum);
1337 return sum;
1340 /* Perform un-distribution of divisions and multiplications.
1341 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1342 to (A + B) / X for real X.
1344 The algorithm is organized as follows.
1346 - First we walk the addition chain *OPS looking for summands that
1347 are defined by a multiplication or a real division. This results
1348 in the candidates bitmap with relevant indices into *OPS.
1350 - Second we build the chains of multiplications or divisions for
1351 these candidates, counting the number of occurrences of (operand, code)
1352 pairs in all of the candidates chains.
1354 - Third we sort the (operand, code) pairs by number of occurrence and
1355 process them starting with the pair with the most uses.
1357 * For each such pair we walk the candidates again to build a
1358 second candidate bitmap noting all multiplication/division chains
1359 that have at least one occurrence of (operand, code).
1361 * We build an alternate addition chain only covering these
1362 candidates with one (operand, code) operation removed from their
1363 multiplication/division chain.
1365 * The first candidate gets replaced by the alternate addition chain
1366 multiplied/divided by the operand.
1368 * All candidate chains get disabled for further processing and
1369 processing of (operand, code) pairs continues.
1371 The alternate addition chains built are re-processed by the main
1372 reassociation algorithm which allows optimizing a * x * y + b * y * x
1373 to (a + b ) * x * y in one invocation of the reassociation pass. */
1375 static bool
1376 undistribute_ops_list (enum tree_code opcode,
1377 vec<operand_entry_t> *ops, struct loop *loop)
1379 unsigned int length = ops->length ();
1380 operand_entry_t oe1;
1381 unsigned i, j;
1382 sbitmap candidates, candidates2;
1383 unsigned nr_candidates, nr_candidates2;
1384 sbitmap_iterator sbi0;
1385 vec<operand_entry_t> *subops;
1386 bool changed = false;
1387 int next_oecount_id = 0;
1389 if (length <= 1
1390 || opcode != PLUS_EXPR)
1391 return false;
1393 /* Build a list of candidates to process. */
1394 candidates = sbitmap_alloc (length);
1395 bitmap_clear (candidates);
1396 nr_candidates = 0;
1397 FOR_EACH_VEC_ELT (*ops, i, oe1)
1399 enum tree_code dcode;
1400 gimple oe1def;
1402 if (TREE_CODE (oe1->op) != SSA_NAME)
1403 continue;
1404 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1405 if (!is_gimple_assign (oe1def))
1406 continue;
1407 dcode = gimple_assign_rhs_code (oe1def);
1408 if ((dcode != MULT_EXPR
1409 && dcode != RDIV_EXPR)
1410 || !is_reassociable_op (oe1def, dcode, loop))
1411 continue;
1413 bitmap_set_bit (candidates, i);
1414 nr_candidates++;
1417 if (nr_candidates < 2)
1419 sbitmap_free (candidates);
1420 return false;
1423 if (dump_file && (dump_flags & TDF_DETAILS))
1425 fprintf (dump_file, "searching for un-distribute opportunities ");
1426 print_generic_expr (dump_file,
1427 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1428 fprintf (dump_file, " %d\n", nr_candidates);
1431 /* Build linearized sub-operand lists and the counting table. */
1432 cvec.create (0);
1434 hash_table<oecount_hasher> ctable (15);
1436 /* ??? Macro arguments cannot have multi-argument template types in
1437 them. This typedef is needed to workaround that limitation. */
1438 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1439 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1440 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1442 gimple oedef;
1443 enum tree_code oecode;
1444 unsigned j;
1446 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1447 oecode = gimple_assign_rhs_code (oedef);
1448 linearize_expr_tree (&subops[i], oedef,
1449 associative_tree_code (oecode), false);
1451 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1453 oecount c;
1454 int *slot;
1455 int idx;
1456 c.oecode = oecode;
1457 c.cnt = 1;
1458 c.id = next_oecount_id++;
1459 c.op = oe1->op;
1460 cvec.safe_push (c);
1461 idx = cvec.length () + 41;
1462 slot = ctable.find_slot (idx, INSERT);
1463 if (!*slot)
1465 *slot = idx;
1467 else
1469 cvec.pop ();
1470 cvec[*slot - 42].cnt++;
1475 /* Sort the counting table. */
1476 cvec.qsort (oecount_cmp);
1478 if (dump_file && (dump_flags & TDF_DETAILS))
1480 oecount *c;
1481 fprintf (dump_file, "Candidates:\n");
1482 FOR_EACH_VEC_ELT (cvec, j, c)
1484 fprintf (dump_file, " %u %s: ", c->cnt,
1485 c->oecode == MULT_EXPR
1486 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1487 print_generic_expr (dump_file, c->op, 0);
1488 fprintf (dump_file, "\n");
1492 /* Process the (operand, code) pairs in order of most occurrence. */
1493 candidates2 = sbitmap_alloc (length);
1494 while (!cvec.is_empty ())
1496 oecount *c = &cvec.last ();
1497 if (c->cnt < 2)
1498 break;
1500 /* Now collect the operands in the outer chain that contain
1501 the common operand in their inner chain. */
1502 bitmap_clear (candidates2);
1503 nr_candidates2 = 0;
1504 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1506 gimple oedef;
1507 enum tree_code oecode;
1508 unsigned j;
1509 tree op = (*ops)[i]->op;
1511 /* If we undistributed in this chain already this may be
1512 a constant. */
1513 if (TREE_CODE (op) != SSA_NAME)
1514 continue;
1516 oedef = SSA_NAME_DEF_STMT (op);
1517 oecode = gimple_assign_rhs_code (oedef);
1518 if (oecode != c->oecode)
1519 continue;
1521 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1523 if (oe1->op == c->op)
1525 bitmap_set_bit (candidates2, i);
1526 ++nr_candidates2;
1527 break;
1532 if (nr_candidates2 >= 2)
1534 operand_entry_t oe1, oe2;
1535 gimple prod;
1536 int first = bitmap_first_set_bit (candidates2);
1538 /* Build the new addition chain. */
1539 oe1 = (*ops)[first];
1540 if (dump_file && (dump_flags & TDF_DETAILS))
1542 fprintf (dump_file, "Building (");
1543 print_generic_expr (dump_file, oe1->op, 0);
1545 zero_one_operation (&oe1->op, c->oecode, c->op);
1546 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1548 gimple sum;
1549 oe2 = (*ops)[i];
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1552 fprintf (dump_file, " + ");
1553 print_generic_expr (dump_file, oe2->op, 0);
1555 zero_one_operation (&oe2->op, c->oecode, c->op);
1556 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1557 oe1->op, oe2->op, opcode);
1558 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1559 oe2->rank = 0;
1560 oe1->op = gimple_get_lhs (sum);
1563 /* Apply the multiplication/division. */
1564 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1565 oe1->op, c->op, c->oecode);
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1568 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1569 print_generic_expr (dump_file, c->op, 0);
1570 fprintf (dump_file, "\n");
1573 /* Record it in the addition chain and disable further
1574 undistribution with this op. */
1575 oe1->op = gimple_assign_lhs (prod);
1576 oe1->rank = get_rank (oe1->op);
1577 subops[first].release ();
1579 changed = true;
1582 cvec.pop ();
1585 for (i = 0; i < ops->length (); ++i)
1586 subops[i].release ();
1587 free (subops);
1588 cvec.release ();
1589 sbitmap_free (candidates);
1590 sbitmap_free (candidates2);
1592 return changed;
1595 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1596 expression, examine the other OPS to see if any of them are comparisons
1597 of the same values, which we may be able to combine or eliminate.
1598 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1600 static bool
1601 eliminate_redundant_comparison (enum tree_code opcode,
1602 vec<operand_entry_t> *ops,
1603 unsigned int currindex,
1604 operand_entry_t curr)
1606 tree op1, op2;
1607 enum tree_code lcode, rcode;
1608 gimple def1, def2;
1609 int i;
1610 operand_entry_t oe;
1612 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1613 return false;
1615 /* Check that CURR is a comparison. */
1616 if (TREE_CODE (curr->op) != SSA_NAME)
1617 return false;
1618 def1 = SSA_NAME_DEF_STMT (curr->op);
1619 if (!is_gimple_assign (def1))
1620 return false;
1621 lcode = gimple_assign_rhs_code (def1);
1622 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1623 return false;
1624 op1 = gimple_assign_rhs1 (def1);
1625 op2 = gimple_assign_rhs2 (def1);
1627 /* Now look for a similar comparison in the remaining OPS. */
1628 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1630 tree t;
1632 if (TREE_CODE (oe->op) != SSA_NAME)
1633 continue;
1634 def2 = SSA_NAME_DEF_STMT (oe->op);
1635 if (!is_gimple_assign (def2))
1636 continue;
1637 rcode = gimple_assign_rhs_code (def2);
1638 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1639 continue;
1641 /* If we got here, we have a match. See if we can combine the
1642 two comparisons. */
1643 if (opcode == BIT_IOR_EXPR)
1644 t = maybe_fold_or_comparisons (lcode, op1, op2,
1645 rcode, gimple_assign_rhs1 (def2),
1646 gimple_assign_rhs2 (def2));
1647 else
1648 t = maybe_fold_and_comparisons (lcode, op1, op2,
1649 rcode, gimple_assign_rhs1 (def2),
1650 gimple_assign_rhs2 (def2));
1651 if (!t)
1652 continue;
1654 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1655 always give us a boolean_type_node value back. If the original
1656 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1657 we need to convert. */
1658 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1659 t = fold_convert (TREE_TYPE (curr->op), t);
1661 if (TREE_CODE (t) != INTEGER_CST
1662 && !operand_equal_p (t, curr->op, 0))
1664 enum tree_code subcode;
1665 tree newop1, newop2;
1666 if (!COMPARISON_CLASS_P (t))
1667 continue;
1668 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1669 STRIP_USELESS_TYPE_CONVERSION (newop1);
1670 STRIP_USELESS_TYPE_CONVERSION (newop2);
1671 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1672 continue;
1675 if (dump_file && (dump_flags & TDF_DETAILS))
1677 fprintf (dump_file, "Equivalence: ");
1678 print_generic_expr (dump_file, curr->op, 0);
1679 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1680 print_generic_expr (dump_file, oe->op, 0);
1681 fprintf (dump_file, " -> ");
1682 print_generic_expr (dump_file, t, 0);
1683 fprintf (dump_file, "\n");
1686 /* Now we can delete oe, as it has been subsumed by the new combined
1687 expression t. */
1688 ops->ordered_remove (i);
1689 reassociate_stats.ops_eliminated ++;
1691 /* If t is the same as curr->op, we're done. Otherwise we must
1692 replace curr->op with t. Special case is if we got a constant
1693 back, in which case we add it to the end instead of in place of
1694 the current entry. */
1695 if (TREE_CODE (t) == INTEGER_CST)
1697 ops->ordered_remove (currindex);
1698 add_to_ops_vec (ops, t);
1700 else if (!operand_equal_p (t, curr->op, 0))
1702 gimple sum;
1703 enum tree_code subcode;
1704 tree newop1;
1705 tree newop2;
1706 gcc_assert (COMPARISON_CLASS_P (t));
1707 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1708 STRIP_USELESS_TYPE_CONVERSION (newop1);
1709 STRIP_USELESS_TYPE_CONVERSION (newop2);
1710 gcc_checking_assert (is_gimple_val (newop1)
1711 && is_gimple_val (newop2));
1712 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1713 curr->op = gimple_get_lhs (sum);
1715 return true;
1718 return false;
1721 /* Perform various identities and other optimizations on the list of
1722 operand entries, stored in OPS. The tree code for the binary
1723 operation between all the operands is OPCODE. */
1725 static void
1726 optimize_ops_list (enum tree_code opcode,
1727 vec<operand_entry_t> *ops)
1729 unsigned int length = ops->length ();
1730 unsigned int i;
1731 operand_entry_t oe;
1732 operand_entry_t oelast = NULL;
1733 bool iterate = false;
1735 if (length == 1)
1736 return;
1738 oelast = ops->last ();
1740 /* If the last two are constants, pop the constants off, merge them
1741 and try the next two. */
1742 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1744 operand_entry_t oelm1 = (*ops)[length - 2];
1746 if (oelm1->rank == 0
1747 && is_gimple_min_invariant (oelm1->op)
1748 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1749 TREE_TYPE (oelast->op)))
1751 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1752 oelm1->op, oelast->op);
1754 if (folded && is_gimple_min_invariant (folded))
1756 if (dump_file && (dump_flags & TDF_DETAILS))
1757 fprintf (dump_file, "Merging constants\n");
1759 ops->pop ();
1760 ops->pop ();
1762 add_to_ops_vec (ops, folded);
1763 reassociate_stats.constants_eliminated++;
1765 optimize_ops_list (opcode, ops);
1766 return;
1771 eliminate_using_constants (opcode, ops);
1772 oelast = NULL;
1774 for (i = 0; ops->iterate (i, &oe);)
1776 bool done = false;
1778 if (eliminate_not_pairs (opcode, ops, i, oe))
1779 return;
1780 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1781 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1782 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1784 if (done)
1785 return;
1786 iterate = true;
1787 oelast = NULL;
1788 continue;
1790 oelast = oe;
1791 i++;
1794 length = ops->length ();
1795 oelast = ops->last ();
1797 if (iterate)
1798 optimize_ops_list (opcode, ops);
1801 /* The following functions are subroutines to optimize_range_tests and allow
1802 it to try to change a logical combination of comparisons into a range
1803 test.
1805 For example, both
1806 X == 2 || X == 5 || X == 3 || X == 4
1808 X >= 2 && X <= 5
1809 are converted to
1810 (unsigned) (X - 2) <= 3
1812 For more information see comments above fold_test_range in fold-const.c,
1813 this implementation is for GIMPLE. */
1815 struct range_entry
1817 tree exp;
1818 tree low;
1819 tree high;
1820 bool in_p;
1821 bool strict_overflow_p;
1822 unsigned int idx, next;
1825 /* This is similar to make_range in fold-const.c, but on top of
1826 GIMPLE instead of trees. If EXP is non-NULL, it should be
1827 an SSA_NAME and STMT argument is ignored, otherwise STMT
1828 argument should be a GIMPLE_COND. */
1830 static void
1831 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1833 int in_p;
1834 tree low, high;
1835 bool is_bool, strict_overflow_p;
1837 r->exp = NULL_TREE;
1838 r->in_p = false;
1839 r->strict_overflow_p = false;
1840 r->low = NULL_TREE;
1841 r->high = NULL_TREE;
1842 if (exp != NULL_TREE
1843 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1844 return;
1846 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1847 and see if we can refine the range. Some of the cases below may not
1848 happen, but it doesn't seem worth worrying about this. We "continue"
1849 the outer loop when we've changed something; otherwise we "break"
1850 the switch, which will "break" the while. */
1851 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1852 high = low;
1853 in_p = 0;
1854 strict_overflow_p = false;
1855 is_bool = false;
1856 if (exp == NULL_TREE)
1857 is_bool = true;
1858 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1860 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1861 is_bool = true;
1862 else
1863 return;
1865 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1866 is_bool = true;
1868 while (1)
1870 enum tree_code code;
1871 tree arg0, arg1, exp_type;
1872 tree nexp;
1873 location_t loc;
1875 if (exp != NULL_TREE)
1877 if (TREE_CODE (exp) != SSA_NAME
1878 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1879 break;
1881 stmt = SSA_NAME_DEF_STMT (exp);
1882 if (!is_gimple_assign (stmt))
1883 break;
1885 code = gimple_assign_rhs_code (stmt);
1886 arg0 = gimple_assign_rhs1 (stmt);
1887 arg1 = gimple_assign_rhs2 (stmt);
1888 exp_type = TREE_TYPE (exp);
1890 else
1892 code = gimple_cond_code (stmt);
1893 arg0 = gimple_cond_lhs (stmt);
1894 arg1 = gimple_cond_rhs (stmt);
1895 exp_type = boolean_type_node;
1898 if (TREE_CODE (arg0) != SSA_NAME)
1899 break;
1900 loc = gimple_location (stmt);
1901 switch (code)
1903 case BIT_NOT_EXPR:
1904 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1905 /* Ensure the range is either +[-,0], +[0,0],
1906 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1907 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1908 or similar expression of unconditional true or
1909 false, it should not be negated. */
1910 && ((high && integer_zerop (high))
1911 || (low && integer_onep (low))))
1913 in_p = !in_p;
1914 exp = arg0;
1915 continue;
1917 break;
1918 case SSA_NAME:
1919 exp = arg0;
1920 continue;
1921 CASE_CONVERT:
1922 if (is_bool)
1923 goto do_default;
1924 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1926 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1927 is_bool = true;
1928 else
1929 return;
1931 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1932 is_bool = true;
1933 goto do_default;
1934 case EQ_EXPR:
1935 case NE_EXPR:
1936 case LT_EXPR:
1937 case LE_EXPR:
1938 case GE_EXPR:
1939 case GT_EXPR:
1940 is_bool = true;
1941 /* FALLTHRU */
1942 default:
1943 if (!is_bool)
1944 return;
1945 do_default:
1946 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1947 &low, &high, &in_p,
1948 &strict_overflow_p);
1949 if (nexp != NULL_TREE)
1951 exp = nexp;
1952 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1953 continue;
1955 break;
1957 break;
1959 if (is_bool)
1961 r->exp = exp;
1962 r->in_p = in_p;
1963 r->low = low;
1964 r->high = high;
1965 r->strict_overflow_p = strict_overflow_p;
1969 /* Comparison function for qsort. Sort entries
1970 without SSA_NAME exp first, then with SSA_NAMEs sorted
1971 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1972 by increasing ->low and if ->low is the same, by increasing
1973 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1974 maximum. */
1976 static int
1977 range_entry_cmp (const void *a, const void *b)
1979 const struct range_entry *p = (const struct range_entry *) a;
1980 const struct range_entry *q = (const struct range_entry *) b;
1982 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1984 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1986 /* Group range_entries for the same SSA_NAME together. */
1987 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1988 return -1;
1989 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1990 return 1;
1991 /* If ->low is different, NULL low goes first, then by
1992 ascending low. */
1993 if (p->low != NULL_TREE)
1995 if (q->low != NULL_TREE)
1997 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1998 p->low, q->low);
1999 if (tem && integer_onep (tem))
2000 return -1;
2001 tem = fold_binary (GT_EXPR, boolean_type_node,
2002 p->low, q->low);
2003 if (tem && integer_onep (tem))
2004 return 1;
2006 else
2007 return 1;
2009 else if (q->low != NULL_TREE)
2010 return -1;
2011 /* If ->high is different, NULL high goes last, before that by
2012 ascending high. */
2013 if (p->high != NULL_TREE)
2015 if (q->high != NULL_TREE)
2017 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2018 p->high, q->high);
2019 if (tem && integer_onep (tem))
2020 return -1;
2021 tem = fold_binary (GT_EXPR, boolean_type_node,
2022 p->high, q->high);
2023 if (tem && integer_onep (tem))
2024 return 1;
2026 else
2027 return -1;
2029 else if (q->high != NULL_TREE)
2030 return 1;
2031 /* If both ranges are the same, sort below by ascending idx. */
2033 else
2034 return 1;
2036 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2037 return -1;
2039 if (p->idx < q->idx)
2040 return -1;
2041 else
2043 gcc_checking_assert (p->idx > q->idx);
2044 return 1;
2048 /* Helper routine of optimize_range_test.
2049 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2050 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2051 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2052 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2053 an array of COUNT pointers to other ranges. Return
2054 true if the range merge has been successful.
2055 If OPCODE is ERROR_MARK, this is called from within
2056 maybe_optimize_range_tests and is performing inter-bb range optimization.
2057 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2058 oe->rank. */
2060 static bool
2061 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2062 struct range_entry **otherrangep,
2063 unsigned int count, enum tree_code opcode,
2064 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2065 bool in_p, tree low, tree high, bool strict_overflow_p)
2067 operand_entry_t oe = (*ops)[range->idx];
2068 tree op = oe->op;
2069 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2070 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2071 location_t loc = gimple_location (stmt);
2072 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2073 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2074 in_p, low, high);
2075 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2076 gimple_stmt_iterator gsi;
2077 unsigned int i;
2079 if (tem == NULL_TREE)
2080 return false;
2082 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2083 warning_at (loc, OPT_Wstrict_overflow,
2084 "assuming signed overflow does not occur "
2085 "when simplifying range test");
2087 if (dump_file && (dump_flags & TDF_DETAILS))
2089 struct range_entry *r;
2090 fprintf (dump_file, "Optimizing range tests ");
2091 print_generic_expr (dump_file, range->exp, 0);
2092 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2093 print_generic_expr (dump_file, range->low, 0);
2094 fprintf (dump_file, ", ");
2095 print_generic_expr (dump_file, range->high, 0);
2096 fprintf (dump_file, "]");
2097 for (i = 0; i < count; i++)
2099 if (otherrange)
2100 r = otherrange + i;
2101 else
2102 r = otherrangep[i];
2103 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2104 print_generic_expr (dump_file, r->low, 0);
2105 fprintf (dump_file, ", ");
2106 print_generic_expr (dump_file, r->high, 0);
2107 fprintf (dump_file, "]");
2109 fprintf (dump_file, "\n into ");
2110 print_generic_expr (dump_file, tem, 0);
2111 fprintf (dump_file, "\n");
2114 if (opcode == BIT_IOR_EXPR
2115 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2116 tem = invert_truthvalue_loc (loc, tem);
2118 tem = fold_convert_loc (loc, optype, tem);
2119 gsi = gsi_for_stmt (stmt);
2120 unsigned int uid = gimple_uid (stmt);
2121 /* In rare cases range->exp can be equal to lhs of stmt.
2122 In that case we have to insert after the stmt rather then before
2123 it. If stmt is a PHI, insert it at the start of the basic block. */
2124 if (op != range->exp)
2126 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2127 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2128 GSI_SAME_STMT);
2129 gsi_prev (&gsi);
2131 else if (gimple_code (stmt) != GIMPLE_PHI)
2133 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2134 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2135 GSI_CONTINUE_LINKING);
2137 else
2139 gsi = gsi_after_labels (gimple_bb (stmt));
2140 if (!gsi_end_p (gsi))
2141 uid = gimple_uid (gsi_stmt (gsi));
2142 else
2144 gsi = gsi_start_bb (gimple_bb (stmt));
2145 uid = 1;
2146 while (!gsi_end_p (gsi))
2148 uid = gimple_uid (gsi_stmt (gsi));
2149 gsi_next (&gsi);
2152 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2153 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2154 GSI_SAME_STMT);
2155 if (gsi_end_p (gsi))
2156 gsi = gsi_last_bb (gimple_bb (stmt));
2157 else
2158 gsi_prev (&gsi);
2160 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2161 if (gimple_uid (gsi_stmt (gsi)))
2162 break;
2163 else
2164 gimple_set_uid (gsi_stmt (gsi), uid);
2166 oe->op = tem;
2167 range->exp = exp;
2168 range->low = low;
2169 range->high = high;
2170 range->in_p = in_p;
2171 range->strict_overflow_p = false;
2173 for (i = 0; i < count; i++)
2175 if (otherrange)
2176 range = otherrange + i;
2177 else
2178 range = otherrangep[i];
2179 oe = (*ops)[range->idx];
2180 /* Now change all the other range test immediate uses, so that
2181 those tests will be optimized away. */
2182 if (opcode == ERROR_MARK)
2184 if (oe->op)
2185 oe->op = build_int_cst (TREE_TYPE (oe->op),
2186 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2187 else
2188 oe->op = (oe->rank == BIT_IOR_EXPR
2189 ? boolean_false_node : boolean_true_node);
2191 else
2192 oe->op = error_mark_node;
2193 range->exp = NULL_TREE;
2195 return true;
2198 /* Optimize X == CST1 || X == CST2
2199 if popcount (CST1 ^ CST2) == 1 into
2200 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2201 Similarly for ranges. E.g.
2202 X != 2 && X != 3 && X != 10 && X != 11
2203 will be transformed by the previous optimization into
2204 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2205 and this loop can transform that into
2206 !(((X & ~8) - 2U) <= 1U). */
2208 static bool
2209 optimize_range_tests_xor (enum tree_code opcode, tree type,
2210 tree lowi, tree lowj, tree highi, tree highj,
2211 vec<operand_entry_t> *ops,
2212 struct range_entry *rangei,
2213 struct range_entry *rangej)
2215 tree lowxor, highxor, tem, exp;
2216 /* Check lowi ^ lowj == highi ^ highj and
2217 popcount (lowi ^ lowj) == 1. */
2218 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2219 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2220 return false;
2221 if (!integer_pow2p (lowxor))
2222 return false;
2223 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2224 if (!tree_int_cst_equal (lowxor, highxor))
2225 return false;
2227 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2228 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2229 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2230 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2231 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2232 NULL, rangei->in_p, lowj, highj,
2233 rangei->strict_overflow_p
2234 || rangej->strict_overflow_p))
2235 return true;
2236 return false;
2239 /* Optimize X == CST1 || X == CST2
2240 if popcount (CST2 - CST1) == 1 into
2241 ((X - CST1) & ~(CST2 - CST1)) == 0.
2242 Similarly for ranges. E.g.
2243 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2244 || X == 75 || X == 45
2245 will be transformed by the previous optimization into
2246 (X - 43U) <= 3U || (X - 75U) <= 3U
2247 and this loop can transform that into
2248 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2249 static bool
2250 optimize_range_tests_diff (enum tree_code opcode, tree type,
2251 tree lowi, tree lowj, tree highi, tree highj,
2252 vec<operand_entry_t> *ops,
2253 struct range_entry *rangei,
2254 struct range_entry *rangej)
2256 tree tem1, tem2, mask;
2257 /* Check highi - lowi == highj - lowj. */
2258 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2259 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2260 return false;
2261 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2262 if (!tree_int_cst_equal (tem1, tem2))
2263 return false;
2264 /* Check popcount (lowj - lowi) == 1. */
2265 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2266 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2267 return false;
2268 if (!integer_pow2p (tem1))
2269 return false;
/* Perform the subtraction in the corresponding unsigned type, so that
   the arithmetic below cannot invoke signed overflow.  */
2271 type = unsigned_type_for (type);
2272 tem1 = fold_convert (type, tem1);
2273 tem2 = fold_convert (type, tem2);
2274 lowi = fold_convert (type, lowi);
2275 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2276 tem1 = fold_binary (MINUS_EXPR, type,
2277 fold_convert (type, rangei->exp), lowi);
2278 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
/* The merged test is ((EXP - lowi) & ~(lowj - lowi)) in [0, highi - lowi].  */
2279 lowj = build_int_cst (type, 0);
2280 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2281 NULL, rangei->in_p, lowj, tem2,
2282 rangei->strict_overflow_p
2283 || rangej->strict_overflow_p))
2284 return true;
2285 return false;
2288 /* It does some common checks for function optimize_range_tests_xor and
2289 optimize_range_tests_diff.
2290 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2291 Else it calls optimize_range_tests_diff. */
2293 static bool
2294 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2295 bool optimize_xor, vec<operand_entry_t> *ops,
2296 struct range_entry *ranges)
2298 int i, j;
2299 bool any_changes = false;
2300 for (i = first; i < length; i++)
2302 tree lowi, highi, lowj, highj, type, tem;
/* Only handle !in_p ranges on integral SSA expressions with a known
   upper bound.  A NULL low bound means the type's minimum.  */
2304 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2305 continue;
2306 type = TREE_TYPE (ranges[i].exp);
2307 if (!INTEGRAL_TYPE_P (type))
2308 continue;
2309 lowi = ranges[i].low;
2310 if (lowi == NULL_TREE)
2311 lowi = TYPE_MIN_VALUE (type);
2312 highi = ranges[i].high;
2313 if (highi == NULL_TREE)
2314 continue;
/* Look only at a bounded window (63 following entries) to limit
   compile time on long operand lists.  */
2315 for (j = i + 1; j < length && j < i + 64; j++)
2317 bool changes;
2318 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2319 continue;
2320 lowj = ranges[j].low;
2321 if (lowj == NULL_TREE)
2322 continue;
2323 highj = ranges[j].high;
2324 if (highj == NULL_TREE)
2325 highj = TYPE_MAX_VALUE (type);
2326 /* Check lowj > highi. */
2327 tem = fold_binary (GT_EXPR, boolean_type_node,
2328 lowj, highi);
2329 if (tem == NULL_TREE || !integer_onep (tem))
2330 continue;
2331 if (optimize_xor)
2332 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2333 highi, highj, ops,
2334 ranges + i, ranges + j);
2335 else
2336 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2337 highi, highj, ops,
2338 ranges + i, ranges + j);
2339 if (changes)
2341 any_changes = true;
2342 break;
2346 return any_changes;
2349 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2350 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2351 EXP on success, NULL otherwise. */
2353 static tree
2354 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2355 wide_int *mask, tree *totallowp)
2357 tree tem = int_const_binop (MINUS_EXPR, high, low);
/* The range must be constant, non-negative and narrower than PREC bits.  */
2358 if (tem == NULL_TREE
2359 || TREE_CODE (tem) != INTEGER_CST
2360 || TREE_OVERFLOW (tem)
2361 || tree_int_cst_sgn (tem) == -1
2362 || compare_tree_int (tem, prec) != -1)
2363 return NULL_TREE;
/* MAX is the number of values in the [LOW, HIGH] range; start with a
   mask of MAX consecutive low bits.  */
2365 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2366 *mask = wi::shifted_mask (0, max, false, prec);
/* An EXP of the form X & CST where ~CST has a single bit set can be
   handled too, by widening the mask accordingly.  */
2367 if (TREE_CODE (exp) == BIT_AND_EXPR
2368 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2370 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2371 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2372 if (wi::popcount (msk) == 1
2373 && wi::ltu_p (msk, prec - max))
2375 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2376 max += msk.to_uhwi ();
2377 exp = TREE_OPERAND (exp, 0);
/* Similarly strip an inner X + CST when LOW is zero; the negated CST
   becomes the (candidate) common low bound.  */
2378 if (integer_zerop (low)
2379 && TREE_CODE (exp) == PLUS_EXPR
2380 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2382 tree ret = TREE_OPERAND (exp, 0);
2383 STRIP_NOPS (ret);
2384 widest_int bias
2385 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2386 TYPE_PRECISION (TREE_TYPE (low))));
2387 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2388 if (totallowp)
2390 *totallowp = tbias;
2391 return ret;
2393 else if (!tree_int_cst_lt (totallow, tbias))
2394 return NULL_TREE;
2395 bias = wi::to_widest (tbias);
2396 bias -= wi::to_widest (totallow);
2397 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2399 *mask = wi::lshift (*mask, bias);
2400 return ret;
2405 if (totallowp)
2406 return exp;
/* Otherwise shift the mask so it is relative to TOTALLOW; the whole
   shifted mask must still fit into PREC bits.  */
2407 if (!tree_int_cst_lt (totallow, low))
2408 return exp;
2409 tem = int_const_binop (MINUS_EXPR, low, totallow);
2410 if (tem == NULL_TREE
2411 || TREE_CODE (tem) != INTEGER_CST
2412 || TREE_OVERFLOW (tem)
2413 || compare_tree_int (tem, prec - max) == 1)
2414 return NULL_TREE;
2416 *mask = wi::lshift (*mask, wi::to_widest (tem));
2417 return exp;
2420 /* Attempt to optimize small range tests using bit test.
2421 E.g.
2422 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2423 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2424 has been by earlier optimizations optimized into:
2425 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2426 As all the 43 through 82 range is less than 64 numbers,
2427 for 64-bit word targets optimize that into:
2428 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2430 static bool
2431 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2432 vec<operand_entry_t> *ops,
2433 struct range_entry *ranges)
2435 int i, j;
2436 bool any_changes = false;
2437 int prec = GET_MODE_BITSIZE (word_mode);
2438 auto_vec<struct range_entry *, 64> candidates;
2440 for (i = first; i < length - 2; i++)
2442 tree lowi, highi, lowj, highj, type;
2444 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2445 continue;
2446 type = TREE_TYPE (ranges[i].exp);
2447 if (!INTEGRAL_TYPE_P (type))
2448 continue;
2449 lowi = ranges[i].low;
2450 if (lowi == NULL_TREE)
2451 lowi = TYPE_MIN_VALUE (type);
2452 highi = ranges[i].high;
2453 if (highi == NULL_TREE)
2454 continue;
2455 wide_int mask;
/* LOWI is updated by extract_bit_test_mask to the common low bound
   for all the candidate ranges merged below.  */
2456 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2457 highi, &mask, &lowi);
2458 if (exp == NULL_TREE)
2459 continue;
2460 bool strict_overflow_p = ranges[i].strict_overflow_p;
2461 candidates.truncate (0);
2462 int end = MIN (i + 64, length);
2463 for (j = i + 1; j < end; j++)
2465 tree exp2;
2466 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2467 continue;
/* Accept EXP itself, EXP & CST, or (EXP + CST) & CST as a further
   test on the same underlying expression.  */
2468 if (ranges[j].exp == exp)
2470 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2472 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2473 if (exp2 == exp)
2475 else if (TREE_CODE (exp2) == PLUS_EXPR)
2477 exp2 = TREE_OPERAND (exp2, 0);
2478 STRIP_NOPS (exp2);
2479 if (exp2 != exp)
2480 continue;
2482 else
2483 continue;
2485 else
2486 continue;
2487 lowj = ranges[j].low;
2488 if (lowj == NULL_TREE)
2489 continue;
2490 highj = ranges[j].high;
2491 if (highj == NULL_TREE)
2492 highj = TYPE_MAX_VALUE (type);
2493 wide_int mask2;
2494 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2495 highj, &mask2, NULL);
2496 if (exp2 != exp)
2497 continue;
2498 mask |= mask2;
2499 strict_overflow_p |= ranges[j].strict_overflow_p;
2500 candidates.safe_push (&ranges[j]);
2503 /* If we need otherwise 3 or more comparisons, use a bit test. */
2504 if (candidates.length () >= 2)
2506 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2507 wi::to_widest (lowi)
2508 + prec - 1 - wi::clz (mask));
2509 operand_entry_t oe = (*ops)[ranges[i].idx];
2510 tree op = oe->op;
2511 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2512 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2513 location_t loc = gimple_location (stmt);
2514 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2516 /* See if it isn't cheaper to pretend the minimum value of the
2517 range is 0, if maximum value is small enough.
2518 We can avoid then subtraction of the minimum value, but the
2519 mask constant could be perhaps more expensive. */
2520 if (compare_tree_int (lowi, 0) > 0
2521 && compare_tree_int (high, prec) < 0)
2523 int cost_diff;
2524 HOST_WIDE_INT m = tree_to_uhwi (lowi);
/* Scratch register used only for RTX cost estimation.  */
2525 rtx reg = gen_raw_REG (word_mode, 10000);
2526 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2527 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2528 GEN_INT (-m)), speed_p);
2529 rtx r = immed_wide_int_const (mask, word_mode);
2530 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2531 word_mode, speed_p);
2532 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2533 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2534 word_mode, speed_p);
2535 if (cost_diff > 0)
2537 mask = wi::lshift (mask, m);
2538 lowi = build_zero_cst (TREE_TYPE (lowi));
/* Give up if the guarding range check folds to a constant; there is
   nothing useful to protect the shift with then.  */
2542 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2543 false, lowi, high);
2544 if (tem == NULL_TREE || is_gimple_val (tem))
2545 continue;
2546 tree etype = unsigned_type_for (TREE_TYPE (exp));
2547 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2548 fold_convert_loc (loc, etype, exp),
2549 fold_convert_loc (loc, etype, lowi));
2550 exp = fold_convert_loc (loc, integer_type_node, exp);
2551 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2552 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2553 build_int_cst (word_type, 1), exp);
2554 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2555 wide_int_to_tree (word_type, mask));
2556 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2557 build_zero_cst (word_type));
2558 if (is_gimple_val (exp))
2559 continue;
2561 /* The shift might have undefined behavior if TEM is true,
2562 but reassociate_bb isn't prepared to have basic blocks
2563 split when it is running. So, temporarily emit a code
2564 with BIT_IOR_EXPR instead of &&, and fix it up in
2565 branch_fixup. */
2566 gimple_seq seq;
2567 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2568 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2569 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2570 gimple_seq seq2;
2571 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2572 gimple_seq_add_seq_without_update (&seq, seq2);
2573 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2574 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2575 gimple g = gimple_build_assign (make_ssa_name (optype),
2576 BIT_IOR_EXPR, tem, exp);
2577 gimple_set_location (g, loc);
2578 gimple_seq_add_stmt_without_update (&seq, g);
2579 exp = gimple_assign_lhs (g);
2580 tree val = build_zero_cst (optype);
2581 if (update_range_test (&ranges[i], NULL, candidates.address (),
2582 candidates.length (), opcode, ops, exp,
2583 seq, false, val, val, strict_overflow_p))
2585 any_changes = true;
2586 reassoc_branch_fixups.safe_push (tem);
2588 else
2589 gimple_seq_discard (seq);
2592 return any_changes;
2595 /* Optimize range tests, similarly how fold_range_test optimizes
2596 it on trees. The tree code for the binary
2597 operation between all the operands is OPCODE.
2598 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2599 maybe_optimize_range_tests for inter-bb range optimization.
2600 In that case if oe->op is NULL, oe->id is bb->index whose
2601 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2602 the actual opcode. */
2604 static bool
2605 optimize_range_tests (enum tree_code opcode,
2606 vec<operand_entry_t> *ops)
2608 unsigned int length = ops->length (), i, j, first;
2609 operand_entry_t oe;
2610 struct range_entry *ranges;
2611 bool any_changes = false;
2613 if (length == 1)
2614 return false;
2616 ranges = XNEWVEC (struct range_entry, length);
2617 for (i = 0; i < length; i++)
2619 oe = (*ops)[i];
2620 ranges[i].idx = i;
2621 init_range_entry (ranges + i, oe->op,
2622 oe->op ? NULL :
2623 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2624 /* For | invert it now, we will invert it again before emitting
2625 the optimized expression. */
2626 if (opcode == BIT_IOR_EXPR
2627 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2628 ranges[i].in_p = !ranges[i].in_p;
/* Sort the ranges and skip leading entries whose tested expression is
   not an SSA_NAME; only SSA-based ranges are merged below.  */
2631 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2632 for (i = 0; i < length; i++)
2633 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2634 break;
2636 /* Try to merge ranges. */
2637 for (first = i; i < length; i++)
2639 tree low = ranges[i].low;
2640 tree high = ranges[i].high;
2641 int in_p = ranges[i].in_p;
2642 bool strict_overflow_p = ranges[i].strict_overflow_p;
2643 int update_fail_count = 0;
2645 for (j = i + 1; j < length; j++)
2647 if (ranges[i].exp != ranges[j].exp)
2648 break;
2649 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2650 ranges[j].in_p, ranges[j].low, ranges[j].high))
2651 break;
2652 strict_overflow_p |= ranges[j].strict_overflow_p;
2655 if (j == i + 1)
2656 continue;
2658 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2659 opcode, ops, ranges[i].exp, NULL, in_p,
2660 low, high, strict_overflow_p))
2662 i = j - 1;
2663 any_changes = true;
2665 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2666 while update_range_test would fail. */
2667 else if (update_fail_count == 64)
2668 i = j - 1;
2669 else
2670 ++update_fail_count;
2673 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2674 ops, ranges);
2676 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2677 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2678 ops, ranges);
2679 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2680 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2681 ops, ranges);
/* NOTE(review): entries whose op is error_mark_node (presumably
   invalidated by the optimizations above) are compacted away here;
   in the ERROR_MARK (inter-bb) case the caller keeps ops as-is.  */
2683 if (any_changes && opcode != ERROR_MARK)
2685 j = 0;
2686 FOR_EACH_VEC_ELT (*ops, i, oe)
2688 if (oe->op == error_mark_node)
2689 continue;
2690 else if (i != j)
2691 (*ops)[j] = oe;
2692 j++;
2694 ops->truncate (j);
2697 XDELETEVEC (ranges);
2698 return any_changes;
2701 /* Return true if STMT is a cast like:
2702 <bb N>:
2704 _123 = (int) _234;
2706 <bb M>:
2707 # _345 = PHI <_123(N), 1(...), 1(...)>
2708 where _234 has bool type, _123 has single use and
2709 bb N has a single successor M. This is commonly used in
2710 the last block of a range test. */
2712 static bool
2713 final_range_test_p (gimple stmt)
2715 basic_block bb, rhs_bb;
2716 edge e;
2717 tree lhs, rhs;
2718 use_operand_p use_p;
2719 gimple use_stmt;
2721 if (!gimple_assign_cast_p (stmt))
2722 return false;
2723 bb = gimple_bb (stmt);
2724 if (!single_succ_p (bb))
2725 return false;
2726 e = single_succ_edge (bb);
/* Reject complex (abnormal/EH) edges.  */
2727 if (e->flags & EDGE_COMPLEX)
2728 return false;
2730 lhs = gimple_assign_lhs (stmt);
2731 rhs = gimple_assign_rhs1 (stmt);
/* The cast must be from a boolean to some integral type.  */
2732 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2733 || TREE_CODE (rhs) != SSA_NAME
2734 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2735 return false;
2737 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2738 if (!single_imm_use (lhs, &use_p, &use_stmt))
2739 return false;
2741 if (gimple_code (use_stmt) != GIMPLE_PHI
2742 || gimple_bb (use_stmt) != e->dest)
2743 return false;
2745 /* And that the rhs is defined in the same loop. */
2746 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2747 if (rhs_bb == NULL
2748 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2749 return false;
2751 return true;
2754 /* Return true if BB is suitable basic block for inter-bb range test
2755 optimization. If BACKWARD is true, BB should be the only predecessor
2756 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2757 or compared with to find a common basic block to which all conditions
2758 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2759 be the only predecessor of BB. */
2761 static bool
2762 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2763 bool backward)
2765 edge_iterator ei, ei2;
2766 edge e, e2;
2767 gimple stmt;
2768 gphi_iterator gsi;
2769 bool other_edge_seen = false;
2770 bool is_cond;
2772 if (test_bb == bb)
2773 return false;
2774 /* Check last stmt first. */
2775 stmt = last_stmt (bb);
2776 if (stmt == NULL
2777 || (gimple_code (stmt) != GIMPLE_COND
2778 && (backward || !final_range_test_p (stmt)))
2779 || gimple_visited_p (stmt)
2780 || stmt_could_throw_p (stmt)
2781 || *other_bb == bb)
2782 return false;
2783 is_cond = gimple_code (stmt) == GIMPLE_COND;
2784 if (is_cond)
2786 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2787 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2788 to *OTHER_BB (if not set yet, try to find it out). */
2789 if (EDGE_COUNT (bb->succs) != 2)
2790 return false;
2791 FOR_EACH_EDGE (e, ei, bb->succs)
2793 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2794 return false;
2795 if (e->dest == test_bb)
2797 if (backward)
2798 continue;
2799 else
2800 return false;
2802 if (e->dest == bb)
2803 return false;
2804 if (*other_bb == NULL)
2806 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2807 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2808 return false;
2809 else if (e->dest == e2->dest)
2810 *other_bb = e->dest;
2811 if (*other_bb == NULL)
2812 return false;
2814 if (e->dest == *other_bb)
2815 other_edge_seen = true;
2816 else if (backward)
2817 return false;
2819 if (*other_bb == NULL || !other_edge_seen)
2820 return false;
/* BB ends with a final_range_test_p cast: its single successor must be
   the common *OTHER_BB.  */
2822 else if (single_succ (bb) != *other_bb)
2823 return false;
2825 /* Now check all PHIs of *OTHER_BB. */
2826 e = find_edge (bb, *other_bb);
2827 e2 = find_edge (test_bb, *other_bb);
2828 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2830 gphi *phi = gsi.phi ();
2831 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2832 corresponding to BB and TEST_BB predecessor must be the same. */
2833 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2834 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2836 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2837 one of the PHIs should have the lhs of the last stmt in
2838 that block as PHI arg and that PHI should have 0 or 1
2839 corresponding to it in all other range test basic blocks
2840 considered. */
2841 if (!is_cond)
2843 if (gimple_phi_arg_def (phi, e->dest_idx)
2844 == gimple_assign_lhs (stmt)
2845 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2846 || integer_onep (gimple_phi_arg_def (phi,
2847 e2->dest_idx))))
2848 continue;
2850 else
2852 gimple test_last = last_stmt (test_bb);
2853 if (gimple_code (test_last) != GIMPLE_COND
2854 && gimple_phi_arg_def (phi, e2->dest_idx)
2855 == gimple_assign_lhs (test_last)
2856 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2857 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2858 continue;
2861 return false;
2864 return true;
2867 /* Return true if BB doesn't have side-effects that would disallow
2868 range test optimization, all SSA_NAMEs set in the bb are consumed
2869 in the bb and there are no PHIs. */
2871 static bool
2872 no_side_effect_bb (basic_block bb)
2874 gimple_stmt_iterator gsi;
2875 gimple last;
2877 if (!gimple_seq_empty_p (phi_nodes (bb)))
2878 return false;
2879 last = last_stmt (bb);
2880 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2882 gimple stmt = gsi_stmt (gsi);
2883 tree lhs;
2884 imm_use_iterator imm_iter;
2885 use_operand_p use_p;
2887 if (is_gimple_debug (stmt))
2888 continue;
2889 if (gimple_has_side_effects (stmt))
2890 return false;
/* Reaching the last stmt means all preceding stmts passed.  */
2891 if (stmt == last)
2892 return true;
2893 if (!is_gimple_assign (stmt))
2894 return false;
2895 lhs = gimple_assign_lhs (stmt);
2896 if (TREE_CODE (lhs) != SSA_NAME)
2897 return false;
2898 if (gimple_assign_rhs_could_trap_p (stmt))
2899 return false;
/* All non-debug uses of LHS must stay within BB.  */
2900 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2902 gimple use_stmt = USE_STMT (use_p);
2903 if (is_gimple_debug (use_stmt))
2904 continue;
2905 if (gimple_bb (use_stmt) != bb)
2906 return false;
2909 return false;
2912 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2913 return true and fill in *OPS recursively. */
2915 static bool
2916 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2917 struct loop *loop)
2919 gimple stmt = SSA_NAME_DEF_STMT (var);
2920 tree rhs[2];
2921 int i;
2923 if (!is_reassociable_op (stmt, code, loop))
2924 return false;
2926 rhs[0] = gimple_assign_rhs1 (stmt);
2927 rhs[1] = gimple_assign_rhs2 (stmt);
2928 gimple_set_visited (stmt, true);
/* An operand that can't be decomposed further and has a single use is
   a leaf of the chain: record it as an operand entry.  */
2929 for (i = 0; i < 2; i++)
2930 if (TREE_CODE (rhs[i]) == SSA_NAME
2931 && !get_ops (rhs[i], code, ops, loop)
2932 && has_single_use (rhs[i]))
2934 operand_entry_t oe = operand_entry_pool.allocate ();
2936 oe->op = rhs[i];
2937 oe->rank = code;
2938 oe->id = 0;
2939 oe->count = 1;
2940 ops->safe_push (oe);
2942 return true;
2945 /* Find the ops that were added by get_ops starting from VAR, see if
2946 they were changed during update_range_test and if yes, create new
2947 stmts. */
2949 static tree
2950 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2951 unsigned int *pidx, struct loop *loop)
2953 gimple stmt = SSA_NAME_DEF_STMT (var);
2954 tree rhs[4];
2955 int i;
2957 if (!is_reassociable_op (stmt, code, loop))
2958 return NULL;
/* rhs[0]/rhs[1] are the original operands; rhs[2]/rhs[3] start as
   copies and are replaced below by their possibly-updated versions.  */
2960 rhs[0] = gimple_assign_rhs1 (stmt);
2961 rhs[1] = gimple_assign_rhs2 (stmt);
2962 rhs[2] = rhs[0];
2963 rhs[3] = rhs[1];
2964 for (i = 0; i < 2; i++)
2965 if (TREE_CODE (rhs[i]) == SSA_NAME)
2967 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2968 if (rhs[2 + i] == NULL_TREE)
2970 if (has_single_use (rhs[i]))
2971 rhs[2 + i] = ops[(*pidx)++]->op;
2972 else
2973 rhs[2 + i] = rhs[i];
/* If either operand changed (modulo commutativity), build a fresh stmt
   rather than modifying the old one in place.  */
2976 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2977 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2979 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2980 var = make_ssa_name (TREE_TYPE (var));
2981 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
2982 rhs[2], rhs[3]);
2983 gimple_set_uid (g, gimple_uid (stmt));
2984 gimple_set_visited (g, true);
2985 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2987 return var;
2990 /* Structure to track the initial value passed to get_ops and
2991 the range in the ops vector for each basic block. */
2993 struct inter_bb_range_test_entry
/* Value the range tests of this bb were decomposed from, or NULL_TREE
   when the GIMPLE_COND was recorded directly.  */
2995 tree op;
/* Half-open range [first_idx, last_idx) of this bb's entries in the
   ops vector.  */
2996 unsigned int first_idx, last_idx;
2999 /* Inter-bb range test optimization. */
3001 static void
3002 maybe_optimize_range_tests (gimple stmt)
3004 basic_block first_bb = gimple_bb (stmt);
3005 basic_block last_bb = first_bb;
3006 basic_block other_bb = NULL;
3007 basic_block bb;
3008 edge_iterator ei;
3009 edge e;
3010 auto_vec<operand_entry_t> ops;
3011 auto_vec<inter_bb_range_test_entry> bbinfo;
3012 bool any_changes = false;
3014 /* Consider only basic blocks that end with GIMPLE_COND or
3015 a cast statement satisfying final_range_test_p. All
3016 but the last bb in the first_bb .. last_bb range
3017 should end with GIMPLE_COND. */
3018 if (gimple_code (stmt) == GIMPLE_COND)
3020 if (EDGE_COUNT (first_bb->succs) != 2)
3021 return;
3023 else if (final_range_test_p (stmt))
3024 other_bb = single_succ (first_bb);
3025 else
3026 return;
3028 if (stmt_could_throw_p (stmt))
3029 return;
3031 /* As relative ordering of post-dominator sons isn't fixed,
3032 maybe_optimize_range_tests can be called first on any
3033 bb in the range we want to optimize. So, start searching
3034 backwards, if first_bb can be set to a predecessor. */
3035 while (single_pred_p (first_bb))
3037 basic_block pred_bb = single_pred (first_bb);
3038 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3039 break;
3040 if (!no_side_effect_bb (first_bb))
3041 break;
3042 first_bb = pred_bb;
3044 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3045 Before starting forward search in last_bb successors, find
3046 out the other_bb. */
3047 if (first_bb == last_bb)
3049 other_bb = NULL;
3050 /* As non-GIMPLE_COND last stmt always terminates the range,
3051 if forward search didn't discover anything, just give up. */
3052 if (gimple_code (stmt) != GIMPLE_COND)
3053 return;
3054 /* Look at both successors. Either it ends with a GIMPLE_COND
3055 and satisfies suitable_cond_bb, or ends with a cast and
3056 other_bb is that cast's successor. */
3057 FOR_EACH_EDGE (e, ei, first_bb->succs)
3058 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3059 || e->dest == first_bb)
3060 return;
3061 else if (single_pred_p (e->dest))
3063 stmt = last_stmt (e->dest);
3064 if (stmt
3065 && gimple_code (stmt) == GIMPLE_COND
3066 && EDGE_COUNT (e->dest->succs) == 2)
3068 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3069 break;
3070 else
3071 other_bb = NULL;
3073 else if (stmt
3074 && final_range_test_p (stmt)
3075 && find_edge (first_bb, single_succ (e->dest)))
3077 other_bb = single_succ (e->dest);
3078 if (other_bb == first_bb)
3079 other_bb = NULL;
3082 if (other_bb == NULL)
3083 return;
3085 /* Now do the forward search, moving last_bb to successor bbs
3086 that aren't other_bb. */
3087 while (EDGE_COUNT (last_bb->succs) == 2)
3089 FOR_EACH_EDGE (e, ei, last_bb->succs)
3090 if (e->dest != other_bb)
3091 break;
3092 if (e == NULL)
3093 break;
3094 if (!single_pred_p (e->dest))
3095 break;
3096 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3097 break;
3098 if (!no_side_effect_bb (e->dest))
3099 break;
3100 last_bb = e->dest;
3102 if (first_bb == last_bb)
3103 return;
3104 /* Here basic blocks first_bb through last_bb's predecessor
3105 end with GIMPLE_COND, all of them have one of the edges to
3106 other_bb and another to another block in the range,
3107 all blocks except first_bb don't have side-effects and
3108 last_bb ends with either GIMPLE_COND, or cast satisfying
3109 final_range_test_p. */
3110 for (bb = last_bb; ; bb = single_pred (bb))
3112 enum tree_code code;
3113 tree lhs, rhs;
3114 inter_bb_range_test_entry bb_ent;
3116 bb_ent.op = NULL_TREE;
3117 bb_ent.first_idx = ops.length ();
3118 bb_ent.last_idx = bb_ent.first_idx;
3119 e = find_edge (bb, other_bb);
3120 stmt = last_stmt (bb);
3121 gimple_set_visited (stmt, true);
3122 if (gimple_code (stmt) != GIMPLE_COND)
3124 use_operand_p use_p;
3125 gimple phi;
3126 edge e2;
3127 unsigned int d;
3129 lhs = gimple_assign_lhs (stmt);
3130 rhs = gimple_assign_rhs1 (stmt);
3131 gcc_assert (bb == last_bb);
3133 /* stmt is
3134 _123 = (int) _234;
3136 followed by:
3137 <bb M>:
3138 # _345 = PHI <_123(N), 1(...), 1(...)>
3140 or 0 instead of 1. If it is 0, the _234
3141 range test is anded together with all the
3142 other range tests, if it is 1, it is ored with
3143 them. */
3144 single_imm_use (lhs, &use_p, &phi);
3145 gcc_assert (gimple_code (phi) == GIMPLE_PHI)
/* The PHI argument on the FIRST_BB -> OTHER_BB edge (0 or 1) tells
   whether all the tests are ANDed (0) or ORed (1) together.  */;
3146 e2 = find_edge (first_bb, other_bb);
3147 d = e2->dest_idx;
3148 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3149 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3150 code = BIT_AND_EXPR;
3151 else
3153 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3154 code = BIT_IOR_EXPR;
3157 /* If _234 SSA_NAME_DEF_STMT is
3158 _234 = _567 | _789;
3159 (or &, corresponding to 1/0 in the phi arguments,
3160 push into ops the individual range test arguments
3161 of the bitwise or resp. and, recursively. */
3162 if (!get_ops (rhs, code, &ops,
3163 loop_containing_stmt (stmt))
3164 && has_single_use (rhs))
3166 /* Otherwise, push the _234 range test itself. */
3167 operand_entry_t oe = operand_entry_pool.allocate ();
3169 oe->op = rhs;
3170 oe->rank = code;
3171 oe->id = 0;
3172 oe->count = 1;
3173 ops.safe_push (oe);
3174 bb_ent.last_idx++;
3176 else
3177 bb_ent.last_idx = ops.length ();
3178 bb_ent.op = rhs;
3179 bbinfo.safe_push (bb_ent);
3180 continue;
3182 /* Otherwise stmt is GIMPLE_COND. */
3183 code = gimple_cond_code (stmt);
3184 lhs = gimple_cond_lhs (stmt);
3185 rhs = gimple_cond_rhs (stmt);
3186 if (TREE_CODE (lhs) == SSA_NAME
3187 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3188 && ((code != EQ_EXPR && code != NE_EXPR)
3189 || rhs != boolean_false_node
3190 /* Either push into ops the individual bitwise
3191 or resp. and operands, depending on which
3192 edge is other_bb. */
3193 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3194 ^ (code == EQ_EXPR))
3195 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3196 loop_containing_stmt (stmt))))
3198 /* Or push the GIMPLE_COND stmt itself. */
3199 operand_entry_t oe = operand_entry_pool.allocate ();
3201 oe->op = NULL;
3202 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3203 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3204 /* oe->op = NULL signs that there is no SSA_NAME
3205 for the range test, and oe->id instead is the
3206 basic block number, at which's end the GIMPLE_COND
3207 is. */
3208 oe->id = bb->index;
3209 oe->count = 1;
3210 ops.safe_push (oe);
3211 bb_ent.op = NULL;
3212 bb_ent.last_idx++;
3214 else if (ops.length () > bb_ent.first_idx)
3216 bb_ent.op = lhs;
3217 bb_ent.last_idx = ops.length ();
3219 bbinfo.safe_push (bb_ent);
3220 if (bb == first_bb)
3221 break;
3223 if (ops.length () > 1)
3224 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3225 if (any_changes)
3227 unsigned int idx;
3228 /* update_ops relies on has_single_use predicates returning the
3229 same values as it did during get_ops earlier. Additionally it
3230 never removes statements, only adds new ones and it should walk
3231 from the single imm use and check the predicate already before
3232 making those changes.
3233 On the other side, the handling of GIMPLE_COND directly can turn
3234 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3235 it needs to be done in a separate loop afterwards. */
3236 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3238 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3239 && bbinfo[idx].op != NULL_TREE)
3241 tree new_op;
3243 stmt = last_stmt (bb);
3244 new_op = update_ops (bbinfo[idx].op,
3245 (enum tree_code)
3246 ops[bbinfo[idx].first_idx]->rank,
3247 ops, &bbinfo[idx].first_idx,
3248 loop_containing_stmt (stmt));
3249 if (new_op == NULL_TREE)
3251 gcc_assert (bb == last_bb);
3252 new_op = ops[bbinfo[idx].first_idx++]->op;
3254 if (bbinfo[idx].op != new_op)
3256 imm_use_iterator iter;
3257 use_operand_p use_p;
3258 gimple use_stmt, cast_stmt = NULL;
/* Replace the old operand in GIMPLE_COND/PHI uses directly; a cast
   use (the final_range_test_p stmt) needs a new cast built below.  */
3260 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3261 if (is_gimple_debug (use_stmt))
3262 continue;
3263 else if (gimple_code (use_stmt) == GIMPLE_COND
3264 || gimple_code (use_stmt) == GIMPLE_PHI)
3265 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3266 SET_USE (use_p, new_op);
3267 else if (gimple_assign_cast_p (use_stmt))
3268 cast_stmt = use_stmt;
3269 else
3270 gcc_unreachable ();
3271 if (cast_stmt)
3273 gcc_assert (bb == last_bb);
3274 tree lhs = gimple_assign_lhs (cast_stmt);
3275 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3276 enum tree_code rhs_code
3277 = gimple_assign_rhs_code (cast_stmt);
3278 gassign *g;
3279 if (is_gimple_min_invariant (new_op))
3281 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3282 g = gimple_build_assign (new_lhs, new_op);
3284 else
3285 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3286 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3287 gimple_set_uid (g, gimple_uid (cast_stmt));
3288 gimple_set_visited (g, true);
3289 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3290 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3291 if (is_gimple_debug (use_stmt))
3292 continue;
3293 else if (gimple_code (use_stmt) == GIMPLE_COND
3294 || gimple_code (use_stmt) == GIMPLE_PHI)
3295 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3296 SET_USE (use_p, new_lhs);
3297 else
3298 gcc_unreachable ();
3302 if (bb == first_bb)
3303 break;
/* Second walk: rewrite the GIMPLE_CONDs whose whole test was folded
   into another operand (op == NULL), possibly into constant true or
   false conditions.  */
3305 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3307 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3308 && bbinfo[idx].op == NULL_TREE
3309 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3311 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3312 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3313 gimple_cond_make_false (cond_stmt);
3314 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3315 gimple_cond_make_true (cond_stmt);
3316 else
3318 gimple_cond_set_code (cond_stmt, NE_EXPR);
3319 gimple_cond_set_lhs (cond_stmt,
3320 ops[bbinfo[idx].first_idx]->op);
3321 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3323 update_stmt (cond_stmt);
3325 if (bb == first_bb)
3326 break;
3331 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3332 of STMT in it's operands. This is also known as a "destructive
3333 update" operation. */
3335 static bool
3336 is_phi_for_stmt (gimple stmt, tree operand)
3338 gimple def_stmt;
3339 gphi *def_phi;
3340 tree lhs;
3341 use_operand_p arg_p;
3342 ssa_op_iter i;
3344 if (TREE_CODE (operand) != SSA_NAME)
3345 return false;
3347 lhs = gimple_assign_lhs (stmt);
3349 def_stmt = SSA_NAME_DEF_STMT (operand);
3350 def_phi = dyn_cast <gphi *> (def_stmt);
3351 if (!def_phi)
3352 return false;
3354 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3355 if (lhs == USE_FROM_PTR (arg_p))
3356 return true;
3357 return false;
3360 /* Remove def stmt of VAR if VAR has zero uses and recurse
3361 on rhs1 operand if so. */
3363 static void
3364 remove_visited_stmt_chain (tree var)
3366 gimple stmt;
3367 gimple_stmt_iterator gsi;
3369 while (1)
3371 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3372 return;
3373 stmt = SSA_NAME_DEF_STMT (var);
3374 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3376 var = gimple_assign_rhs1 (stmt);
3377 gsi = gsi_for_stmt (stmt);
3378 reassoc_remove_stmt (&gsi);
3379 release_defs (stmt);
3381 else
3382 return;
3386 /* This function checks three consequtive operands in
3387 passed operands vector OPS starting from OPINDEX and
3388 swaps two operands if it is profitable for binary operation
3389 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3391 We pair ops with the same rank if possible.
3393 The alternative we try is to see if STMT is a destructive
3394 update style statement, which is like:
3395 b = phi (a, ...)
3396 a = c + b;
3397 In that case, we want to use the destructive update form to
3398 expose the possible vectorizer sum reduction opportunity.
3399 In that case, the third operand will be the phi node. This
3400 check is not performed if STMT is null.
3402 We could, of course, try to be better as noted above, and do a
3403 lot of work to try to find these opportunities in >3 operand
3404 cases, but it is unlikely to be worth it. */
3406 static void
3407 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3408 unsigned int opindex, gimple stmt)
3410 operand_entry_t oe1, oe2, oe3;
3412 oe1 = ops[opindex];
3413 oe2 = ops[opindex + 1];
3414 oe3 = ops[opindex + 2];
3416 if ((oe1->rank == oe2->rank
3417 && oe2->rank != oe3->rank)
3418 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3419 && !is_phi_for_stmt (stmt, oe1->op)
3420 && !is_phi_for_stmt (stmt, oe2->op)))
3422 struct operand_entry temp = *oe3;
3423 oe3->op = oe1->op;
3424 oe3->rank = oe1->rank;
3425 oe1->op = temp.op;
3426 oe1->rank= temp.rank;
3428 else if ((oe1->rank == oe3->rank
3429 && oe2->rank != oe3->rank)
3430 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3431 && !is_phi_for_stmt (stmt, oe1->op)
3432 && !is_phi_for_stmt (stmt, oe3->op)))
3434 struct operand_entry temp = *oe2;
3435 oe2->op = oe1->op;
3436 oe2->rank = oe1->rank;
3437 oe1->op = temp.op;
3438 oe1->rank = temp.rank;
3442 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3443 two definitions, otherwise return STMT. */
3445 static inline gimple
3446 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3448 if (TREE_CODE (rhs1) == SSA_NAME
3449 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3450 stmt = SSA_NAME_DEF_STMT (rhs1);
3451 if (TREE_CODE (rhs2) == SSA_NAME
3452 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3453 stmt = SSA_NAME_DEF_STMT (rhs2);
3454 return stmt;
3457 /* Recursively rewrite our linearized statements so that the operators
3458 match those in OPS[OPINDEX], putting the computation in rank
3459 order. Return new lhs. */
3461 static tree
3462 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3463 vec<operand_entry_t> ops, bool changed)
/* STMT is the current link of the linearized chain: its rhs1 points at
   the next statement in the chain, its rhs2 is a leaf.  CHANGED tracks
   whether any outer level already altered the expression, which forces
   fresh SSA names below so debug info stays consistent.  */
3465 tree rhs1 = gimple_assign_rhs1 (stmt);
3466 tree rhs2 = gimple_assign_rhs2 (stmt);
3467 tree lhs = gimple_assign_lhs (stmt);
3468 operand_entry_t oe;
3470 /* The final recursion case for this function is that you have
3471 exactly two operations left.
3472 If we had exactly one op in the entire list to start with, we
3473 would have never called this function, and the tail recursion
3474 rewrites them one at a time. */
3475 if (opindex + 2 == ops.length ())
3477 operand_entry_t oe1, oe2;
3479 oe1 = ops[opindex];
3480 oe2 = ops[opindex + 1];
3482 if (rhs1 != oe1->op || rhs2 != oe2->op)
3484 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3485 unsigned int uid = gimple_uid (stmt);
3487 if (dump_file && (dump_flags & TDF_DETAILS))
3489 fprintf (dump_file, "Transforming ");
3490 print_gimple_stmt (dump_file, stmt, 0, 0);
3493 /* Even when changed is false, reassociation could have e.g. removed
3494 some redundant operations, so unless we are just swapping the
3495 arguments or unless there is no change at all (then we just
3496 return lhs), force creation of a new SSA_NAME. */
3497 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
/* Build a replacement statement with a fresh LHS and place it at
   the point computed by find_insert_point, i.e. no earlier than
   the later of its operands' definitions.  */
3499 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3500 lhs = make_ssa_name (TREE_TYPE (lhs));
3501 stmt
3502 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3503 oe1->op, oe2->op);
3504 gimple_set_uid (stmt, uid);
3505 gimple_set_visited (stmt, true);
3506 if (insert_point == gsi_stmt (gsi))
3507 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3508 else
3509 insert_stmt_after (stmt, insert_point);
3511 else
/* In-place operand swap is only valid when the operand
   definitions already dominate STMT; assert that.  */
3513 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3514 == stmt);
3515 gimple_assign_set_rhs1 (stmt, oe1->op);
3516 gimple_assign_set_rhs2 (stmt, oe2->op);
3517 update_stmt (stmt);
/* The old rhs1 chain may now be entirely dead; reclaim it.  */
3520 if (rhs1 != oe1->op && rhs1 != oe2->op)
3521 remove_visited_stmt_chain (rhs1);
3523 if (dump_file && (dump_flags & TDF_DETAILS))
3525 fprintf (dump_file, " into ");
3526 print_gimple_stmt (dump_file, stmt, 0, 0);
3529 return lhs;
3532 /* If we hit here, we should have 3 or more ops left. */
3533 gcc_assert (opindex + 2 < ops.length ());
3535 /* Rewrite the next operator. */
3536 oe = ops[opindex];
3538 /* Recurse on the LHS of the binary operator, which is guaranteed to
3539 be the non-leaf side. */
3540 tree new_rhs1
3541 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3542 changed || oe->op != rhs2);
3544 if (oe->op != rhs2 || new_rhs1 != rhs1)
3546 if (dump_file && (dump_flags & TDF_DETAILS))
3548 fprintf (dump_file, "Transforming ");
3549 print_gimple_stmt (dump_file, stmt, 0, 0);
3552 /* If changed is false, this is either opindex == 0
3553 or all outer rhs2's were equal to corresponding oe->op,
3554 and powi_result is NULL.
3555 That means lhs is equivalent before and after reassociation.
3556 Otherwise ensure the old lhs SSA_NAME is not reused and
3557 create a new stmt as well, so that any debug stmts will be
3558 properly adjusted. */
3559 if (changed)
3561 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3562 unsigned int uid = gimple_uid (stmt);
3563 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3565 lhs = make_ssa_name (TREE_TYPE (lhs));
3566 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3567 new_rhs1, oe->op);
3568 gimple_set_uid (stmt, uid);
3569 gimple_set_visited (stmt, true);
3570 if (insert_point == gsi_stmt (gsi))
3571 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3572 else
3573 insert_stmt_after (stmt, insert_point);
3575 else
3577 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3578 == stmt);
3579 gimple_assign_set_rhs1 (stmt, new_rhs1);
3580 gimple_assign_set_rhs2 (stmt, oe->op);
3581 update_stmt (stmt);
3584 if (dump_file && (dump_flags & TDF_DETAILS))
3586 fprintf (dump_file, " into ");
3587 print_gimple_stmt (dump_file, stmt, 0, 0);
3590 return lhs;
3593 /* Find out how many cycles we need to compute statements chain.
3594 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3595 maximum number of independent statements we may execute per cycle. */
3597 static int
3598 get_required_cycles (int ops_num, int cpu_width)
3600 int res;
3601 int elog;
3602 unsigned int rest;
3604 /* While we have more than 2 * cpu_width operands
3605 we may reduce number of operands by cpu_width
3606 per cycle. */
3607 res = ops_num / (2 * cpu_width);
3609 /* Remained operands count may be reduced twice per cycle
3610 until we have only one operand. */
3611 rest = (unsigned)(ops_num - res * cpu_width);
3612 elog = exact_log2 (rest);
3613 if (elog >= 0)
3614 res += elog;
3615 else
3616 res += floor_log2 (rest) + 1;
3618 return res;
3621 /* Returns an optimal number of registers to use for computation of
3622 given statements. */
3624 static int
3625 get_reassociation_width (int ops_num, enum tree_code opc,
3626 machine_mode mode)
3628 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3629 int width;
3630 int width_min;
3631 int cycles_best;
3633 if (param_width > 0)
3634 width = param_width;
3635 else
3636 width = targetm.sched.reassociation_width (opc, mode);
3638 if (width == 1)
3639 return width;
3641 /* Get the minimal time required for sequence computation. */
3642 cycles_best = get_required_cycles (ops_num, width);
3644 /* Check if we may use less width and still compute sequence for
3645 the same time. It will allow us to reduce registers usage.
3646 get_required_cycles is monotonically increasing with lower width
3647 so we can perform a binary search for the minimal width that still
3648 results in the optimal cycle count. */
3649 width_min = 1;
3650 while (width > width_min)
3652 int width_mid = (width + width_min) / 2;
3654 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3655 width = width_mid;
3656 else if (width_min < width_mid)
3657 width_min = width_mid;
3658 else
3659 break;
3662 return width;
3665 /* Recursively rewrite our linearized statements so that the operators
3666 match those in OPS[OPINDEX], putting the computation in rank
3667 order and trying to allow operations to be executed in
3668 parallel. */
3670 static void
3671 rewrite_expr_tree_parallel (gassign *stmt, int width,
3672 vec<operand_entry_t> ops)
/* WIDTH is the number of independent statements the CPU can issue per
   cycle (from get_reassociation_width); OPS are the leaf operands.
   The chain of OP_NUM operands needs OP_NUM - 1 binary statements.  */
3674 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3675 int op_num = ops.length ();
3676 int stmt_num = op_num - 1;
3677 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3678 int op_index = op_num - 1;
3679 int stmt_index = 0;
3680 int ready_stmts_end = 0;
3681 int i = 0;
3682 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3684 /* We start expression rewriting from the top statements.
3685 So, in this loop we create a full list of statements
3686 we will work with. */
3687 stmts[stmt_num - 1] = stmt;
3688 for (i = stmt_num - 2; i >= 0; i--)
3689 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3691 for (i = 0; i < stmt_num; i++)
3693 tree op1, op2;
3695 /* Determine whether we should use results of
3696 already handled statements or not. */
3697 if (ready_stmts_end == 0
3698 && (i - stmt_index >= width || op_index < 1))
3699 ready_stmts_end = i;
3701 /* Now we choose operands for the next statement. Non zero
3702 value in ready_stmts_end means here that we should use
3703 the result of already generated statements as new operand. */
3704 if (ready_stmts_end > 0)
/* STMT_INDEX walks the results produced so far; prefer pairing
   two prior results, else one result and one leaf operand.  */
3706 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3707 if (ready_stmts_end > stmt_index)
3708 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3709 else if (op_index >= 0)
3710 op2 = ops[op_index--]->op;
3711 else
3713 gcc_assert (stmt_index < i);
3714 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3717 if (stmt_index >= ready_stmts_end)
3718 ready_stmts_end = 0;
3720 else
/* Consume two leaf operands, possibly reordering three of them
   first for better pairing (see swap_ops_for_binary_stmt).  */
3722 if (op_index > 1)
3723 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3724 op2 = ops[op_index--]->op;
3725 op1 = ops[op_index--]->op;
3728 /* If we emit the last statement then we should put
3729 operands into the last statement. It will also
3730 break the loop. */
3731 if (op_index < 0 && stmt_index == i)
3732 i = stmt_num - 1;
3734 if (dump_file && (dump_flags & TDF_DETAILS))
3736 fprintf (dump_file, "Transforming ");
3737 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3740 /* We keep original statement only for the last one. All
3741 others are recreated. */
3742 if (i == stmt_num - 1)
3744 gimple_assign_set_rhs1 (stmts[i], op1);
3745 gimple_assign_set_rhs2 (stmts[i], op2);
3746 update_stmt (stmts[i]);
3748 else
3749 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3751 if (dump_file && (dump_flags & TDF_DETAILS))
3753 fprintf (dump_file, " into ");
3754 print_gimple_stmt (dump_file, stmts[i], 0, 0);
/* The original rhs1 chain below STMT is now dead; reclaim it.  */
3758 remove_visited_stmt_chain (last_rhs1);
3761 /* Transform STMT, which is really (A +B) + (C + D) into the left
3762 linear form, ((A+B)+C)+D.
3763 Recurse on D if necessary. */
3765 static void
3766 linearize_expr (gimple stmt)
3768 gimple_stmt_iterator gsi;
3769 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3770 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3771 gimple oldbinrhs = binrhs;
3772 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3773 gimple newbinrhs = NULL;
3774 struct loop *loop = loop_containing_stmt (stmt);
3775 tree lhs = gimple_assign_lhs (stmt);
3777 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3778 && is_reassociable_op (binrhs, rhscode, loop));
3780 gsi = gsi_for_stmt (stmt);
3782 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3783 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3784 gimple_assign_rhs_code (binrhs),
3785 gimple_assign_lhs (binlhs),
3786 gimple_assign_rhs2 (binrhs));
3787 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3788 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3789 gimple_set_uid (binrhs, gimple_uid (stmt));
3791 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3792 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3794 if (dump_file && (dump_flags & TDF_DETAILS))
3796 fprintf (dump_file, "Linearized: ");
3797 print_gimple_stmt (dump_file, stmt, 0, 0);
3800 reassociate_stats.linearized++;
3801 update_stmt (stmt);
3803 gsi = gsi_for_stmt (oldbinrhs);
3804 reassoc_remove_stmt (&gsi);
3805 release_defs (oldbinrhs);
3807 gimple_set_visited (stmt, true);
3808 gimple_set_visited (binlhs, true);
3809 gimple_set_visited (binrhs, true);
3811 /* Tail recurse on the new rhs if it still needs reassociation. */
3812 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3813 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3814 want to change the algorithm while converting to tuples. */
3815 linearize_expr (stmt);
3818 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3819 it. Otherwise, return NULL. */
3821 static gimple
3822 get_single_immediate_use (tree lhs)
3824 use_operand_p immuse;
3825 gimple immusestmt;
3827 if (TREE_CODE (lhs) == SSA_NAME
3828 && single_imm_use (lhs, &immuse, &immusestmt)
3829 && is_gimple_assign (immusestmt))
3830 return immusestmt;
3832 return NULL;
3835 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3836 representing the negated value. Insertions of any necessary
3837 instructions go before GSI.
3838 This function is recursive in that, if you hand it "a_5" as the
3839 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3840 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3842 static tree
3843 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3845 gimple negatedefstmt = NULL;
3846 tree resultofnegate;
3847 gimple_stmt_iterator gsi;
3848 unsigned int uid;
3850 /* If we are trying to negate a name, defined by an add, negate the
3851 add operands instead. */
3852 if (TREE_CODE (tonegate) == SSA_NAME)
3853 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
/* Push the negation into a single-use PLUS_EXPR definition:
   -(x + y) becomes (-x) + (-y), recursively.  */
3854 if (TREE_CODE (tonegate) == SSA_NAME
3855 && is_gimple_assign (negatedefstmt)
3856 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3857 && has_single_use (gimple_assign_lhs (negatedefstmt))
3858 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3860 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3861 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3862 tree lhs = gimple_assign_lhs (negatedefstmt);
3863 gimple g;
/* Each recursive call may insert statements, so the iterator is
   refetched before every use.  */
3865 gsi = gsi_for_stmt (negatedefstmt);
3866 rhs1 = negate_value (rhs1, &gsi);
3868 gsi = gsi_for_stmt (negatedefstmt);
3869 rhs2 = negate_value (rhs2, &gsi);
3871 gsi = gsi_for_stmt (negatedefstmt);
3872 lhs = make_ssa_name (TREE_TYPE (lhs));
3873 gimple_set_visited (negatedefstmt, true);
3874 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3875 gimple_set_uid (g, gimple_uid (negatedefstmt));
3876 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3877 return lhs;
/* Fallback: emit an explicit NEGATE_EXPR before *GSIP.  */
3880 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3881 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3882 NULL_TREE, true, GSI_SAME_STMT);
3883 gsi = *gsip;
3884 uid = gimple_uid (gsi_stmt (gsi));
/* Statements created by force_gimple_operand_gsi have uid 0; walk
   backwards over them and give them the uid of the insertion point so
   later uid-based ordering checks remain valid.  */
3885 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3887 gimple stmt = gsi_stmt (gsi);
3888 if (gimple_uid (stmt) != 0)
3889 break;
3890 gimple_set_uid (stmt, uid);
3892 return resultofnegate;
3895 /* Return true if we should break up the subtract in STMT into an add
3896 with negate. This is true when we the subtract operands are really
3897 adds, or the subtract itself is used in an add expression. In
3898 either case, breaking up the subtract into an add with negate
3899 exposes the adds to reassociation. */
3901 static bool
3902 should_break_up_subtract (gimple stmt)
3904 tree lhs = gimple_assign_lhs (stmt);
3905 tree binlhs = gimple_assign_rhs1 (stmt);
3906 tree binrhs = gimple_assign_rhs2 (stmt);
3907 gimple immusestmt;
3908 struct loop *loop = loop_containing_stmt (stmt);
3910 if (TREE_CODE (binlhs) == SSA_NAME
3911 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3912 return true;
3914 if (TREE_CODE (binrhs) == SSA_NAME
3915 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3916 return true;
3918 if (TREE_CODE (lhs) == SSA_NAME
3919 && (immusestmt = get_single_immediate_use (lhs))
3920 && is_gimple_assign (immusestmt)
3921 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3922 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3923 return true;
3924 return false;
3927 /* Transform STMT from A - B into A + -B. */
3929 static void
3930 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3932 tree rhs1 = gimple_assign_rhs1 (stmt);
3933 tree rhs2 = gimple_assign_rhs2 (stmt);
3935 if (dump_file && (dump_flags & TDF_DETAILS))
3937 fprintf (dump_file, "Breaking up subtract ");
3938 print_gimple_stmt (dump_file, stmt, 0, 0);
3941 rhs2 = negate_value (rhs2, gsip);
3942 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3943 update_stmt (stmt);
3946 /* Determine whether STMT is a builtin call that raises an SSA name
3947 to an integer power and has only one use. If so, and this is early
3948 reassociation and unsafe math optimizations are permitted, place
3949 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3950 If any of these conditions does not hold, return FALSE. */
3952 static bool
3953 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3955 tree fndecl, arg1;
3956 REAL_VALUE_TYPE c, cint;
/* Only the first pass of reassociation (before vectorization) does
   this transform, and only with -funsafe-math-optimizations.  */
3958 if (!first_pass_instance
3959 || !flag_unsafe_math_optimizations
3960 || !is_gimple_call (stmt)
3961 || !has_single_use (gimple_call_lhs (stmt)))
3962 return false;
3964 fndecl = gimple_call_fndecl (stmt);
3966 if (!fndecl
3967 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3968 return false;
3970 switch (DECL_FUNCTION_CODE (fndecl))
/* pow(x, c): accept only when c is a real constant that represents
   an integer exactly.  */
3972 CASE_FLT_FN (BUILT_IN_POW):
3973 if (flag_errno_math)
3974 return false;
3976 *base = gimple_call_arg (stmt, 0);
3977 arg1 = gimple_call_arg (stmt, 1);
3979 if (TREE_CODE (arg1) != REAL_CST)
3980 return false;
3982 c = TREE_REAL_CST (arg1);
/* Reject exponents too large to fit a HOST_WIDE_INT.  */
3984 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3985 return false;
/* Round-trip through an integer and require an identical value,
   i.e. C has no fractional part.  */
3987 *exponent = real_to_integer (&c);
3988 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3989 if (!real_identical (&c, &cint))
3990 return false;
3992 break;
/* powi(x, n): accept when n fits a signed HOST_WIDE_INT.  */
3994 CASE_FLT_FN (BUILT_IN_POWI):
3995 *base = gimple_call_arg (stmt, 0);
3996 arg1 = gimple_call_arg (stmt, 1);
3998 if (!tree_fits_shwi_p (arg1))
3999 return false;
4001 *exponent = tree_to_shwi (arg1);
4002 break;
4004 default:
4005 return false;
4008 /* Expanding negative exponents is generally unproductive, so we don't
4009 complicate matters with those. Exponents of zero and one should
4010 have been handled by expression folding. */
4011 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4012 return false;
4014 return true;
4017 /* Recursively linearize a binary expression that is the RHS of STMT.
4018 Place the operands of the expression tree in the vector named OPS. */
4020 static void
4021 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4022 bool is_associative, bool set_visited)
4024 tree binlhs = gimple_assign_rhs1 (stmt);
4025 tree binrhs = gimple_assign_rhs2 (stmt);
4026 gimple binlhsdef = NULL, binrhsdef = NULL;
4027 bool binlhsisreassoc = false;
4028 bool binrhsisreassoc = false;
4029 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4030 struct loop *loop = loop_containing_stmt (stmt);
4031 tree base = NULL_TREE;
4032 HOST_WIDE_INT exponent = 0;
4034 if (set_visited)
4035 gimple_set_visited (stmt, true);
/* An operand can be chained into only when its definition uses the
   same operation code and is not throwing.  */
4037 if (TREE_CODE (binlhs) == SSA_NAME
4039 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4040 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4041 && !stmt_could_throw_p (binlhsdef));
4044 if (TREE_CODE (binrhs) == SSA_NAME
4046 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4047 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4048 && !stmt_could_throw_p (binrhsdef));
4051 /* If the LHS is not reassociable, but the RHS is, we need to swap
4052 them. If neither is reassociable, there is nothing we can do, so
4053 just put them in the ops vector. If the LHS is reassociable,
4054 linearize it. If both are reassociable, then linearize the RHS
4055 and the LHS. */
4057 if (!binlhsisreassoc)
4059 /* If this is not a associative operation like division, give up. */
4060 if (!is_associative)
4062 add_to_ops_vec (ops, binrhs);
4063 return;
4066 if (!binrhsisreassoc)
/* Both sides are leaves: record each, folding repeated-factor
   pow/powi calls into repeat entries where possible.  */
4068 if (rhscode == MULT_EXPR
4069 && TREE_CODE (binrhs) == SSA_NAME
4070 && acceptable_pow_call (binrhsdef, &base, &exponent))
4072 add_repeat_to_ops_vec (ops, base, exponent);
4073 gimple_set_visited (binrhsdef, true);
4075 else
4076 add_to_ops_vec (ops, binrhs);
4078 if (rhscode == MULT_EXPR
4079 && TREE_CODE (binlhs) == SSA_NAME
4080 && acceptable_pow_call (binlhsdef, &base, &exponent))
4082 add_repeat_to_ops_vec (ops, base, exponent);
4083 gimple_set_visited (binlhsdef, true);
4085 else
4086 add_to_ops_vec (ops, binlhs);
4088 return;
4091 if (dump_file && (dump_flags & TDF_DETAILS))
4093 fprintf (dump_file, "swapping operands of ");
4094 print_gimple_stmt (dump_file, stmt, 0, 0);
4097 swap_ssa_operands (stmt,
4098 gimple_assign_rhs1_ptr (stmt),
4099 gimple_assign_rhs2_ptr (stmt));
4100 update_stmt (stmt);
4102 if (dump_file && (dump_flags & TDF_DETAILS))
4104 fprintf (dump_file, " is now ");
4105 print_gimple_stmt (dump_file, stmt, 0, 0);
4108 /* We want to make it so the lhs is always the reassociative op,
4109 so swap. */
4110 std::swap (binlhs, binrhs);
4112 else if (binrhsisreassoc)
/* Both sides reassociable: flatten the RHS subtree into the chain
   first (see linearize_expr), then recurse on the LHS.  */
4114 linearize_expr (stmt);
4115 binlhs = gimple_assign_rhs1 (stmt);
4116 binrhs = gimple_assign_rhs2 (stmt);
/* At this point BINRHS must be a leaf of the chain.  */
4119 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4120 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4121 rhscode, loop));
4122 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4123 is_associative, set_visited);
4125 if (rhscode == MULT_EXPR
4126 && TREE_CODE (binrhs) == SSA_NAME
4127 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent)
4129 add_repeat_to_ops_vec (ops, base, exponent);
4130 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4132 else
4133 add_to_ops_vec (ops, binrhs);
4136 /* Repropagate the negates back into subtracts, since no other pass
4137 currently does it. */
4139 static void
4140 repropagate_negates (void)
4142 unsigned int i = 0;
4143 tree negate;
/* PLUS_NEGATES holds the LHS of each x = -y statement collected (and
   created) earlier by this pass.  */
4145 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4147 gimple user = get_single_immediate_use (negate);
4149 if (!user || !is_gimple_assign (user))
4150 continue;
4152 /* The negate operand can be either operand of a PLUS_EXPR
4153 (it can be the LHS if the RHS is a constant for example).
4155 Force the negate operand to the RHS of the PLUS_EXPR, then
4156 transform the PLUS_EXPR into a MINUS_EXPR. */
4157 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4159 /* If the negated operand appears on the LHS of the
4160 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4161 to force the negated operand to the RHS of the PLUS_EXPR. */
4162 if (gimple_assign_rhs1 (user) == negate)
4164 swap_ssa_operands (user,
4165 gimple_assign_rhs1_ptr (user),
4166 gimple_assign_rhs2_ptr (user));
4169 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4170 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4171 if (gimple_assign_rhs2 (user) == negate)
4173 tree rhs1 = gimple_assign_rhs1 (user);
4174 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4175 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4176 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4177 update_stmt (user);
4180 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4182 if (gimple_assign_rhs1 (user) == negate)
4184 /* We have
4185 x = -a
4186 y = x - b
4187 which we transform into
4188 x = a + b
4189 y = -x .
4190 This pushes down the negate which we possibly can merge
4191 into some other operation, hence insert it into the
4192 plus_negates vector. */
4193 gimple feed = SSA_NAME_DEF_STMT (negate);
4194 tree a = gimple_assign_rhs1 (feed);
4195 tree b = gimple_assign_rhs2 (user);
4196 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4197 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4198 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4199 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4200 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4201 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
/* gimple_assign_set_rhs_with_ops may have replaced the
   statement, so refetch USER from the iterator.  */
4202 user = gsi_stmt (gsi2);
4203 update_stmt (user);
4204 reassoc_remove_stmt (&gsi);
4205 release_defs (feed);
4206 plus_negates.safe_push (gimple_assign_lhs (user));
4208 else
4210 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4211 rid of one operation. */
4212 gimple feed = SSA_NAME_DEF_STMT (negate);
4213 tree a = gimple_assign_rhs1 (feed);
4214 tree rhs1 = gimple_assign_rhs1 (user);
4215 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4216 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4217 update_stmt (gsi_stmt (gsi));
4223 /* Returns true if OP is of a type for which we can do reassociation.
4224 That is for integral or non-saturating fixed-point types, and for
4225 floating point type when associative-math is enabled. */
4227 static bool
4228 can_reassociate_p (tree op)
4230 tree type = TREE_TYPE (op);
4231 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4232 || NON_SAT_FIXED_POINT_TYPE_P (type)
4233 || (flag_associative_math && FLOAT_TYPE_P (type)))
4234 return true;
4235 return false;
4238 /* Break up subtract operations in block BB.
4240 We do this top down because we don't know whether the subtract is
4241 part of a possible chain of reassociation except at the top.
4243 IE given
4244 d = f + g
4245 c = a + e
4246 b = c - d
4247 q = b - r
4248 k = t - q
4250 we want to break up k = t - q, but we won't until we've transformed q
4251 = b - r, which won't be broken up until we transform b = c - d.
4253 En passant, clear the GIMPLE visited flag on every statement
4254 and set UIDs within each basic block. */
4256 static void
4257 break_up_subtract_bb (basic_block bb)
4259 gimple_stmt_iterator gsi;
4260 basic_block son;
4261 unsigned int uid = 1;
4263 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4265 gimple stmt = gsi_stmt (gsi);
/* Reset the per-statement state later phases rely on: visited
   flag cleared, uids numbered in block order starting at 1.  */
4266 gimple_set_visited (stmt, false);
4267 gimple_set_uid (stmt, uid++);
4269 if (!is_gimple_assign (stmt)
4270 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4271 continue;
4273 /* Look for simple gimple subtract operations. */
4274 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4276 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4277 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4278 continue;
4280 /* Check for a subtract used only in an addition. If this
4281 is the case, transform it into add of a negate for better
4282 reassociation. IE transform C = A-B into C = A + -B if C
4283 is only used in an addition. */
4284 if (should_break_up_subtract (stmt))
4285 break_up_subtract (stmt, &gsi);
/* Plain negates are recorded so repropagate_negates can fold
   them back into subtracts later.  */
4287 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4288 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4289 plus_negates.safe_push (gimple_assign_lhs (stmt));
/* Recurse over the dominator tree so dominated blocks are handled
   after their dominators (top-down, as the head comment requires).  */
4291 for (son = first_dom_son (CDI_DOMINATORS, bb);
4292 son;
4293 son = next_dom_son (CDI_DOMINATORS, son))
4294 break_up_subtract_bb (son);
4297 /* Used for repeated factor analysis. */
4298 struct repeat_factor_d
4300 /* An SSA name that occurs in a multiply chain. */
4301 tree factor;
4303 /* Cached rank of the factor. */
4304 unsigned rank;
4306 /* Number of occurrences of the factor in the chain. */
4307 HOST_WIDE_INT count;
4309 /* An SSA name representing the product of this factor and
4310 all factors appearing later in the repeated factor vector. */
4311 tree repr;
/* Value, pointer, and const-pointer typedefs; the const variant is
   used by the qsort comparison function below.  */
4314 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4315 typedef const struct repeat_factor_d *const_repeat_factor_t;
/* Per-chain scratch vector of repeated factors, created (size 10) by
   attempt_builtin_powi for each multiply chain it analyzes.  */
4318 static vec<repeat_factor> repeat_factor_vec;
4320 /* Used for sorting the repeat factor vector. Sort primarily by
4321 ascending occurrence count, secondarily by descending rank. */
4323 static int
4324 compare_repeat_factors (const void *x1, const void *x2)
4326 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4327 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4329 if (rf1->count != rf2->count)
4330 return rf1->count - rf2->count;
4332 return rf2->rank - rf1->rank;
4335 /* Look for repeated operands in OPS in the multiply tree rooted at
4336 STMT. Replace them with an optimal sequence of multiplies and powi
4337 builtin calls, and remove the used operands from OPS. Return an
4338 SSA name representing the value of the replacement sequence. */
4340 static tree
4341 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4343 unsigned i, j, vec_len;
4344 int ii;
4345 operand_entry_t oe;
4346 repeat_factor_t rf1, rf2;
4347 repeat_factor rfnew;
4348 tree result = NULL_TREE;
4349 tree target_ssa, iter_result;
4350 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4351 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4352 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4353 gimple mul_stmt, pow_stmt;
4355 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4356 target. */
4357 if (!powi_fndecl)
4358 return NULL_TREE;
4360 /* Allocate the repeated factor vector. */
4361 repeat_factor_vec.create (10)
4363 /* Scan the OPS vector for all SSA names in the product and build
4364 up a vector of occurrence counts for each factor. */
4365 FOR_EACH_VEC_ELT (*ops, i, oe)
4367 if (TREE_CODE (oe->op) == SSA_NAME)
4369 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4371 if (rf1->factor == oe->op)
4373 rf1->count += oe->count;
4374 break;
/* J ran off the end of the inner loop above, so this factor has
   not been seen before; record a fresh entry for it.  */
4378 if (j >= repeat_factor_vec.length ())
4380 rfnew.factor = oe->op;
4381 rfnew.rank = oe->rank;
4382 rfnew.count = oe->count;
4383 rfnew.repr = NULL_TREE;
4384 repeat_factor_vec.safe_push (rfnew);
4389 /* Sort the repeated factor vector by (a) increasing occurrence count,
4390 and (b) decreasing rank. */
4391 repeat_factor_vec.qsort (compare_repeat_factors);
4393 /* It is generally best to combine as many base factors as possible
4394 into a product before applying __builtin_powi to the result.
4395 However, the sort order chosen for the repeated factor vector
4396 allows us to cache partial results for the product of the base
4397 factors for subsequent use. When we already have a cached partial
4398 result from a previous iteration, it is best to make use of it
4399 before looking for another __builtin_pow opportunity.
4401 As an example, consider x * x * y * y * y * z * z * z * z.
4402 We want to first compose the product x * y * z, raise it to the
4403 second power, then multiply this by y * z, and finally multiply
4404 by z. This can be done in 5 multiplies provided we cache y * z
4405 for use in both expressions:
4407 t1 = y * z
4408 t2 = t1 * x
4409 t3 = t2 * t2
4410 t4 = t1 * t3
4411 result = t4 * z
4413 If we instead ignored the cached y * z and first multiplied by
4414 the __builtin_pow opportunity z * z, we would get the inferior:
4416 t1 = y * z
4417 t2 = t1 * x
4418 t3 = t2 * t2
4419 t4 = z * z
4420 t5 = t3 * t4
4421 result = t5 * y */
4423 vec_len = repeat_factor_vec.length ();
4425 /* Repeatedly look for opportunities to create a builtin_powi call. */
4426 while (true)
4428 HOST_WIDE_INT power;
4430 /* First look for the largest cached product of factors from
4431 preceding iterations. If found, create a builtin_powi for
4432 it if the minimum occurrence count for its factors is at
4433 least 2, or just use this cached product as our next
4434 multiplicand if the minimum occurrence count is 1. */
4435 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4437 if (rf1->repr && rf1->count > 0)
4438 break;
4441 if (j < vec_len)
4443 power = rf1->count;
4445 if (power == 1)
4447 iter_result = rf1->repr;
4449 if (dump_file && (dump_flags & TDF_DETAILS))
4451 unsigned elt;
4452 repeat_factor_t rf;
4453 fputs ("Multiplying by cached product ", dump_file);
4454 for (elt = j; elt < vec_len; elt++)
4456 rf = &repeat_factor_vec[elt];
4457 print_generic_expr (dump_file, rf->factor, 0);
4458 if (elt < vec_len - 1)
4459 fputs (" * ", dump_file);
4461 fputs ("\n", dump_file);
4464 else
4466 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4467 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4468 build_int_cst (integer_type_node,
4469 power));
4470 gimple_call_set_lhs (pow_stmt, iter_result);
4471 gimple_set_location (pow_stmt, gimple_location (stmt));
4472 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4474 if (dump_file && (dump_flags & TDF_DETAILS))
4476 unsigned elt;
4477 repeat_factor_t rf;
4478 fputs ("Building __builtin_pow call for cached product (",
4479 dump_file);
4480 for (elt = j; elt < vec_len; elt++)
4482 rf = &repeat_factor_vec[elt];
4483 print_generic_expr (dump_file, rf->factor, 0);
4484 if (elt < vec_len - 1)
4485 fputs (" * ", dump_file);
4487 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4488 power);
4492 else
4494 /* Otherwise, find the first factor in the repeated factor
4495 vector whose occurrence count is at least 2. If no such
4496 factor exists, there are no builtin_powi opportunities
4497 remaining. */
4498 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4500 if (rf1->count >= 2)
4501 break;
4504 if (j >= vec_len)
4505 break;
4507 power = rf1->count;
4509 if (dump_file && (dump_flags & TDF_DETAILS))
4511 unsigned elt;
4512 repeat_factor_t rf;
4513 fputs ("Building __builtin_pow call for (", dump_file);
4514 for (elt = j; elt < vec_len; elt++)
4516 rf = &repeat_factor_vec[elt];
4517 print_generic_expr (dump_file, rf->factor, 0);
4518 if (elt < vec_len - 1)
4519 fputs (" * ", dump_file);
4521 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4524 reassociate_stats.pows_created++;
4526 /* Visit each element of the vector in reverse order (so that
4527 high-occurrence elements are visited first, and within the
4528 same occurrence count, lower-ranked elements are visited
4529 first). Form a linear product of all elements in this order
4530 whose occurrence count is at least that of element J.
4531 Record the SSA name representing the product of each element
4532 with all subsequent elements in the vector. */
4533 if (j == vec_len - 1)
4534 rf1->repr = rf1->factor;
4535 else
4537 for (ii = vec_len - 2; ii >= (int)j; ii--)
4539 tree op1, op2;
4541 rf1 = &repeat_factor_vec[ii];
4542 rf2 = &repeat_factor_vec[ii + 1];
4544 /* Init the last factor's representative to be itself. */
4545 if (!rf2->repr)
4546 rf2->repr = rf2->factor;
4548 op1 = rf1->factor;
4549 op2 = rf2->repr;
4551 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4552 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4553 op1, op2);
4554 gimple_set_location (mul_stmt, gimple_location (stmt));
4555 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4556 rf1->repr = target_ssa;
4558 /* Don't reprocess the multiply we just introduced. */
4559 gimple_set_visited (mul_stmt, true);
4563 /* Form a call to __builtin_powi for the maximum product
4564 just formed, raised to the power obtained earlier. */
4565 rf1 = &repeat_factor_vec[j];
4566 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4567 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4568 build_int_cst (integer_type_node,
4569 power));
4570 gimple_call_set_lhs (pow_stmt, iter_result);
4571 gimple_set_location (pow_stmt, gimple_location (stmt));
4572 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4575 /* If we previously formed at least one other builtin_powi call,
4576 form the product of this one and those others. */
4577 if (result)
4579 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4580 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4581 result, iter_result);
4582 gimple_set_location (mul_stmt, gimple_location (stmt));
4583 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4584 gimple_set_visited (mul_stmt, true);
4585 result = new_result;
4587 else
4588 result = iter_result;
4590 /* Decrement the occurrence count of each element in the product
4591 by the count found above, and remove this many copies of each
4592 factor from OPS. */
4593 for (i = j; i < vec_len; i++)
4595 unsigned k = power;
4596 unsigned n;
4598 rf1 = &repeat_factor_vec[i];
4599 rf1->count -= power;
/* Walk OPS from the end so that ordered_remove does not shift
   entries we have yet to examine.  */
4601 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4603 if (oe->op == rf1->factor)
4605 if (oe->count <= k)
4607 ops->ordered_remove (n);
4608 k -= oe->count;
4610 if (k == 0)
4611 break;
4613 else
4615 oe->count -= k;
4616 break;
4623 /* At this point all elements in the repeated factor vector have a
4624 remaining occurrence count of 0 or 1, and those with a count of 1
4625 don't have cached representatives. Re-sort the ops vector and
4626 clean up. */
4627 ops->qsort (sort_by_operand_rank);
4628 repeat_factor_vec.release ();
4630 /* Return the final product computed herein. Note that there may
4631 still be some elements with single occurrence count left in OPS;
4632 those will be handled by the normal reassociation logic. */
4633 return result;
4636 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4638 static void
4639 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4641 tree rhs1;
4643 if (dump_file && (dump_flags & TDF_DETAILS))
4645 fprintf (dump_file, "Transforming ");
4646 print_gimple_stmt (dump_file, stmt, 0, 0);
4649 rhs1 = gimple_assign_rhs1 (stmt);
4650 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4651 update_stmt (stmt);
4652 remove_visited_stmt_chain (rhs1);
4654 if (dump_file && (dump_flags & TDF_DETAILS))
4656 fprintf (dump_file, " into ");
4657 print_gimple_stmt (dump_file, stmt, 0, 0);
4661 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4663 static void
4664 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4665 tree rhs1, tree rhs2)
4667 if (dump_file && (dump_flags & TDF_DETAILS))
4669 fprintf (dump_file, "Transforming ");
4670 print_gimple_stmt (dump_file, stmt, 0, 0);
4673 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4674 update_stmt (gsi_stmt (*gsi));
4675 remove_visited_stmt_chain (rhs1);
4677 if (dump_file && (dump_flags & TDF_DETAILS))
4679 fprintf (dump_file, " into ");
4680 print_gimple_stmt (dump_file, stmt, 0, 0);
4684 /* Reassociate expressions in basic block BB and its post-dominator as
4685 children. */
4687 static void
4688 reassociate_bb (basic_block bb)
4690 gimple_stmt_iterator gsi;
4691 basic_block son;
4692 gimple stmt = last_stmt (bb);
4694 if (stmt && !gimple_visited_p (stmt))
4695 maybe_optimize_range_tests (stmt);
/* Walk the statements of BB in reverse order.  Statements feeding a
   linearized expression tree are marked visited when the tree's root
   is processed, so they are skipped (or cleaned up if dead) when the
   backward walk reaches them.  */
4697 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4699 stmt = gsi_stmt (gsi);
4701 if (is_gimple_assign (stmt)
4702 && !stmt_could_throw_p (stmt))
4704 tree lhs, rhs1, rhs2;
4705 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4707 /* If this is not a gimple binary expression, there is
4708 nothing for us to do with it. */
4709 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4710 continue;
4712 /* If this was part of an already processed statement,
4713 we don't need to touch it again. */
4714 if (gimple_visited_p (stmt))
4716 /* This statement might have become dead because of previous
4717 reassociations. */
4718 if (has_zero_uses (gimple_get_lhs (stmt)))
4720 reassoc_remove_stmt (&gsi);
4721 release_defs (stmt);
4722 /* We might end up removing the last stmt above which
4723 places the iterator to the end of the sequence.
4724 Reset it to the last stmt in this case which might
4725 be the end of the sequence as well if we removed
4726 the last statement of the sequence. In which case
4727 we need to bail out. */
4728 if (gsi_end_p (gsi))
4730 gsi = gsi_last_bb (bb);
4731 if (gsi_end_p (gsi))
4732 break;
4735 continue;
4738 lhs = gimple_assign_lhs (stmt);
4739 rhs1 = gimple_assign_rhs1 (stmt);
4740 rhs2 = gimple_assign_rhs2 (stmt);
4742 /* For non-bit or min/max operations we can't associate
4743 all types. Verify that here. */
4744 if (rhs_code != BIT_IOR_EXPR
4745 && rhs_code != BIT_AND_EXPR
4746 && rhs_code != BIT_XOR_EXPR
4747 && rhs_code != MIN_EXPR
4748 && rhs_code != MAX_EXPR
4749 && (!can_reassociate_p (lhs)
4750 || !can_reassociate_p (rhs1)
4751 || !can_reassociate_p (rhs2)))
4752 continue;
4754 if (associative_tree_code (rhs_code))
4756 auto_vec<operand_entry_t> ops;
4757 tree powi_result = NULL_TREE;
4759 /* There may be no immediate uses left by the time we
4760 get here because we may have eliminated them all. */
4761 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4762 continue;
4764 gimple_set_visited (stmt, true);
4765 linearize_expr_tree (&ops, stmt, true, true);
4766 ops.qsort (sort_by_operand_rank);
4767 optimize_ops_list (rhs_code, &ops);
4768 if (undistribute_ops_list (rhs_code, &ops,
4769 loop_containing_stmt (stmt)))
/* Undistribution changed the operand list; re-sort and re-run
   the list optimizations on the new contents.  */
4771 ops.qsort (sort_by_operand_rank);
4772 optimize_ops_list (rhs_code, &ops);
4775 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4776 optimize_range_tests (rhs_code, &ops);
/* NOTE(review): the powi transform is restricted to the first
   instance of this pass — presumably so later instances do not
   rework the powi calls created here; confirm.  */
4778 if (first_pass_instance
4779 && rhs_code == MULT_EXPR
4780 && flag_unsafe_math_optimizations)
4781 powi_result = attempt_builtin_powi (stmt, &ops);
4783 /* If the operand vector is now empty, all operands were
4784 consumed by the __builtin_powi optimization. */
4785 if (ops.length () == 0)
4786 transform_stmt_to_copy (&gsi, stmt, powi_result);
4787 else if (ops.length () == 1)
4789 tree last_op = ops.last ()->op;
4791 if (powi_result)
4792 transform_stmt_to_multiply (&gsi, stmt, last_op,
4793 powi_result);
4794 else
4795 transform_stmt_to_copy (&gsi, stmt, last_op);
4797 else
4799 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4800 int ops_num = ops.length ();
4801 int width = get_reassociation_width (ops_num, rhs_code, mode);
4802 tree new_lhs = lhs;
4804 if (dump_file && (dump_flags & TDF_DETAILS))
4805 fprintf (dump_file,
4806 "Width = %d was chosen for reassociation\n", width);
4808 if (width > 1
4809 && ops.length () > 3)
4810 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4811 width, ops);
4812 else
4814 /* When there are three operands left, we want
4815 to make sure the ones that get the double
4816 binary op are chosen wisely. */
4817 int len = ops.length ();
4818 if (len >= 3)
4819 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4821 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4822 powi_result != NULL);
4825 /* If we combined some repeated factors into a
4826 __builtin_powi call, multiply that result by the
4827 reassociated operands. */
4828 if (powi_result)
4830 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4831 tree type = TREE_TYPE (lhs);
4832 tree target_ssa = make_temp_ssa_name (type, NULL,
4833 "reassocpow");
4834 gimple_set_lhs (lhs_stmt, target_ssa);
4835 update_stmt (lhs_stmt);
4836 if (lhs != new_lhs)
4837 target_ssa = new_lhs;
4838 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4839 powi_result, target_ssa);
4840 gimple_set_location (mul_stmt, gimple_location (stmt));
4841 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
/* Recurse into BB's children in the post-dominator tree.
   NOTE(review): this recursion is as deep as that tree, so very deep
   CFGs could stress the stack — confirm acceptable limits.  */
4847 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4848 son;
4849 son = next_dom_son (CDI_POST_DOMINATORS, son))
4850 reassociate_bb (son);
4853 /* Add jumps around shifts for range tests turned into bit tests.
4854 For each SSA_NAME VAR we have code like:
4855 VAR = ...; // final stmt of range comparison
4856 // bit test here...;
4857 OTHERVAR = ...; // final stmt of the bit test sequence
4858 RES = VAR | OTHERVAR;
4859 Turn the above into:
4860 VAR = ...;
4861 if (VAR != 0)
4862 goto <l3>;
4863 else
4864 goto <l2>;
4865 <l2>:
4866 // bit test here...;
4867 OTHERVAR = ...;
4868 <l3>:
4869 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4871 static void
4872 branch_fixup (void)
4874 tree var;
4875 unsigned int i;
4877 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4879 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4880 gimple use_stmt;
4881 use_operand_p use;
4882 bool ok = single_imm_use (var, &use, &use_stmt);
/* VAR must have exactly one use: the BIT_IOR_EXPR in the same block
   as its definition, per the pattern in the header comment.  */
4883 gcc_assert (ok
4884 && is_gimple_assign (use_stmt)
4885 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4886 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
/* Split after VAR's definition and after the bit-test sequence,
   giving the three blocks of the target shape: cond, then, merge.  */
4888 basic_block cond_bb = gimple_bb (def_stmt);
4889 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4890 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4892 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4893 gimple g = gimple_build_cond (NE_EXPR, var,
4894 build_zero_cst (TREE_TYPE (var)),
4895 NULL_TREE, NULL_TREE);
4896 location_t loc = gimple_location (use_stmt);
4897 gimple_set_location (g, loc);
4898 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
/* Give each arm of the new branch half of the probability/count and
   deduct the skipped path's share from the bit-test block.  */
4900 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4901 etrue->probability = REG_BR_PROB_BASE / 2;
4902 etrue->count = cond_bb->count / 2;
4903 edge efalse = find_edge (cond_bb, then_bb);
4904 efalse->flags = EDGE_FALSE_VALUE;
4905 efalse->probability -= etrue->probability;
4906 efalse->count -= etrue->count;
4907 then_bb->count -= etrue->count;
4909 tree othervar = NULL_TREE;
4910 if (gimple_assign_rhs1 (use_stmt) == var)
4911 othervar = gimple_assign_rhs2 (use_stmt);
4912 else if (gimple_assign_rhs2 (use_stmt) == var)
4913 othervar = gimple_assign_rhs1 (use_stmt);
4914 else
4915 gcc_unreachable ();
/* Replace RES = VAR | OTHERVAR with the PHI shown in the header
   comment: 1 on the taken (VAR != 0) edge, OTHERVAR otherwise.  */
4916 tree lhs = gimple_assign_lhs (use_stmt);
4917 gphi *phi = create_phi_node (lhs, merge_bb);
4918 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4919 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4920 gsi = gsi_for_stmt (use_stmt);
4921 gsi_remove (&gsi, true);
4923 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4924 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4926 reassoc_branch_fixups.release ();
4929 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4930 void debug_ops_vector (vec<operand_entry_t> ops);
4932 /* Dump the operand entry vector OPS to FILE. */
4934 void
4935 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4937 operand_entry_t oe;
4938 unsigned int i;
4940 FOR_EACH_VEC_ELT (ops, i, oe)
4942 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4943 print_generic_expr (file, oe->op, 0);
4947 /* Dump the operand entry vector OPS to STDERR. */
/* DEBUG_FUNCTION marks this as callable from a debugger; it must not
   be removed as unused.  */
4949 DEBUG_FUNCTION void
4950 debug_ops_vector (vec<operand_entry_t> ops)
4952 dump_ops_vector (stderr, ops);
/* Run the two phases of reassociation over the whole function:
   first break up subtracts (walking forward from the entry block),
   then reassociate expressions (walking the post-dominator tree from
   the exit block).  */
4955 static void
4956 do_reassoc (void)
4958 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4959 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4962 /* Initialize the reassociation pass. */
4964 static void
4965 init_reassoc (void)
4967 int i;
4968 long rank = 2;
4969 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4971 /* Find the loops, so that we can prevent moving calculations in
4972 them. */
4973 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4975 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4977 next_operand_entry_id = 0;
4979 /* Reverse RPO (Reverse Post Order) will give us something where
4980 deeper loops come later. */
4981 pre_and_rev_post_order_compute (NULL, bbs, false);
4982 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4983 operand_rank = new hash_map<tree, long>;
4985 /* Give each default definition a distinct rank. This includes
4986 parameters and the static chain. Walk backwards over all
4987 SSA names so that we get proper rank ordering according
4988 to tree_swap_operands_p. */
4989 for (i = num_ssa_names - 1; i > 0; --i)
4991 tree name = ssa_name (i);
4992 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4993 insert_operand_rank (name, ++rank);
4996 /* Set up rank for each BB */
/* Block ranks are spaced 1 << 16 apart, leaving the low bits for the
   ranks of values defined within each block — presumably; confirm
   against the rank computation earlier in this file.  */
4997 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4998 bb_rank[bbs[i]] = ++rank << 16;
5000 free (bbs);
/* reassociate_bb walks the post-dominator tree, so compute it now.  */
5001 calculate_dominance_info (CDI_POST_DOMINATORS);
5002 plus_negates = vNULL;
5005 /* Cleanup after the reassociation pass, and print stats if
5006 requested. */
5008 static void
5009 fini_reassoc (void)
5011 statistics_counter_event (cfun, "Linearized",
5012 reassociate_stats.linearized);
5013 statistics_counter_event (cfun, "Constants eliminated",
5014 reassociate_stats.constants_eliminated);
5015 statistics_counter_event (cfun, "Ops eliminated",
5016 reassociate_stats.ops_eliminated);
5017 statistics_counter_event (cfun, "Statements rewritten",
5018 reassociate_stats.rewritten);
5019 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5020 reassociate_stats.pows_encountered);
5021 statistics_counter_event (cfun, "Built-in powi calls created",
5022 reassociate_stats.pows_created);
/* Release every piece of pass-global state that init_reassoc set up,
   in particular the post-dominator info and loop structures.  */
5024 delete operand_rank;
5025 operand_entry_pool.release ();
5026 free (bb_rank);
5027 plus_negates.release ();
5028 free_dominance_info (CDI_POST_DOMINATORS);
5029 loop_optimizer_finalize ();
5032 /* Gate and execute functions for Reassociation. */
/* Pass driver: set up global state, reassociate, then run the two
   clean-up phases (negate repropagation and the branch fixups queued
   in reassoc_branch_fixups) before tearing the state down.  */
5034 static unsigned int
5035 execute_reassoc (void)
5037 init_reassoc ();
5039 do_reassoc ();
5040 repropagate_negates ();
5041 branch_fixup ();
5043 fini_reassoc ();
5044 return 0;
5047 namespace {
/* Static metadata describing the reassoc pass to the pass manager.  */
5049 const pass_data pass_data_reassoc =
5051 GIMPLE_PASS, /* type */
5052 "reassoc", /* name */
5053 OPTGROUP_NONE, /* optinfo_flags */
5054 TV_TREE_REASSOC, /* tv_id */
5055 ( PROP_cfg | PROP_ssa ), /* properties_required */
5056 0, /* properties_provided */
5057 0, /* properties_destroyed */
5058 0, /* todo_flags_start */
5059 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5062 class pass_reassoc : public gimple_opt_pass
5064 public:
5065 pass_reassoc (gcc::context *ctxt)
5066 : gimple_opt_pass (pass_data_reassoc, ctxt)
5069 /* opt_pass methods: */
/* clone allows the pass manager to schedule this pass more than once
   (see the first_pass_instance check in reassociate_bb).  */
5070 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5071 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5072 virtual unsigned int execute (function *) { return execute_reassoc (); }
5074 }; // class pass_reassoc
5076 } // anon namespace
/* Entry point used by passes.def to create an instance of the
   reassociation pass.  */
5078 gimple_opt_pass *
5079 make_pass_reassoc (gcc::context *ctxt)
5081 return new pass_reassoc (ctxt);