[Patch 7/7] Remove *_BY_PIECES_P
gcc/tree-ssa-reassoc.c (blob b95acfe7c6f264e52f8fa9ccebe18decc1411ba7)
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "cfganal.h"
41 #include "basic-block.h"
42 #include "gimple-pretty-print.h"
43 #include "tree-inline.h"
44 #include "hash-map.h"
45 #include "tree-ssa-alias.h"
46 #include "internal-fn.h"
47 #include "gimple-fold.h"
48 #include "tree-eh.h"
49 #include "gimple-expr.h"
50 #include "is-a.h"
51 #include "gimple.h"
52 #include "gimple-iterator.h"
53 #include "gimplify-me.h"
54 #include "gimple-ssa.h"
55 #include "tree-cfg.h"
56 #include "tree-phinodes.h"
57 #include "ssa-iterators.h"
58 #include "stringpool.h"
59 #include "tree-ssanames.h"
60 #include "tree-ssa-loop-niter.h"
61 #include "tree-ssa-loop.h"
62 #include "expr.h"
63 #include "tree-dfa.h"
64 #include "tree-ssa.h"
65 #include "tree-iterator.h"
66 #include "tree-pass.h"
67 #include "alloc-pool.h"
68 #include "langhooks.h"
69 #include "cfgloop.h"
70 #include "flags.h"
71 #include "target.h"
72 #include "params.h"
73 #include "diagnostic-core.h"
74 #include "builtins.h"
75 #include "gimplify.h"
76 #include "optabs.h"
78 /* This is a simple global reassociation pass. It is, in part, based
79 on the LLVM pass of the same name (They do some things more/less
80 than we do, in different orders, etc).
82 It consists of five steps:
84 1. Breaking up subtract operations into addition + negate, where
85 it would promote the reassociation of adds.
87 2. Left linearization of the expression trees, so that (A+B)+(C+D)
88 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
89 During linearization, we place the operands of the binary
90 expressions into a vector of operand_entry_t
92 3. Optimization of the operand lists, eliminating things like a +
93 -a, a & a, etc.
95 3a. Combine repeated factors with the same occurrence counts
96 into a __builtin_powi call that will later be optimized into
97 an optimal number of multiplies.
99 4. Rewrite the expression trees we linearized and optimized so
100 they are in proper rank order.
102 5. Repropagate negates, as nothing else will clean it up ATM.
104 A bit of theory on #4, since nobody seems to write anything down
105 about why it makes sense to do it the way they do it:
107 We could do this much nicer theoretically, but don't (for reasons
108 explained after how to do it theoretically nice :P).
110 In order to promote the most redundancy elimination, you want
111 binary expressions whose operands are the same rank (or
112 preferably, the same value) exposed to the redundancy eliminator,
113 for possible elimination.
115 So the way to do this if we really cared, is to build the new op
116 tree from the leaves to the roots, merging as you go, and putting the
117 new op on the end of the worklist, until you are left with one
118 thing on the worklist.
120 IE if you have to rewrite the following set of operands (listed with
121 rank in parentheses), with opcode PLUS_EXPR:
123 a (1), b (1), c (1), d (2), e (2)
126 We start with our merge worklist empty, and the ops list with all of
127 those on it.
129 You want to first merge all leaves of the same rank, as much as
130 possible.
132 So first build a binary op of
134 mergetmp = a + b, and put "mergetmp" on the merge worklist.
136 Because there is no three operand form of PLUS_EXPR, c is not going to
137 be exposed to redundancy elimination as a rank 1 operand.
139 So you might as well throw it on the merge worklist (you could also
140 consider it to now be a rank two operand, and merge it with d and e,
141 but in this case, you then have evicted e from a binary op. So at
142 least in this situation, you can't win.)
144 Then build a binary op of d + e
145 mergetmp2 = d + e
147 and put mergetmp2 on the merge worklist.
149 so merge worklist = {mergetmp, c, mergetmp2}
151 Continue building binary ops of these operations until you have only
152 one operation left on the worklist.
154 So we have
156 build binary op
157 mergetmp3 = mergetmp + c
159 worklist = {mergetmp2, mergetmp3}
161 mergetmp4 = mergetmp2 + mergetmp3
163 worklist = {mergetmp4}
165 because we have one operation left, we can now just set the original
166 statement equal to the result of that operation.
168 This will at least expose a + b and d + e to redundancy elimination
169 as binary operations.
171 For extra points, you can reuse the old statements to build the
172 mergetmps, since you shouldn't run out.
174 So why don't we do this?
176 Because it's expensive, and rarely will help. Most trees we are
177 reassociating have 3 or fewer ops. If they have 2 ops, they already
178 will be written into a nice single binary op. If you have 3 ops, a
179 single simple check suffices to tell you whether the first two are of the
180 same rank. If so, you know to order it
182 mergetmp = op1 + op2
183 newstmt = mergetmp + op3
185 instead of
186 mergetmp = op2 + op3
187 newstmt = mergetmp + op1
189 If all three are of the same rank, you can't expose them all in a
190 single binary operator anyway, so the above is *still* the best you
191 can do.
193 Thus, this is what we do. When we have three ops left, we check to see
194 what order to put them in, and call it a day. As a nod to vector sum
195 reduction, we check if any of the ops are really a phi node that is a
196 destructive update for the associating op, and keep the destructive
197 update together for vector sum reduction recognition. */
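/* Illustrative aside, not part of this file: a minimal sketch of the
   "theoretically nice" merge-by-rank scheme described above, written
   against a toy operand type.  It assumes OPS holds N >= 1 entries sorted
   by increasing rank and has room for the N - 1 merge results appended at
   the end; every name here (toy_op, toy_merge_by_rank) is hypothetical.
   Entries are consumed from the front of the worklist, each merge result
   is pushed onto the back, and the loop stops when one entry is left.  */

struct toy_op
{
  long rank;
  long value;
};

static long
toy_merge_by_rank (struct toy_op *ops, int n)
{
  int i = 0;                    /* Next worklist entry to consume.  */

  while (n - i >= 2)
    {
      struct toy_op merged;

      /* mergetmp = ops[i] + ops[i+1]; like get_rank, give the merged
         operation a rank one above the larger of its inputs.  */
      merged.value = ops[i].value + ops[i + 1].value;
      merged.rank = 1 + (ops[i].rank > ops[i + 1].rank
                         ? ops[i].rank : ops[i + 1].rank);

      ops[n++] = merged;        /* Put the new op on the end of the worklist.  */
      i += 2;
    }

  /* One operation left: the value of the whole expression.  */
  return ops[i].value;
}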
200 /* Statistics */
201 static struct
203 int linearized;
204 int constants_eliminated;
205 int ops_eliminated;
206 int rewritten;
207 int pows_encountered;
208 int pows_created;
209 } reassociate_stats;
211 /* Operator, rank pair. */
212 typedef struct operand_entry
214 unsigned int rank;
215 int id;
216 tree op;
217 unsigned int count;
218 } *operand_entry_t;
220 static alloc_pool operand_entry_pool;
222 /* This is used to assign a unique ID to each struct operand_entry
223 so that qsort results are identical on different hosts. */
224 static int next_operand_entry_id;
226 /* Starting rank number for a given basic block, so that we can rank
227 operations using unmovable instructions in that BB based on the bb
228 depth. */
229 static long *bb_rank;
231 /* Operand->rank hashtable. */
232 static hash_map<tree, long> *operand_rank;
234 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
235 all basic blocks the CFG should be adjusted - basic blocks
236 split right after that SSA_NAME's definition statement and before
237 the only use, which must be a bit ior. */
238 static vec<tree> reassoc_branch_fixups;
240 /* Forward decls. */
241 static long get_rank (tree);
242 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
244 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
245 possibly added by gsi_remove. */
247 bool
248 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
250 gimple stmt = gsi_stmt (*gsi);
252 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
253 return gsi_remove (gsi, true);
255 gimple_stmt_iterator prev = *gsi;
256 gsi_prev (&prev);
257 unsigned uid = gimple_uid (stmt);
258 basic_block bb = gimple_bb (stmt);
259 bool ret = gsi_remove (gsi, true);
260 if (!gsi_end_p (prev))
261 gsi_next (&prev);
262 else
263 prev = gsi_start_bb (bb);
264 gimple end_stmt = gsi_stmt (*gsi);
265 while ((stmt = gsi_stmt (prev)) != end_stmt)
267 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
268 gimple_set_uid (stmt, uid);
269 gsi_next (&prev);
271 return ret;
274 /* Bias amount for loop-carried phis. We want this to be larger than
275 the depth of any reassociation tree we can see, but not larger than
276 the rank difference between two blocks. */
277 #define PHI_LOOP_BIAS (1 << 15)
279 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
280 an innermost loop, and the phi has only a single use which is inside
281 the loop, then the rank is the block rank of the loop latch plus an
282 extra bias for the loop-carried dependence. This causes expressions
283 calculated into an accumulator variable to be independent for each
284 iteration of the loop. If STMT is some other phi, the rank is the
285 block rank of its containing block. */
286 static long
287 phi_rank (gimple stmt)
289 basic_block bb = gimple_bb (stmt);
290 struct loop *father = bb->loop_father;
291 tree res;
292 unsigned i;
293 use_operand_p use;
294 gimple use_stmt;
296 /* We only care about real loops (those with a latch). */
297 if (!father->latch)
298 return bb_rank[bb->index];
300 /* Interesting phis must be in headers of innermost loops. */
301 if (bb != father->header
302 || father->inner)
303 return bb_rank[bb->index];
305 /* Ignore virtual SSA_NAMEs. */
306 res = gimple_phi_result (stmt);
307 if (virtual_operand_p (res))
308 return bb_rank[bb->index];
310 /* The phi definition must have a single use, and that use must be
311 within the loop. Otherwise this isn't an accumulator pattern. */
312 if (!single_imm_use (res, &use, &use_stmt)
313 || gimple_bb (use_stmt)->loop_father != father)
314 return bb_rank[bb->index];
316 /* Look for phi arguments from within the loop. If found, bias this phi. */
317 for (i = 0; i < gimple_phi_num_args (stmt); i++)
319 tree arg = gimple_phi_arg_def (stmt, i);
320 if (TREE_CODE (arg) == SSA_NAME
321 && !SSA_NAME_IS_DEFAULT_DEF (arg))
323 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
324 if (gimple_bb (def_stmt)->loop_father == father)
325 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
329 /* Must be an uninteresting phi. */
330 return bb_rank[bb->index];
333 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
334 loop-carried dependence of an innermost loop, return TRUE; else
335 return FALSE. */
336 static bool
337 loop_carried_phi (tree exp)
339 gimple phi_stmt;
340 long block_rank;
342 if (TREE_CODE (exp) != SSA_NAME
343 || SSA_NAME_IS_DEFAULT_DEF (exp))
344 return false;
346 phi_stmt = SSA_NAME_DEF_STMT (exp);
348 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
349 return false;
351 /* Non-loop-carried phis have block rank. Loop-carried phis have
352 an additional bias added in. If this phi doesn't have block rank,
353 it's biased and should not be propagated. */
354 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
356 if (phi_rank (phi_stmt) != block_rank)
357 return true;
359 return false;
362 /* Return the maximum of RANK and the rank that should be propagated
363 from expression OP. For most operands, this is just the rank of OP.
364 For loop-carried phis, the value is zero to avoid undoing the bias
365 in favor of the phi. */
366 static long
367 propagate_rank (long rank, tree op)
369 long op_rank;
371 if (loop_carried_phi (op))
372 return rank;
374 op_rank = get_rank (op);
376 return MAX (rank, op_rank);
379 /* Look up the operand rank structure for expression E. */
381 static inline long
382 find_operand_rank (tree e)
384 long *slot = operand_rank->get (e);
385 return slot ? *slot : -1;
388 /* Insert {E,RANK} into the operand rank hashtable. */
390 static inline void
391 insert_operand_rank (tree e, long rank)
393 gcc_assert (rank > 0);
394 gcc_assert (!operand_rank->put (e, rank));
397 /* Given an expression E, return the rank of the expression. */
399 static long
400 get_rank (tree e)
402 /* Constants have rank 0. */
403 if (is_gimple_min_invariant (e))
404 return 0;
406 /* SSA_NAME's have the rank of the expression they are the result of.
408 For globals and uninitialized values, the rank is 0.
409 For function arguments, use the pre-setup rank.
410 For PHI nodes, stores, asm statements, etc, we use the rank of
411 the BB.
412 For simple operations, the rank is the maximum rank of any of
413 its operands, or the bb_rank, whichever is less.
414 I make no claims that this is optimal, however, it gives good
415 results. */
417 /* We make an exception to the normal ranking system to break
418 dependences of accumulator variables in loops. Suppose we
419 have a simple one-block loop containing:
421 x_1 = phi(x_0, x_2)
422 b = a + x_1
423 c = b + d
424 x_2 = c + e
426 As shown, each iteration of the calculation into x is fully
427 dependent upon the iteration before it. We would prefer to
428 see this in the form:
430 x_1 = phi(x_0, x_2)
431 b = a + d
432 c = b + e
433 x_2 = c + x_1
435 If the loop is unrolled, the calculations of b and c from
436 different iterations can be interleaved.
438 To obtain this result during reassociation, we bias the rank
439 of the phi definition x_1 upward, when it is recognized as an
440 accumulator pattern. The artificial rank causes it to be
441 added last, providing the desired independence. */
443 if (TREE_CODE (e) == SSA_NAME)
445 gimple stmt;
446 long rank;
447 int i, n;
448 tree op;
450 if (SSA_NAME_IS_DEFAULT_DEF (e))
451 return find_operand_rank (e);
453 stmt = SSA_NAME_DEF_STMT (e);
454 if (gimple_code (stmt) == GIMPLE_PHI)
455 return phi_rank (stmt);
457 if (!is_gimple_assign (stmt)
458 || gimple_vdef (stmt))
459 return bb_rank[gimple_bb (stmt)->index];
461 /* If we already have a rank for this expression, use that. */
462 rank = find_operand_rank (e);
463 if (rank != -1)
464 return rank;
466 /* Otherwise, find the maximum rank for the operands. As an
467 exception, remove the bias from loop-carried phis when propagating
468 the rank so that dependent operations are not also biased. */
469 rank = 0;
470 if (gimple_assign_single_p (stmt))
472 tree rhs = gimple_assign_rhs1 (stmt);
473 n = TREE_OPERAND_LENGTH (rhs);
474 if (n == 0)
475 rank = propagate_rank (rank, rhs);
476 else
478 for (i = 0; i < n; i++)
480 op = TREE_OPERAND (rhs, i);
482 if (op != NULL_TREE)
483 rank = propagate_rank (rank, op);
487 else
489 n = gimple_num_ops (stmt);
490 for (i = 1; i < n; i++)
492 op = gimple_op (stmt, i);
493 gcc_assert (op);
494 rank = propagate_rank (rank, op);
498 if (dump_file && (dump_flags & TDF_DETAILS))
500 fprintf (dump_file, "Rank for ");
501 print_generic_expr (dump_file, e, 0);
502 fprintf (dump_file, " is %ld\n", (rank + 1));
505 /* Note the rank in the hashtable so we don't recompute it. */
506 insert_operand_rank (e, (rank + 1));
507 return (rank + 1);
510 /* Globals, etc, are rank 0 */
511 return 0;
515 /* We want integer ones to end up last no matter what, since they are
516 the ones we can do the most with. */
517 #define INTEGER_CONST_TYPE 1 << 3
518 #define FLOAT_CONST_TYPE 1 << 2
519 #define OTHER_CONST_TYPE 1 << 1
521 /* Classify an invariant tree into integer, float, or other, so that
522 we can sort them to be near other constants of the same type. */
523 static inline int
524 constant_type (tree t)
526 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
527 return INTEGER_CONST_TYPE;
528 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
529 return FLOAT_CONST_TYPE;
530 else
531 return OTHER_CONST_TYPE;
534 /* qsort comparison function to sort operand entries PA and PB by rank
535 so that the sorted array is ordered by rank in decreasing order. */
536 static int
537 sort_by_operand_rank (const void *pa, const void *pb)
539 const operand_entry_t oea = *(const operand_entry_t *)pa;
540 const operand_entry_t oeb = *(const operand_entry_t *)pb;
542 /* It's nicer for optimize_expression if constants that are likely
543 to fold when added/multiplied/whatever are put next to each
544 other. Since all constants have rank 0, order them by type. */
545 if (oeb->rank == 0 && oea->rank == 0)
547 if (constant_type (oeb->op) != constant_type (oea->op))
548 return constant_type (oeb->op) - constant_type (oea->op);
549 else
550 /* To make sorting result stable, we use unique IDs to determine
551 order. */
552 return oeb->id - oea->id;
555 /* Lastly, make sure the versions that are the same go next to each
556 other. */
557 if ((oeb->rank - oea->rank == 0)
558 && TREE_CODE (oea->op) == SSA_NAME
559 && TREE_CODE (oeb->op) == SSA_NAME)
561 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
562 versions of removed SSA_NAMEs, so if possible prefer to sort
563 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
564 See PR60418. */
565 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
566 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
567 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
569 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
570 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
571 basic_block bba = gimple_bb (stmta);
572 basic_block bbb = gimple_bb (stmtb);
573 if (bbb != bba)
575 if (bb_rank[bbb->index] != bb_rank[bba->index])
576 return bb_rank[bbb->index] - bb_rank[bba->index];
578 else
580 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
581 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
582 if (da != db)
583 return da ? 1 : -1;
587 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
588 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
589 else
590 return oeb->id - oea->id;
593 if (oeb->rank != oea->rank)
594 return oeb->rank - oea->rank;
595 else
596 return oeb->id - oea->id;
599 /* Add an operand entry to *OPS for the tree operand OP. */
601 static void
602 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
604 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
606 oe->op = op;
607 oe->rank = get_rank (op);
608 oe->id = next_operand_entry_id++;
609 oe->count = 1;
610 ops->safe_push (oe);
613 /* Add an operand entry to *OPS for the tree operand OP with repeat
614 count REPEAT. */
616 static void
617 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
618 HOST_WIDE_INT repeat)
620 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
622 oe->op = op;
623 oe->rank = get_rank (op);
624 oe->id = next_operand_entry_id++;
625 oe->count = repeat;
626 ops->safe_push (oe);
628 reassociate_stats.pows_encountered++;
631 /* Return true if STMT is a reassociable operation containing a binary
632 operation with tree code CODE, and is inside LOOP. */
634 static bool
635 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
637 basic_block bb = gimple_bb (stmt);
639 if (gimple_bb (stmt) == NULL)
640 return false;
642 if (!flow_bb_inside_loop_p (loop, bb))
643 return false;
645 if (is_gimple_assign (stmt)
646 && gimple_assign_rhs_code (stmt) == code
647 && has_single_use (gimple_assign_lhs (stmt)))
648 return true;
650 return false;
654 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
655 operand of the negate operation. Otherwise, return NULL. */
657 static tree
658 get_unary_op (tree name, enum tree_code opcode)
660 gimple stmt = SSA_NAME_DEF_STMT (name);
662 if (!is_gimple_assign (stmt))
663 return NULL_TREE;
665 if (gimple_assign_rhs_code (stmt) == opcode)
666 return gimple_assign_rhs1 (stmt);
667 return NULL_TREE;
670 /* If CURR and LAST are a pair of ops that OPCODE allows us to
671 eliminate through equivalences, do so, remove them from OPS, and
672 return true. Otherwise, return false. */
674 static bool
675 eliminate_duplicate_pair (enum tree_code opcode,
676 vec<operand_entry_t> *ops,
677 bool *all_done,
678 unsigned int i,
679 operand_entry_t curr,
680 operand_entry_t last)
683 /* If we have two of the same op, and the opcode is &, |, min, or max,
684 we can eliminate one of them.
685 If we have two of the same op, and the opcode is ^, we can
686 eliminate both of them. */
688 if (last && last->op == curr->op)
690 switch (opcode)
692 case MAX_EXPR:
693 case MIN_EXPR:
694 case BIT_IOR_EXPR:
695 case BIT_AND_EXPR:
696 if (dump_file && (dump_flags & TDF_DETAILS))
698 fprintf (dump_file, "Equivalence: ");
699 print_generic_expr (dump_file, curr->op, 0);
700 fprintf (dump_file, " [&|minmax] ");
701 print_generic_expr (dump_file, last->op, 0);
702 fprintf (dump_file, " -> ");
703 print_generic_stmt (dump_file, last->op, 0);
706 ops->ordered_remove (i);
707 reassociate_stats.ops_eliminated ++;
709 return true;
711 case BIT_XOR_EXPR:
712 if (dump_file && (dump_flags & TDF_DETAILS))
714 fprintf (dump_file, "Equivalence: ");
715 print_generic_expr (dump_file, curr->op, 0);
716 fprintf (dump_file, " ^ ");
717 print_generic_expr (dump_file, last->op, 0);
718 fprintf (dump_file, " -> nothing\n");
721 reassociate_stats.ops_eliminated += 2;
723 if (ops->length () == 2)
725 ops->create (0);
726 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
727 *all_done = true;
729 else
731 ops->ordered_remove (i-1);
732 ops->ordered_remove (i-1);
735 return true;
737 default:
738 break;
741 return false;
744 static vec<tree> plus_negates;
746 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
747 expression, look in OPS for a corresponding positive operation to cancel
748 it out. If we find one, remove the other from OPS, replace
749 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
750 return false. */
752 static bool
753 eliminate_plus_minus_pair (enum tree_code opcode,
754 vec<operand_entry_t> *ops,
755 unsigned int currindex,
756 operand_entry_t curr)
758 tree negateop;
759 tree notop;
760 unsigned int i;
761 operand_entry_t oe;
763 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
764 return false;
766 negateop = get_unary_op (curr->op, NEGATE_EXPR);
767 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
768 if (negateop == NULL_TREE && notop == NULL_TREE)
769 return false;
771 /* Any non-negated version will have a rank that is one less than
772 the current rank. So once we hit those ranks, if we don't find
773 one, we can stop. */
775 for (i = currindex + 1;
776 ops->iterate (i, &oe)
777 && oe->rank >= curr->rank - 1 ;
778 i++)
780 if (oe->op == negateop)
783 if (dump_file && (dump_flags & TDF_DETAILS))
785 fprintf (dump_file, "Equivalence: ");
786 print_generic_expr (dump_file, negateop, 0);
787 fprintf (dump_file, " + -");
788 print_generic_expr (dump_file, oe->op, 0);
789 fprintf (dump_file, " -> 0\n");
792 ops->ordered_remove (i);
793 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
794 ops->ordered_remove (currindex);
795 reassociate_stats.ops_eliminated ++;
797 return true;
799 else if (oe->op == notop)
801 tree op_type = TREE_TYPE (oe->op);
803 if (dump_file && (dump_flags & TDF_DETAILS))
805 fprintf (dump_file, "Equivalence: ");
806 print_generic_expr (dump_file, notop, 0);
807 fprintf (dump_file, " + ~");
808 print_generic_expr (dump_file, oe->op, 0);
809 fprintf (dump_file, " -> -1\n");
812 ops->ordered_remove (i);
813 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
814 ops->ordered_remove (currindex);
815 reassociate_stats.ops_eliminated ++;
817 return true;
821 /* CURR->OP is a negate expr in a plus expr: save it for later
822 inspection in repropagate_negates(). */
823 if (negateop != NULL_TREE)
824 plus_negates.safe_push (curr->op);
826 return false;
829 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
830 bitwise not expression, look in OPS for a corresponding operand to
831 cancel it out. If we find one, remove the other from OPS, replace
832 OPS[CURRINDEX] with 0, and return true. Otherwise, return
833 false. */
835 static bool
836 eliminate_not_pairs (enum tree_code opcode,
837 vec<operand_entry_t> *ops,
838 unsigned int currindex,
839 operand_entry_t curr)
841 tree notop;
842 unsigned int i;
843 operand_entry_t oe;
845 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
846 || TREE_CODE (curr->op) != SSA_NAME)
847 return false;
849 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
850 if (notop == NULL_TREE)
851 return false;
853 /* Any non-not version will have a rank that is one less than
854 the current rank. So once we hit those ranks, if we don't find
855 one, we can stop. */
857 for (i = currindex + 1;
858 ops->iterate (i, &oe)
859 && oe->rank >= curr->rank - 1;
860 i++)
862 if (oe->op == notop)
864 if (dump_file && (dump_flags & TDF_DETAILS))
866 fprintf (dump_file, "Equivalence: ");
867 print_generic_expr (dump_file, notop, 0);
868 if (opcode == BIT_AND_EXPR)
869 fprintf (dump_file, " & ~");
870 else if (opcode == BIT_IOR_EXPR)
871 fprintf (dump_file, " | ~");
872 print_generic_expr (dump_file, oe->op, 0);
873 if (opcode == BIT_AND_EXPR)
874 fprintf (dump_file, " -> 0\n");
875 else if (opcode == BIT_IOR_EXPR)
876 fprintf (dump_file, " -> -1\n");
879 if (opcode == BIT_AND_EXPR)
880 oe->op = build_zero_cst (TREE_TYPE (oe->op));
881 else if (opcode == BIT_IOR_EXPR)
882 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
884 reassociate_stats.ops_eliminated += ops->length () - 1;
885 ops->truncate (0);
886 ops->quick_push (oe);
887 return true;
891 return false;
894 /* Use a constant value that may be present in OPS to try to eliminate
895 operands. Note that this function is only really used when we've
896 eliminated ops for other reasons, or merged constants. Across
897 single statements, fold already does all of this, plus more. There
898 is little point in duplicating logic, so I've only included the
899 identities that I could ever construct testcases to trigger. */
901 static void
902 eliminate_using_constants (enum tree_code opcode,
903 vec<operand_entry_t> *ops)
905 operand_entry_t oelast = ops->last ();
906 tree type = TREE_TYPE (oelast->op);
908 if (oelast->rank == 0
909 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
911 switch (opcode)
913 case BIT_AND_EXPR:
914 if (integer_zerop (oelast->op))
916 if (ops->length () != 1)
918 if (dump_file && (dump_flags & TDF_DETAILS))
919 fprintf (dump_file, "Found & 0, removing all other ops\n");
921 reassociate_stats.ops_eliminated += ops->length () - 1;
923 ops->truncate (0);
924 ops->quick_push (oelast);
925 return;
928 else if (integer_all_onesp (oelast->op))
930 if (ops->length () != 1)
932 if (dump_file && (dump_flags & TDF_DETAILS))
933 fprintf (dump_file, "Found & -1, removing\n");
934 ops->pop ();
935 reassociate_stats.ops_eliminated++;
938 break;
939 case BIT_IOR_EXPR:
940 if (integer_all_onesp (oelast->op))
942 if (ops->length () != 1)
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Found | -1, removing all other ops\n");
947 reassociate_stats.ops_eliminated += ops->length () - 1;
949 ops->truncate (0);
950 ops->quick_push (oelast);
951 return;
954 else if (integer_zerop (oelast->op))
956 if (ops->length () != 1)
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found | 0, removing\n");
960 ops->pop ();
961 reassociate_stats.ops_eliminated++;
964 break;
965 case MULT_EXPR:
966 if (integer_zerop (oelast->op)
967 || (FLOAT_TYPE_P (type)
968 && !HONOR_NANS (TYPE_MODE (type))
969 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
970 && real_zerop (oelast->op)))
972 if (ops->length () != 1)
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 fprintf (dump_file, "Found * 0, removing all other ops\n");
977 reassociate_stats.ops_eliminated += ops->length () - 1;
978 ops->truncate (1);
979 ops->quick_push (oelast);
980 return;
983 else if (integer_onep (oelast->op)
984 || (FLOAT_TYPE_P (type)
985 && !HONOR_SNANS (TYPE_MODE (type))
986 && real_onep (oelast->op)))
988 if (ops->length () != 1)
990 if (dump_file && (dump_flags & TDF_DETAILS))
991 fprintf (dump_file, "Found * 1, removing\n");
992 ops->pop ();
993 reassociate_stats.ops_eliminated++;
994 return;
997 break;
998 case BIT_XOR_EXPR:
999 case PLUS_EXPR:
1000 case MINUS_EXPR:
1001 if (integer_zerop (oelast->op)
1002 || (FLOAT_TYPE_P (type)
1003 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1004 && fold_real_zero_addition_p (type, oelast->op,
1005 opcode == MINUS_EXPR)))
1007 if (ops->length () != 1)
1009 if (dump_file && (dump_flags & TDF_DETAILS))
1010 fprintf (dump_file, "Found [|^+] 0, removing\n");
1011 ops->pop ();
1012 reassociate_stats.ops_eliminated++;
1013 return;
1016 break;
1017 default:
1018 break;
1024 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1025 bool, bool);
1027 /* Structure for tracking and counting operands. */
1028 typedef struct oecount_s {
1029 int cnt;
1030 int id;
1031 enum tree_code oecode;
1032 tree op;
1033 } oecount;
1036 /* The heap for the oecount hashtable and the sorted list of operands. */
1037 static vec<oecount> cvec;
1040 /* Oecount hashtable helpers. */
1042 struct oecount_hasher
1044 typedef int value_type;
1045 typedef int compare_type;
1046 typedef int store_values_directly;
1047 static inline hashval_t hash (const value_type &);
1048 static inline bool equal (const value_type &, const compare_type &);
1049 static bool is_deleted (int &v) { return v == 1; }
1050 static void mark_deleted (int &e) { e = 1; }
1051 static bool is_empty (int &v) { return v == 0; }
1052 static void mark_empty (int &e) { e = 0; }
1053 static void remove (int &) {}
1056 /* Hash function for oecount. */
1058 inline hashval_t
1059 oecount_hasher::hash (const value_type &p)
1061 const oecount *c = &cvec[p - 42];
1062 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1065 /* Comparison function for oecount. */
1067 inline bool
1068 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1070 const oecount *c1 = &cvec[p1 - 42];
1071 const oecount *c2 = &cvec[p2 - 42];
1072 return (c1->oecode == c2->oecode
1073 && c1->op == c2->op);
1076 /* Comparison function for qsort sorting oecount elements by count. */
1078 static int
1079 oecount_cmp (const void *p1, const void *p2)
1081 const oecount *c1 = (const oecount *)p1;
1082 const oecount *c2 = (const oecount *)p2;
1083 if (c1->cnt != c2->cnt)
1084 return c1->cnt - c2->cnt;
1085 else
1086 /* If counts are identical, use unique IDs to stabilize qsort. */
1087 return c1->id - c2->id;
1090 /* Return TRUE iff STMT represents a builtin call that raises OP
1091 to some exponent. */
1093 static bool
1094 stmt_is_power_of_op (gimple stmt, tree op)
1096 tree fndecl;
1098 if (!is_gimple_call (stmt))
1099 return false;
1101 fndecl = gimple_call_fndecl (stmt);
1103 if (!fndecl
1104 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1105 return false;
1107 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1109 CASE_FLT_FN (BUILT_IN_POW):
1110 CASE_FLT_FN (BUILT_IN_POWI):
1111 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1113 default:
1114 return false;
1118 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1119 in place and return the result. Assumes that stmt_is_power_of_op
1120 was previously called for STMT and returned TRUE. */
1122 static HOST_WIDE_INT
1123 decrement_power (gimple stmt)
1125 REAL_VALUE_TYPE c, cint;
1126 HOST_WIDE_INT power;
1127 tree arg1;
1129 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1131 CASE_FLT_FN (BUILT_IN_POW):
1132 arg1 = gimple_call_arg (stmt, 1);
1133 c = TREE_REAL_CST (arg1);
1134 power = real_to_integer (&c) - 1;
1135 real_from_integer (&cint, VOIDmode, power, SIGNED);
1136 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1137 return power;
1139 CASE_FLT_FN (BUILT_IN_POWI):
1140 arg1 = gimple_call_arg (stmt, 1);
1141 power = TREE_INT_CST_LOW (arg1) - 1;
1142 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1143 return power;
1145 default:
1146 gcc_unreachable ();
1150 /* Find the single immediate use of STMT's LHS, and replace it
1151 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1152 replace *DEF with OP as well. */
1154 static void
1155 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1157 tree lhs;
1158 gimple use_stmt;
1159 use_operand_p use;
1160 gimple_stmt_iterator gsi;
1162 if (is_gimple_call (stmt))
1163 lhs = gimple_call_lhs (stmt);
1164 else
1165 lhs = gimple_assign_lhs (stmt);
1167 gcc_assert (has_single_use (lhs));
1168 single_imm_use (lhs, &use, &use_stmt);
1169 if (lhs == *def)
1170 *def = op;
1171 SET_USE (use, op);
1172 if (TREE_CODE (op) != SSA_NAME)
1173 update_stmt (use_stmt);
1174 gsi = gsi_for_stmt (stmt);
1175 unlink_stmt_vdef (stmt);
1176 reassoc_remove_stmt (&gsi);
1177 release_defs (stmt);
1180 /* Walks the linear chain with result *DEF searching for an operation
1181 with operand OP and code OPCODE, removing that from the chain. *DEF
1182 is updated if there is only one operand but no operation left. */
1184 static void
1185 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1187 gimple stmt = SSA_NAME_DEF_STMT (*def);
1191 tree name;
1193 if (opcode == MULT_EXPR
1194 && stmt_is_power_of_op (stmt, op))
1196 if (decrement_power (stmt) == 1)
1197 propagate_op_to_single_use (op, stmt, def);
1198 return;
1201 name = gimple_assign_rhs1 (stmt);
1203 /* If this is the operation we look for and one of the operands
1204 is ours simply propagate the other operand into the stmts
1205 single use. */
1206 if (gimple_assign_rhs_code (stmt) == opcode
1207 && (name == op
1208 || gimple_assign_rhs2 (stmt) == op))
1210 if (name == op)
1211 name = gimple_assign_rhs2 (stmt);
1212 propagate_op_to_single_use (name, stmt, def);
1213 return;
1216 /* We might have a multiply of two __builtin_pow* calls, and
1217 the operand might be hiding in the rightmost one. */
1218 if (opcode == MULT_EXPR
1219 && gimple_assign_rhs_code (stmt) == opcode
1220 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1221 && has_single_use (gimple_assign_rhs2 (stmt)))
1223 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1224 if (stmt_is_power_of_op (stmt2, op))
1226 if (decrement_power (stmt2) == 1)
1227 propagate_op_to_single_use (op, stmt2, def);
1228 return;
1232 /* Continue walking the chain. */
1233 gcc_assert (name != op
1234 && TREE_CODE (name) == SSA_NAME);
1235 stmt = SSA_NAME_DEF_STMT (name);
1237 while (1);
1240 /* Returns true if statement S1 dominates statement S2. Like
1241 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1243 static bool
1244 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1246 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1248 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1249 SSA_NAME. Assume it lives at the beginning of the function and
1250 thus dominates everything. */
1251 if (!bb1 || s1 == s2)
1252 return true;
1254 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1255 if (!bb2)
1256 return false;
1258 if (bb1 == bb2)
1260 /* PHIs in the same basic block are assumed to be
1261 executed all in parallel, if only one stmt is a PHI,
1262 it dominates the other stmt in the same basic block. */
1263 if (gimple_code (s1) == GIMPLE_PHI)
1264 return true;
1266 if (gimple_code (s2) == GIMPLE_PHI)
1267 return false;
1269 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1271 if (gimple_uid (s1) < gimple_uid (s2))
1272 return true;
1274 if (gimple_uid (s1) > gimple_uid (s2))
1275 return false;
1277 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1278 unsigned int uid = gimple_uid (s1);
1279 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1281 gimple s = gsi_stmt (gsi);
1282 if (gimple_uid (s) != uid)
1283 break;
1284 if (s == s2)
1285 return true;
1288 return false;
1291 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1294 /* Insert STMT after INSERT_POINT. */
1296 static void
1297 insert_stmt_after (gimple stmt, gimple insert_point)
1299 gimple_stmt_iterator gsi;
1300 basic_block bb;
1302 if (gimple_code (insert_point) == GIMPLE_PHI)
1303 bb = gimple_bb (insert_point);
1304 else if (!stmt_ends_bb_p (insert_point))
1306 gsi = gsi_for_stmt (insert_point);
1307 gimple_set_uid (stmt, gimple_uid (insert_point));
1308 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1309 return;
1311 else
1312 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1313 thus if it must end a basic block, it should be a call that can
1314 throw, or some assignment that can throw. If it throws, the LHS
1315 of it will not be initialized though, so only valid places using
1316 the SSA_NAME should be dominated by the fallthru edge. */
1317 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1318 gsi = gsi_after_labels (bb);
1319 if (gsi_end_p (gsi))
1321 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1322 gimple_set_uid (stmt,
1323 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1325 else
1326 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1327 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1330 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1331 the result. Places the statement after the definition of either
1332 OP1 or OP2. Returns the new statement. */
1334 static gimple
1335 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1337 gimple op1def = NULL, op2def = NULL;
1338 gimple_stmt_iterator gsi;
1339 tree op;
1340 gimple sum;
1342 /* Create the addition statement. */
1343 op = make_ssa_name (type, NULL);
1344 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1346 /* Find an insertion place and insert. */
1347 if (TREE_CODE (op1) == SSA_NAME)
1348 op1def = SSA_NAME_DEF_STMT (op1);
1349 if (TREE_CODE (op2) == SSA_NAME)
1350 op2def = SSA_NAME_DEF_STMT (op2);
1351 if ((!op1def || gimple_nop_p (op1def))
1352 && (!op2def || gimple_nop_p (op2def)))
1354 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1355 if (gsi_end_p (gsi))
1357 gimple_stmt_iterator gsi2
1358 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1359 gimple_set_uid (sum,
1360 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1362 else
1363 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1364 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1366 else
1368 gimple insert_point;
1369 if ((!op1def || gimple_nop_p (op1def))
1370 || (op2def && !gimple_nop_p (op2def)
1371 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1372 insert_point = op2def;
1373 else
1374 insert_point = op1def;
1375 insert_stmt_after (sum, insert_point);
1377 update_stmt (sum);
1379 return sum;
1382 /* Perform un-distribution of divisions and multiplications.
1383 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1384 to (A + B) / X for real X.
1386 The algorithm is organized as follows.
1388 - First we walk the addition chain *OPS looking for summands that
1389 are defined by a multiplication or a real division. This results
1390 in the candidates bitmap with relevant indices into *OPS.
1392 - Second we build the chains of multiplications or divisions for
1393 these candidates, counting the number of occurrences of (operand, code)
1394 pairs in all of the candidates chains.
1396 - Third we sort the (operand, code) pairs by number of occurrence and
1397 process them starting with the pair with the most uses.
1399 * For each such pair we walk the candidates again to build a
1400 second candidate bitmap noting all multiplication/division chains
1401 that have at least one occurrence of (operand, code).
1403 * We build an alternate addition chain only covering these
1404 candidates with one (operand, code) operation removed from their
1405 multiplication/division chain.
1407 * The first candidate gets replaced by the alternate addition chain
1408 multiplied/divided by the operand.
1410 * All candidate chains get disabled for further processing and
1411 processing of (operand, code) pairs continues.
1413 The alternate addition chains built are re-processed by the main
1414 reassociation algorithm which allows optimizing a * x * y + b * y * x
1415 to (a + b) * x * y in one invocation of the reassociation pass. */
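/* Illustrative aside, not part of this file: the undistribution described
   above on plain doubles, with hypothetical toy_* names.  Both functions
   compute the same mathematical value; for floating point the rewrite is
   only valid when reassociating FP operations is allowed.  The pass turns
   addition chains of the first shape into the second so the inner chain
   (a + b) is exposed to further reassociation; real division is handled
   the same way, a / x + b / x becoming (a + b) / x.  */

static double
toy_distributed (double a, double b, double x)
{
  return a * x + b * x;         /* Shape before undistribution.  */
}

static double
toy_undistributed (double a, double b, double x)
{
  return (a + b) * x;           /* Shape after undistribution.  */
}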
1417 static bool
1418 undistribute_ops_list (enum tree_code opcode,
1419 vec<operand_entry_t> *ops, struct loop *loop)
1421 unsigned int length = ops->length ();
1422 operand_entry_t oe1;
1423 unsigned i, j;
1424 sbitmap candidates, candidates2;
1425 unsigned nr_candidates, nr_candidates2;
1426 sbitmap_iterator sbi0;
1427 vec<operand_entry_t> *subops;
1428 bool changed = false;
1429 int next_oecount_id = 0;
1431 if (length <= 1
1432 || opcode != PLUS_EXPR)
1433 return false;
1435 /* Build a list of candidates to process. */
1436 candidates = sbitmap_alloc (length);
1437 bitmap_clear (candidates);
1438 nr_candidates = 0;
1439 FOR_EACH_VEC_ELT (*ops, i, oe1)
1441 enum tree_code dcode;
1442 gimple oe1def;
1444 if (TREE_CODE (oe1->op) != SSA_NAME)
1445 continue;
1446 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1447 if (!is_gimple_assign (oe1def))
1448 continue;
1449 dcode = gimple_assign_rhs_code (oe1def);
1450 if ((dcode != MULT_EXPR
1451 && dcode != RDIV_EXPR)
1452 || !is_reassociable_op (oe1def, dcode, loop))
1453 continue;
1455 bitmap_set_bit (candidates, i);
1456 nr_candidates++;
1459 if (nr_candidates < 2)
1461 sbitmap_free (candidates);
1462 return false;
1465 if (dump_file && (dump_flags & TDF_DETAILS))
1467 fprintf (dump_file, "searching for un-distribute opportunities ");
1468 print_generic_expr (dump_file,
1469 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1470 fprintf (dump_file, " %d\n", nr_candidates);
1473 /* Build linearized sub-operand lists and the counting table. */
1474 cvec.create (0);
1476 hash_table<oecount_hasher> ctable (15);
1478 /* ??? Macro arguments cannot have multi-argument template types in
1479 them. This typedef is needed to work around that limitation. */
1480 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1481 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1482 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1484 gimple oedef;
1485 enum tree_code oecode;
1486 unsigned j;
1488 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1489 oecode = gimple_assign_rhs_code (oedef);
1490 linearize_expr_tree (&subops[i], oedef,
1491 associative_tree_code (oecode), false);
1493 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1495 oecount c;
1496 int *slot;
1497 int idx;
1498 c.oecode = oecode;
1499 c.cnt = 1;
1500 c.id = next_oecount_id++;
1501 c.op = oe1->op;
1502 cvec.safe_push (c);
1503 idx = cvec.length () + 41;
1504 slot = ctable.find_slot (idx, INSERT);
1505 if (!*slot)
1507 *slot = idx;
1509 else
1511 cvec.pop ();
1512 cvec[*slot - 42].cnt++;
1517 /* Sort the counting table. */
1518 cvec.qsort (oecount_cmp);
1520 if (dump_file && (dump_flags & TDF_DETAILS))
1522 oecount *c;
1523 fprintf (dump_file, "Candidates:\n");
1524 FOR_EACH_VEC_ELT (cvec, j, c)
1526 fprintf (dump_file, " %u %s: ", c->cnt,
1527 c->oecode == MULT_EXPR
1528 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1529 print_generic_expr (dump_file, c->op, 0);
1530 fprintf (dump_file, "\n");
1534 /* Process the (operand, code) pairs in order of most occurrence. */
1535 candidates2 = sbitmap_alloc (length);
1536 while (!cvec.is_empty ())
1538 oecount *c = &cvec.last ();
1539 if (c->cnt < 2)
1540 break;
1542 /* Now collect the operands in the outer chain that contain
1543 the common operand in their inner chain. */
1544 bitmap_clear (candidates2);
1545 nr_candidates2 = 0;
1546 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1548 gimple oedef;
1549 enum tree_code oecode;
1550 unsigned j;
1551 tree op = (*ops)[i]->op;
1553 /* If we undistributed in this chain already this may be
1554 a constant. */
1555 if (TREE_CODE (op) != SSA_NAME)
1556 continue;
1558 oedef = SSA_NAME_DEF_STMT (op);
1559 oecode = gimple_assign_rhs_code (oedef);
1560 if (oecode != c->oecode)
1561 continue;
1563 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1565 if (oe1->op == c->op)
1567 bitmap_set_bit (candidates2, i);
1568 ++nr_candidates2;
1569 break;
1574 if (nr_candidates2 >= 2)
1576 operand_entry_t oe1, oe2;
1577 gimple prod;
1578 int first = bitmap_first_set_bit (candidates2);
1580 /* Build the new addition chain. */
1581 oe1 = (*ops)[first];
1582 if (dump_file && (dump_flags & TDF_DETAILS))
1584 fprintf (dump_file, "Building (");
1585 print_generic_expr (dump_file, oe1->op, 0);
1587 zero_one_operation (&oe1->op, c->oecode, c->op);
1588 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1590 gimple sum;
1591 oe2 = (*ops)[i];
1592 if (dump_file && (dump_flags & TDF_DETAILS))
1594 fprintf (dump_file, " + ");
1595 print_generic_expr (dump_file, oe2->op, 0);
1597 zero_one_operation (&oe2->op, c->oecode, c->op);
1598 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1599 oe1->op, oe2->op, opcode);
1600 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1601 oe2->rank = 0;
1602 oe1->op = gimple_get_lhs (sum);
1605 /* Apply the multiplication/division. */
1606 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1607 oe1->op, c->op, c->oecode);
1608 if (dump_file && (dump_flags & TDF_DETAILS))
1610 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1611 print_generic_expr (dump_file, c->op, 0);
1612 fprintf (dump_file, "\n");
1615 /* Record it in the addition chain and disable further
1616 undistribution with this op. */
1617 oe1->op = gimple_assign_lhs (prod);
1618 oe1->rank = get_rank (oe1->op);
1619 subops[first].release ();
1621 changed = true;
1624 cvec.pop ();
1627 for (i = 0; i < ops->length (); ++i)
1628 subops[i].release ();
1629 free (subops);
1630 cvec.release ();
1631 sbitmap_free (candidates);
1632 sbitmap_free (candidates2);
1634 return changed;
1637 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1638 expression, examine the other OPS to see if any of them are comparisons
1639 of the same values, which we may be able to combine or eliminate.
1640 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1642 static bool
1643 eliminate_redundant_comparison (enum tree_code opcode,
1644 vec<operand_entry_t> *ops,
1645 unsigned int currindex,
1646 operand_entry_t curr)
1648 tree op1, op2;
1649 enum tree_code lcode, rcode;
1650 gimple def1, def2;
1651 int i;
1652 operand_entry_t oe;
1654 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1655 return false;
1657 /* Check that CURR is a comparison. */
1658 if (TREE_CODE (curr->op) != SSA_NAME)
1659 return false;
1660 def1 = SSA_NAME_DEF_STMT (curr->op);
1661 if (!is_gimple_assign (def1))
1662 return false;
1663 lcode = gimple_assign_rhs_code (def1);
1664 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1665 return false;
1666 op1 = gimple_assign_rhs1 (def1);
1667 op2 = gimple_assign_rhs2 (def1);
1669 /* Now look for a similar comparison in the remaining OPS. */
1670 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1672 tree t;
1674 if (TREE_CODE (oe->op) != SSA_NAME)
1675 continue;
1676 def2 = SSA_NAME_DEF_STMT (oe->op);
1677 if (!is_gimple_assign (def2))
1678 continue;
1679 rcode = gimple_assign_rhs_code (def2);
1680 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1681 continue;
1683 /* If we got here, we have a match. See if we can combine the
1684 two comparisons. */
1685 if (opcode == BIT_IOR_EXPR)
1686 t = maybe_fold_or_comparisons (lcode, op1, op2,
1687 rcode, gimple_assign_rhs1 (def2),
1688 gimple_assign_rhs2 (def2));
1689 else
1690 t = maybe_fold_and_comparisons (lcode, op1, op2,
1691 rcode, gimple_assign_rhs1 (def2),
1692 gimple_assign_rhs2 (def2));
1693 if (!t)
1694 continue;
1696 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1697 always give us a boolean_type_node value back. If the original
1698 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1699 we need to convert. */
1700 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1701 t = fold_convert (TREE_TYPE (curr->op), t);
1703 if (TREE_CODE (t) != INTEGER_CST
1704 && !operand_equal_p (t, curr->op, 0))
1706 enum tree_code subcode;
1707 tree newop1, newop2;
1708 if (!COMPARISON_CLASS_P (t))
1709 continue;
1710 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1711 STRIP_USELESS_TYPE_CONVERSION (newop1);
1712 STRIP_USELESS_TYPE_CONVERSION (newop2);
1713 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1714 continue;
1717 if (dump_file && (dump_flags & TDF_DETAILS))
1719 fprintf (dump_file, "Equivalence: ");
1720 print_generic_expr (dump_file, curr->op, 0);
1721 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1722 print_generic_expr (dump_file, oe->op, 0);
1723 fprintf (dump_file, " -> ");
1724 print_generic_expr (dump_file, t, 0);
1725 fprintf (dump_file, "\n");
1728 /* Now we can delete oe, as it has been subsumed by the new combined
1729 expression t. */
1730 ops->ordered_remove (i);
1731 reassociate_stats.ops_eliminated ++;
1733 /* If t is the same as curr->op, we're done. Otherwise we must
1734 replace curr->op with t. Special case is if we got a constant
1735 back, in which case we add it to the end instead of in place of
1736 the current entry. */
1737 if (TREE_CODE (t) == INTEGER_CST)
1739 ops->ordered_remove (currindex);
1740 add_to_ops_vec (ops, t);
1742 else if (!operand_equal_p (t, curr->op, 0))
1744 gimple sum;
1745 enum tree_code subcode;
1746 tree newop1;
1747 tree newop2;
1748 gcc_assert (COMPARISON_CLASS_P (t));
1749 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1750 STRIP_USELESS_TYPE_CONVERSION (newop1);
1751 STRIP_USELESS_TYPE_CONVERSION (newop2);
1752 gcc_checking_assert (is_gimple_val (newop1)
1753 && is_gimple_val (newop2));
1754 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1755 curr->op = gimple_get_lhs (sum);
1757 return true;
1760 return false;
1763 /* Perform various identities and other optimizations on the list of
1764 operand entries, stored in OPS. The tree code for the binary
1765 operation between all the operands is OPCODE. */
1767 static void
1768 optimize_ops_list (enum tree_code opcode,
1769 vec<operand_entry_t> *ops)
1771 unsigned int length = ops->length ();
1772 unsigned int i;
1773 operand_entry_t oe;
1774 operand_entry_t oelast = NULL;
1775 bool iterate = false;
1777 if (length == 1)
1778 return;
1780 oelast = ops->last ();
1782 /* If the last two are constants, pop the constants off, merge them
1783 and try the next two. */
1784 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1786 operand_entry_t oelm1 = (*ops)[length - 2];
1788 if (oelm1->rank == 0
1789 && is_gimple_min_invariant (oelm1->op)
1790 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1791 TREE_TYPE (oelast->op)))
1793 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1794 oelm1->op, oelast->op);
1796 if (folded && is_gimple_min_invariant (folded))
1798 if (dump_file && (dump_flags & TDF_DETAILS))
1799 fprintf (dump_file, "Merging constants\n");
1801 ops->pop ();
1802 ops->pop ();
1804 add_to_ops_vec (ops, folded);
1805 reassociate_stats.constants_eliminated++;
1807 optimize_ops_list (opcode, ops);
1808 return;
1813 eliminate_using_constants (opcode, ops);
1814 oelast = NULL;
1816 for (i = 0; ops->iterate (i, &oe);)
1818 bool done = false;
1820 if (eliminate_not_pairs (opcode, ops, i, oe))
1821 return;
1822 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1823 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1824 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1826 if (done)
1827 return;
1828 iterate = true;
1829 oelast = NULL;
1830 continue;
1832 oelast = oe;
1833 i++;
1836 length = ops->length ();
1837 oelast = ops->last ();
1839 if (iterate)
1840 optimize_ops_list (opcode, ops);
1843 /* The following functions are subroutines to optimize_range_tests and allow
1844 it to try to change a logical combination of comparisons into a range
1845 test.
1847 For example, both
1848 X == 2 || X == 5 || X == 3 || X == 4
1849 and
1850 X >= 2 && X <= 5
1851 are converted to
1852 (unsigned) (X - 2) <= 3
1854 For more information see comments above fold_range_test in fold-const.c,
1855 this implementation is for GIMPLE. */
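/* Illustrative aside, not part of this file: the range-test rewrite shown
   above, on a plain int, with hypothetical toy_* names.  Both predicates
   accept exactly x in {2, 3, 4, 5}.  The folded form does the subtraction
   in unsigned arithmetic, so values below 2 wrap to large numbers and fail
   the single comparison.  */

static int
toy_range_naive (int x)
{
  return x == 2 || x == 5 || x == 3 || x == 4;
}

static int
toy_range_folded (int x)
{
  return ((unsigned) x - 2) <= 3;
}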
1857 struct range_entry
1859 tree exp;
1860 tree low;
1861 tree high;
1862 bool in_p;
1863 bool strict_overflow_p;
1864 unsigned int idx, next;
1867 /* This is similar to make_range in fold-const.c, but on top of
1868 GIMPLE instead of trees. If EXP is non-NULL, it should be
1869 an SSA_NAME and STMT argument is ignored, otherwise STMT
1870 argument should be a GIMPLE_COND. */
1872 static void
1873 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1875 int in_p;
1876 tree low, high;
1877 bool is_bool, strict_overflow_p;
1879 r->exp = NULL_TREE;
1880 r->in_p = false;
1881 r->strict_overflow_p = false;
1882 r->low = NULL_TREE;
1883 r->high = NULL_TREE;
1884 if (exp != NULL_TREE
1885 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1886 return;
1888 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1889 and see if we can refine the range. Some of the cases below may not
1890 happen, but it doesn't seem worth worrying about this. We "continue"
1891 the outer loop when we've changed something; otherwise we "break"
1892 the switch, which will "break" the while. */
1893 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1894 high = low;
1895 in_p = 0;
1896 strict_overflow_p = false;
1897 is_bool = false;
1898 if (exp == NULL_TREE)
1899 is_bool = true;
1900 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1902 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1903 is_bool = true;
1904 else
1905 return;
1907 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1908 is_bool = true;
1910 while (1)
1912 enum tree_code code;
1913 tree arg0, arg1, exp_type;
1914 tree nexp;
1915 location_t loc;
1917 if (exp != NULL_TREE)
1919 if (TREE_CODE (exp) != SSA_NAME
1920 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1921 break;
1923 stmt = SSA_NAME_DEF_STMT (exp);
1924 if (!is_gimple_assign (stmt))
1925 break;
1927 code = gimple_assign_rhs_code (stmt);
1928 arg0 = gimple_assign_rhs1 (stmt);
1929 arg1 = gimple_assign_rhs2 (stmt);
1930 exp_type = TREE_TYPE (exp);
1932 else
1934 code = gimple_cond_code (stmt);
1935 arg0 = gimple_cond_lhs (stmt);
1936 arg1 = gimple_cond_rhs (stmt);
1937 exp_type = boolean_type_node;
1940 if (TREE_CODE (arg0) != SSA_NAME)
1941 break;
1942 loc = gimple_location (stmt);
1943 switch (code)
1945 case BIT_NOT_EXPR:
1946 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1947 /* Ensure the range is either +[-,0], +[0,0],
1948 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1949 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1950 or similar expression of unconditional true or
1951 false, it should not be negated. */
1952 && ((high && integer_zerop (high))
1953 || (low && integer_onep (low))))
1955 in_p = !in_p;
1956 exp = arg0;
1957 continue;
1959 break;
1960 case SSA_NAME:
1961 exp = arg0;
1962 continue;
1963 CASE_CONVERT:
1964 if (is_bool)
1965 goto do_default;
1966 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1968 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1969 is_bool = true;
1970 else
1971 return;
1973 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1974 is_bool = true;
1975 goto do_default;
1976 case EQ_EXPR:
1977 case NE_EXPR:
1978 case LT_EXPR:
1979 case LE_EXPR:
1980 case GE_EXPR:
1981 case GT_EXPR:
1982 is_bool = true;
1983 /* FALLTHRU */
1984 default:
1985 if (!is_bool)
1986 return;
1987 do_default:
1988 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1989 &low, &high, &in_p,
1990 &strict_overflow_p);
1991 if (nexp != NULL_TREE)
1993 exp = nexp;
1994 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1995 continue;
1997 break;
1999 break;
2001 if (is_bool)
2003 r->exp = exp;
2004 r->in_p = in_p;
2005 r->low = low;
2006 r->high = high;
2007 r->strict_overflow_p = strict_overflow_p;
2011 /* Comparison function for qsort. Sort entries
2012 without SSA_NAME exp first, then with SSA_NAMEs sorted
2013 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2014 by increasing ->low and if ->low is the same, by increasing
2015 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2016 maximum. */
2018 static int
2019 range_entry_cmp (const void *a, const void *b)
2021 const struct range_entry *p = (const struct range_entry *) a;
2022 const struct range_entry *q = (const struct range_entry *) b;
2024 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2026 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2028 /* Group range_entries for the same SSA_NAME together. */
2029 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2030 return -1;
2031 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2032 return 1;
2033 /* If ->low is different, NULL low goes first, then by
2034 ascending low. */
2035 if (p->low != NULL_TREE)
2037 if (q->low != NULL_TREE)
2039 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2040 p->low, q->low);
2041 if (tem && integer_onep (tem))
2042 return -1;
2043 tem = fold_binary (GT_EXPR, boolean_type_node,
2044 p->low, q->low);
2045 if (tem && integer_onep (tem))
2046 return 1;
2048 else
2049 return 1;
2051 else if (q->low != NULL_TREE)
2052 return -1;
2053 /* If ->high is different, NULL high goes last, before that by
2054 ascending high. */
2055 if (p->high != NULL_TREE)
2057 if (q->high != NULL_TREE)
2059 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2060 p->high, q->high);
2061 if (tem && integer_onep (tem))
2062 return -1;
2063 tem = fold_binary (GT_EXPR, boolean_type_node,
2064 p->high, q->high);
2065 if (tem && integer_onep (tem))
2066 return 1;
2068 else
2069 return -1;
2071 else if (p->high != NULL_TREE)
2072 return 1;
2073 /* If both ranges are the same, sort below by ascending idx. */
2075 else
2076 return 1;
2078 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2079 return -1;
2081 if (p->idx < q->idx)
2082 return -1;
2083 else
2085 gcc_checking_assert (p->idx > q->idx);
2086 return 1;
2090 /* Helper routine of optimize_range_test.
2091 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2092 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2093 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2094 is NULL, OTHERRANGEP must not be NULL and then points to
2095 an array of COUNT pointers to other ranges. Return
2096 true if the range merge has been successful.
2097 If OPCODE is ERROR_MARK, this is called from within
2098 maybe_optimize_range_tests and is performing inter-bb range optimization.
2099 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2100 oe->rank. */
2102 static bool
2103 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2104 struct range_entry **otherrangep,
2105 unsigned int count, enum tree_code opcode,
2106 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2107 bool in_p, tree low, tree high, bool strict_overflow_p)
2109 operand_entry_t oe = (*ops)[range->idx];
2110 tree op = oe->op;
2111 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2112 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2113 location_t loc = gimple_location (stmt);
2114 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2115 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2116 in_p, low, high);
2117 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2118 gimple_stmt_iterator gsi;
2119 unsigned int i;
2121 if (tem == NULL_TREE)
2122 return false;
2124 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2125 warning_at (loc, OPT_Wstrict_overflow,
2126 "assuming signed overflow does not occur "
2127 "when simplifying range test");
2129 if (dump_file && (dump_flags & TDF_DETAILS))
2131 struct range_entry *r;
2132 fprintf (dump_file, "Optimizing range tests ");
2133 print_generic_expr (dump_file, range->exp, 0);
2134 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2135 print_generic_expr (dump_file, range->low, 0);
2136 fprintf (dump_file, ", ");
2137 print_generic_expr (dump_file, range->high, 0);
2138 fprintf (dump_file, "]");
2139 for (i = 0; i < count; i++)
2141 if (otherrange)
2142 r = otherrange + i;
2143 else
2144 r = otherrangep[i];
2145 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2146 print_generic_expr (dump_file, r->low, 0);
2147 fprintf (dump_file, ", ");
2148 print_generic_expr (dump_file, r->high, 0);
2149 fprintf (dump_file, "]");
2151 fprintf (dump_file, "\n into ");
2152 print_generic_expr (dump_file, tem, 0);
2153 fprintf (dump_file, "\n");
2156 if (opcode == BIT_IOR_EXPR
2157 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2158 tem = invert_truthvalue_loc (loc, tem);
2160 tem = fold_convert_loc (loc, optype, tem);
2161 gsi = gsi_for_stmt (stmt);
2162 /* In rare cases range->exp can be equal to lhs of stmt.
2163 In that case we have to insert after the stmt rather than before
2164 it. */
2165 if (op == range->exp)
2167 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2168 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2169 GSI_CONTINUE_LINKING);
2171 else
2173 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2174 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2175 GSI_SAME_STMT);
2176 gsi_prev (&gsi);
2178 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2179 if (gimple_uid (gsi_stmt (gsi)))
2180 break;
2181 else
2182 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2184 oe->op = tem;
2185 range->exp = exp;
2186 range->low = low;
2187 range->high = high;
2188 range->in_p = in_p;
2189 range->strict_overflow_p = false;
2191 for (i = 0; i < count; i++)
2193 if (otherrange)
2194 range = otherrange + i;
2195 else
2196 range = otherrangep[i];
2197 oe = (*ops)[range->idx];
2198 /* Now change all the other range test immediate uses, so that
2199 those tests will be optimized away. */
2200 if (opcode == ERROR_MARK)
2202 if (oe->op)
2203 oe->op = build_int_cst (TREE_TYPE (oe->op),
2204 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2205 else
2206 oe->op = (oe->rank == BIT_IOR_EXPR
2207 ? boolean_false_node : boolean_true_node);
2209 else
2210 oe->op = error_mark_node;
2211 range->exp = NULL_TREE;
2213 return true;
2216 /* Optimize X == CST1 || X == CST2
2217 if popcount (CST1 ^ CST2) == 1 into
2218 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2219 Similarly for ranges. E.g.
2220 X != 2 && X != 3 && X != 10 && X != 11
2221 will be transformed by the previous optimization into
2222 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2223 and this loop can transform that into
2224 !(((X & ~8) - 2U) <= 1U). */
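/* Working through the example above: lowi = 2, highi = 3, lowj = 10,
   highj = 11, so lowi ^ lowj == 8 == highi ^ highj and popcount (8) == 1.
   Masking with ~8 gives exp = X & ~8, lowj = 2 & ~8 == 2 and
   highj = 3 & ~8 == 3, i.e. the single test ((X & ~8) - 2U) <= 1U
   shown above.  */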
2226 static bool
2227 optimize_range_tests_xor (enum tree_code opcode, tree type,
2228 tree lowi, tree lowj, tree highi, tree highj,
2229 vec<operand_entry_t> *ops,
2230 struct range_entry *rangei,
2231 struct range_entry *rangej)
2233 tree lowxor, highxor, tem, exp;
2234 /* Check lowi ^ lowj == highi ^ highj and
2235 popcount (lowi ^ lowj) == 1. */
2236 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2237 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2238 return false;
2239 if (!integer_pow2p (lowxor))
2240 return false;
2241 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2242 if (!tree_int_cst_equal (lowxor, highxor))
2243 return false;
2245 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2246 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2247 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2248 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2249 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2250 NULL, rangei->in_p, lowj, highj,
2251 rangei->strict_overflow_p
2252 || rangej->strict_overflow_p))
2253 return true;
2254 return false;
2257 /* Optimize X == CST1 || X == CST2
2258 if popcount (CST2 - CST1) == 1 into
2259 ((X - CST1) & ~(CST2 - CST1)) == 0.
2260 Similarly for ranges. E.g.
2261 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2262 || X == 75 || X == 45
2263 will be transformed by the previous optimization into
2264 (X - 43U) <= 3U || (X - 75U) <= 3U
2265 and this loop can transform that into
2266 ((X - 43U) & ~(75U - 43U)) <= 3U. */
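/* Working through the example above: lowi = 43, highi = 46, lowj = 75,
   highj = 78, so highi - lowi == 3 == highj - lowj and
   lowj - lowi == 32 has popcount 1.  The test for the first range,
   (X - 43U) <= 3U, is widened by masking off the 32 bit:
   ((X - 43U) & ~32U) <= 3U also accepts 75 through 78.  */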
2267 static bool
2268 optimize_range_tests_diff (enum tree_code opcode, tree type,
2269 tree lowi, tree lowj, tree highi, tree highj,
2270 vec<operand_entry_t> *ops,
2271 struct range_entry *rangei,
2272 struct range_entry *rangej)
2274 tree tem1, tem2, mask;
2275 /* Check highi - lowi == highj - lowj. */
2276 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2277 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2278 return false;
2279 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2280 if (!tree_int_cst_equal (tem1, tem2))
2281 return false;
2282 /* Check popcount (lowj - lowi) == 1. */
2283 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2284 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2285 return false;
2286 if (!integer_pow2p (tem1))
2287 return false;
2289 type = unsigned_type_for (type);
2290 tem1 = fold_convert (type, tem1);
2291 tem2 = fold_convert (type, tem2);
2292 lowi = fold_convert (type, lowi);
2293 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2294 tem1 = fold_binary (MINUS_EXPR, type,
2295 fold_convert (type, rangei->exp), lowi);
2296 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2297 lowj = build_int_cst (type, 0);
2298 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2299 NULL, rangei->in_p, lowj, tem2,
2300 rangei->strict_overflow_p
2301 || rangej->strict_overflow_p))
2302 return true;
2303 return false;
2306 /* Perform the checks common to optimize_range_tests_xor and
2307 optimize_range_tests_diff, then dispatch:
2308 if OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2309 otherwise call optimize_range_tests_diff. */
2311 static bool
2312 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2313 bool optimize_xor, vec<operand_entry_t> *ops,
2314 struct range_entry *ranges)
2316 int i, j;
2317 bool any_changes = false;
2318 for (i = first; i < length; i++)
2320 tree lowi, highi, lowj, highj, type, tem;
2322 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2323 continue;
2324 type = TREE_TYPE (ranges[i].exp);
2325 if (!INTEGRAL_TYPE_P (type))
2326 continue;
2327 lowi = ranges[i].low;
2328 if (lowi == NULL_TREE)
2329 lowi = TYPE_MIN_VALUE (type);
2330 highi = ranges[i].high;
2331 if (highi == NULL_TREE)
2332 continue;
2333 for (j = i + 1; j < length && j < i + 64; j++)
2335 bool changes;
2336 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2337 continue;
2338 lowj = ranges[j].low;
2339 if (lowj == NULL_TREE)
2340 continue;
2341 highj = ranges[j].high;
2342 if (highj == NULL_TREE)
2343 highj = TYPE_MAX_VALUE (type);
2344 /* Check lowj > highi. */
2345 tem = fold_binary (GT_EXPR, boolean_type_node,
2346 lowj, highi);
2347 if (tem == NULL_TREE || !integer_onep (tem))
2348 continue;
2349 if (optimize_xor)
2350 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2351 highi, highj, ops,
2352 ranges + i, ranges + j);
2353 else
2354 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2355 highi, highj, ops,
2356 ranges + i, ranges + j);
2357 if (changes)
2359 any_changes = true;
2360 break;
2364 return any_changes;
2367 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2368 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2369 EXP on success, NULL otherwise. */
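/* A small worked example (numbers reused from the comment further below,
   chosen purely for illustration): for TOTALLOW 43, LOW 75, HIGH 78 and
   PREC 64, HIGH - LOW == 3, so the initial mask covers 4 bits (0xf);
   LOW - TOTALLOW == 32 is still below PREC - 4, so the mask is shifted
   left by 32 to 0xf00000000 and EXP is returned unchanged.  */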
2371 static tree
2372 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2373 wide_int *mask, tree *totallowp)
2375 tree tem = int_const_binop (MINUS_EXPR, high, low);
2376 if (tem == NULL_TREE
2377 || TREE_CODE (tem) != INTEGER_CST
2378 || TREE_OVERFLOW (tem)
2379 || tree_int_cst_sgn (tem) == -1
2380 || compare_tree_int (tem, prec) != -1)
2381 return NULL_TREE;
2383 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2384 *mask = wi::shifted_mask (0, max, false, prec);
2385 if (TREE_CODE (exp) == BIT_AND_EXPR
2386 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2388 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2389 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2390 if (wi::popcount (msk) == 1
2391 && wi::ltu_p (msk, prec - max))
2393 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2394 max += msk.to_uhwi ();
2395 exp = TREE_OPERAND (exp, 0);
2396 if (integer_zerop (low)
2397 && TREE_CODE (exp) == PLUS_EXPR
2398 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2400 widest_int bias
2401 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2402 TYPE_PRECISION (TREE_TYPE (low))));
2403 tree tbias = wide_int_to_tree (TREE_TYPE (low), bias);
2404 if (totallowp)
2406 *totallowp = tbias;
2407 exp = TREE_OPERAND (exp, 0);
2408 STRIP_NOPS (exp);
2409 return exp;
2411 else if (!tree_int_cst_lt (totallow, tbias))
2412 return NULL_TREE;
2413 bias -= wi::to_widest (totallow);
2414 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2416 *mask = wi::lshift (*mask, bias);
2417 exp = TREE_OPERAND (exp, 0);
2418 STRIP_NOPS (exp);
2419 return exp;
2424 if (totallowp)
2425 return exp;
2426 if (!tree_int_cst_lt (totallow, low))
2427 return exp;
2428 tem = int_const_binop (MINUS_EXPR, low, totallow);
2429 if (tem == NULL_TREE
2430 || TREE_CODE (tem) != INTEGER_CST
2431 || TREE_OVERFLOW (tem)
2432 || compare_tree_int (tem, prec - max) == 1)
2433 return NULL_TREE;
2435 *mask = wi::lshift (*mask, wi::to_widest (tem));
2436 return exp;
2439 /* Attempt to optimize small range tests using bit test.
2440 E.g.
2441 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2442 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2443 has been optimized by earlier optimizations into:
2444 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2445 As the whole 43 through 82 range spans fewer than 64 values,
2446 for 64-bit word targets optimize that into:
2447 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
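/* The mask above can be read as follows: bit (V - 43) is set for every
   value V in the tested set, so 43, 44, 45, 46 and 49 give bits 0-3 and 6
   (0x4f in the low word), while 75, 76, 77, 78 and 82 give bits 32-35
   and 39 (0x8f in the next byte), together 0x8F0000004FULL.  The first
   comparison merely rules out values whose bit would fall outside the
   mask.  */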
2449 static bool
2450 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2451 vec<operand_entry_t> *ops,
2452 struct range_entry *ranges)
2454 int i, j;
2455 bool any_changes = false;
2456 int prec = GET_MODE_BITSIZE (word_mode);
2457 auto_vec<struct range_entry *, 64> candidates;
2459 for (i = first; i < length - 2; i++)
2461 tree lowi, highi, lowj, highj, type;
2463 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2464 continue;
2465 type = TREE_TYPE (ranges[i].exp);
2466 if (!INTEGRAL_TYPE_P (type))
2467 continue;
2468 lowi = ranges[i].low;
2469 if (lowi == NULL_TREE)
2470 lowi = TYPE_MIN_VALUE (type);
2471 highi = ranges[i].high;
2472 if (highi == NULL_TREE)
2473 continue;
2474 wide_int mask;
2475 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2476 highi, &mask, &lowi);
2477 if (exp == NULL_TREE)
2478 continue;
2479 bool strict_overflow_p = ranges[i].strict_overflow_p;
2480 candidates.truncate (0);
2481 int end = MIN (i + 64, length);
2482 for (j = i + 1; j < end; j++)
2484 tree exp2;
2485 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2486 continue;
2487 if (ranges[j].exp == exp)
2489 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2491 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2492 if (exp2 == exp)
2494 else if (TREE_CODE (exp2) == PLUS_EXPR)
2496 exp2 = TREE_OPERAND (exp2, 0);
2497 STRIP_NOPS (exp2);
2498 if (exp2 != exp)
2499 continue;
2501 else
2502 continue;
2504 else
2505 continue;
2506 lowj = ranges[j].low;
2507 if (lowj == NULL_TREE)
2508 continue;
2509 highj = ranges[j].high;
2510 if (highj == NULL_TREE)
2511 highj = TYPE_MAX_VALUE (type);
2512 wide_int mask2;
2513 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2514 highj, &mask2, NULL);
2515 if (exp2 != exp)
2516 continue;
2517 mask |= mask2;
2518 strict_overflow_p |= ranges[j].strict_overflow_p;
2519 candidates.safe_push (&ranges[j]);
2522 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2523 if (candidates.length () >= 2)
2525 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2526 wi::to_widest (lowi)
2527 + prec - 1 - wi::clz (mask));
2528 operand_entry_t oe = (*ops)[ranges[i].idx];
2529 tree op = oe->op;
2530 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2531 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2532 location_t loc = gimple_location (stmt);
2533 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2535 /* See if it isn't cheaper to pretend the minimum value of the
2536 range is 0, if the maximum value is small enough.
2537 We can then avoid the subtraction of the minimum value, but the
2538 mask constant could perhaps be more expensive. */
2539 if (compare_tree_int (lowi, 0) > 0
2540 && compare_tree_int (high, prec) < 0)
2542 int cost_diff;
2543 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2544 rtx reg = gen_raw_REG (word_mode, 10000);
2545 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2546 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2547 GEN_INT (-m)), speed_p);
2548 rtx r = immed_wide_int_const (mask, word_mode);
2549 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2550 speed_p);
2551 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2552 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2553 speed_p);
2554 if (cost_diff > 0)
2556 mask = wi::lshift (mask, m);
2557 lowi = build_zero_cst (TREE_TYPE (lowi));
2561 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2562 false, lowi, high);
2563 if (tem == NULL_TREE || is_gimple_val (tem))
2564 continue;
2565 tree etype = unsigned_type_for (TREE_TYPE (exp));
2566 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2567 fold_convert_loc (loc, etype, exp),
2568 fold_convert_loc (loc, etype, lowi));
2569 exp = fold_convert_loc (loc, integer_type_node, exp);
2570 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2571 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2572 build_int_cst (word_type, 1), exp);
2573 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2574 wide_int_to_tree (word_type, mask));
2575 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2576 build_zero_cst (word_type));
2577 if (is_gimple_val (exp))
2578 continue;
2580 /* The shift might have undefined behavior if TEM is true,
2581 but reassociate_bb isn't prepared to have basic blocks
2582 split when it is running. So, temporarily emit code
2583 with BIT_IOR_EXPR instead of &&, and fix it up in
2584 branch_fixup. */
2585 gimple_seq seq;
2586 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2587 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2588 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2589 gimple_seq seq2;
2590 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2591 gimple_seq_add_seq_without_update (&seq, seq2);
2592 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2593 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2594 gimple g
2595 = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2596 make_ssa_name (optype, NULL),
2597 tem, exp);
2598 gimple_set_location (g, loc);
2599 gimple_seq_add_stmt_without_update (&seq, g);
2600 exp = gimple_assign_lhs (g);
2601 tree val = build_zero_cst (optype);
2602 if (update_range_test (&ranges[i], NULL, candidates.address (),
2603 candidates.length (), opcode, ops, exp,
2604 seq, false, val, val, strict_overflow_p))
2606 any_changes = true;
2607 reassoc_branch_fixups.safe_push (tem);
2609 else
2610 gimple_seq_discard (seq);
2613 return any_changes;
2616 /* Optimize range tests, similarly to how fold_range_test optimizes
2617 them on trees. The tree code for the binary
2618 operation between all the operands is OPCODE.
2619 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2620 maybe_optimize_range_tests for inter-bb range optimization.
2621 In that case if oe->op is NULL, oe->id is the index of the bb whose
2622 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2623 the actual opcode. */
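/* For instance (an illustrative case, not from a specific testcase):
   with OPCODE BIT_AND_EXPR and operands whose defining statements test
   X != 2, X != 3 and X != 4, the three entries merge into the single
   range test (X - 2U) > 2U, and the now-redundant operands are dropped
   from the operand vector.  */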
2625 static bool
2626 optimize_range_tests (enum tree_code opcode,
2627 vec<operand_entry_t> *ops)
2629 unsigned int length = ops->length (), i, j, first;
2630 operand_entry_t oe;
2631 struct range_entry *ranges;
2632 bool any_changes = false;
2634 if (length == 1)
2635 return false;
2637 ranges = XNEWVEC (struct range_entry, length);
2638 for (i = 0; i < length; i++)
2640 oe = (*ops)[i];
2641 ranges[i].idx = i;
2642 init_range_entry (ranges + i, oe->op,
2643 oe->op ? NULL :
2644 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2645 /* For |, invert it now; we will invert it again before emitting
2646 the optimized expression. */
2647 if (opcode == BIT_IOR_EXPR
2648 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2649 ranges[i].in_p = !ranges[i].in_p;
2652 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2653 for (i = 0; i < length; i++)
2654 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2655 break;
2657 /* Try to merge ranges. */
2658 for (first = i; i < length; i++)
2660 tree low = ranges[i].low;
2661 tree high = ranges[i].high;
2662 int in_p = ranges[i].in_p;
2663 bool strict_overflow_p = ranges[i].strict_overflow_p;
2664 int update_fail_count = 0;
2666 for (j = i + 1; j < length; j++)
2668 if (ranges[i].exp != ranges[j].exp)
2669 break;
2670 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2671 ranges[j].in_p, ranges[j].low, ranges[j].high))
2672 break;
2673 strict_overflow_p |= ranges[j].strict_overflow_p;
2676 if (j == i + 1)
2677 continue;
2679 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2680 opcode, ops, ranges[i].exp, NULL, in_p,
2681 low, high, strict_overflow_p))
2683 i = j - 1;
2684 any_changes = true;
2686 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2687 while update_range_test would fail. */
2688 else if (update_fail_count == 64)
2689 i = j - 1;
2690 else
2691 ++update_fail_count;
2694 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2695 ops, ranges);
2697 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2698 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2699 ops, ranges);
2700 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2701 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2702 ops, ranges);
2704 if (any_changes && opcode != ERROR_MARK)
2706 j = 0;
2707 FOR_EACH_VEC_ELT (*ops, i, oe)
2709 if (oe->op == error_mark_node)
2710 continue;
2711 else if (i != j)
2712 (*ops)[j] = oe;
2713 j++;
2715 ops->truncate (j);
2718 XDELETEVEC (ranges);
2719 return any_changes;
2722 /* Return true if STMT is a cast like:
2723 <bb N>:
2725 _123 = (int) _234;
2727 <bb M>:
2728 # _345 = PHI <_123(N), 1(...), 1(...)>
2729 where _234 has bool type, _123 has a single use and
2730 bb N has a single successor M. This is commonly used in
2731 the last block of a range test. */
2733 static bool
2734 final_range_test_p (gimple stmt)
2736 basic_block bb, rhs_bb;
2737 edge e;
2738 tree lhs, rhs;
2739 use_operand_p use_p;
2740 gimple use_stmt;
2742 if (!gimple_assign_cast_p (stmt))
2743 return false;
2744 bb = gimple_bb (stmt);
2745 if (!single_succ_p (bb))
2746 return false;
2747 e = single_succ_edge (bb);
2748 if (e->flags & EDGE_COMPLEX)
2749 return false;
2751 lhs = gimple_assign_lhs (stmt);
2752 rhs = gimple_assign_rhs1 (stmt);
2753 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2754 || TREE_CODE (rhs) != SSA_NAME
2755 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2756 return false;
2758 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2759 if (!single_imm_use (lhs, &use_p, &use_stmt))
2760 return false;
2762 if (gimple_code (use_stmt) != GIMPLE_PHI
2763 || gimple_bb (use_stmt) != e->dest)
2764 return false;
2766 /* And that the rhs is defined in the same loop. */
2767 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2768 if (rhs_bb == NULL
2769 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2770 return false;
2772 return true;
2775 /* Return true if BB is a suitable basic block for inter-bb range test
2776 optimization. If BACKWARD is true, BB should be the only predecessor
2777 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2778 or compared against to find the common basic block to which all
2779 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
2780 should be the only predecessor of BB. */
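/* Schematically (block numbers invented for illustration): with edges
   bb2 -> bb3 -> bb4, where bb2 and bb3 both end in a GIMPLE_COND and both
   also branch to bb5, suitable_cond_bb (bb2, bb3, &other_bb, true)
   computes other_bb == bb5 and then checks that the PHIs in bb5 receive
   matching arguments on the edges from bb2 and bb3.  */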
2782 static bool
2783 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2784 bool backward)
2786 edge_iterator ei, ei2;
2787 edge e, e2;
2788 gimple stmt;
2789 gimple_stmt_iterator gsi;
2790 bool other_edge_seen = false;
2791 bool is_cond;
2793 if (test_bb == bb)
2794 return false;
2795 /* Check last stmt first. */
2796 stmt = last_stmt (bb);
2797 if (stmt == NULL
2798 || (gimple_code (stmt) != GIMPLE_COND
2799 && (backward || !final_range_test_p (stmt)))
2800 || gimple_visited_p (stmt)
2801 || stmt_could_throw_p (stmt)
2802 || *other_bb == bb)
2803 return false;
2804 is_cond = gimple_code (stmt) == GIMPLE_COND;
2805 if (is_cond)
2807 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2808 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2809 to *OTHER_BB (if not set yet, try to find it out). */
2810 if (EDGE_COUNT (bb->succs) != 2)
2811 return false;
2812 FOR_EACH_EDGE (e, ei, bb->succs)
2814 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2815 return false;
2816 if (e->dest == test_bb)
2818 if (backward)
2819 continue;
2820 else
2821 return false;
2823 if (e->dest == bb)
2824 return false;
2825 if (*other_bb == NULL)
2827 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2828 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2829 return false;
2830 else if (e->dest == e2->dest)
2831 *other_bb = e->dest;
2832 if (*other_bb == NULL)
2833 return false;
2835 if (e->dest == *other_bb)
2836 other_edge_seen = true;
2837 else if (backward)
2838 return false;
2840 if (*other_bb == NULL || !other_edge_seen)
2841 return false;
2843 else if (single_succ (bb) != *other_bb)
2844 return false;
2846 /* Now check all PHIs of *OTHER_BB. */
2847 e = find_edge (bb, *other_bb);
2848 e2 = find_edge (test_bb, *other_bb);
2849 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2851 gimple phi = gsi_stmt (gsi);
2852 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2853 corresponding to BB and TEST_BB predecessor must be the same. */
2854 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2855 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2857 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2858 one of the PHIs should have the lhs of the last stmt in
2859 that block as PHI arg and that PHI should have 0 or 1
2860 corresponding to it in all other range test basic blocks
2861 considered. */
2862 if (!is_cond)
2864 if (gimple_phi_arg_def (phi, e->dest_idx)
2865 == gimple_assign_lhs (stmt)
2866 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2867 || integer_onep (gimple_phi_arg_def (phi,
2868 e2->dest_idx))))
2869 continue;
2871 else
2873 gimple test_last = last_stmt (test_bb);
2874 if (gimple_code (test_last) != GIMPLE_COND
2875 && gimple_phi_arg_def (phi, e2->dest_idx)
2876 == gimple_assign_lhs (test_last)
2877 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2878 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2879 continue;
2882 return false;
2885 return true;
2888 /* Return true if BB doesn't have side-effects that would disallow
2889 range test optimization, all SSA_NAMEs set in the bb are consumed
2890 within the bb, and there are no PHIs. */
2892 static bool
2893 no_side_effect_bb (basic_block bb)
2895 gimple_stmt_iterator gsi;
2896 gimple last;
2898 if (!gimple_seq_empty_p (phi_nodes (bb)))
2899 return false;
2900 last = last_stmt (bb);
2901 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2903 gimple stmt = gsi_stmt (gsi);
2904 tree lhs;
2905 imm_use_iterator imm_iter;
2906 use_operand_p use_p;
2908 if (is_gimple_debug (stmt))
2909 continue;
2910 if (gimple_has_side_effects (stmt))
2911 return false;
2912 if (stmt == last)
2913 return true;
2914 if (!is_gimple_assign (stmt))
2915 return false;
2916 lhs = gimple_assign_lhs (stmt);
2917 if (TREE_CODE (lhs) != SSA_NAME)
2918 return false;
2919 if (gimple_assign_rhs_could_trap_p (stmt))
2920 return false;
2921 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2923 gimple use_stmt = USE_STMT (use_p);
2924 if (is_gimple_debug (use_stmt))
2925 continue;
2926 if (gimple_bb (use_stmt) != bb)
2927 return false;
2930 return false;
2933 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2934 return true and fill in *OPS recursively. */
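/* A hypothetical example (SSA names invented for illustration): for
   _9 = _7 | _8;  _7 = _1 | _2;  calling get_ops (_9, BIT_IOR_EXPR, ...)
   recurses through _7 and pushes the leaf operands _1, _2 and _8 into
   *OPS, provided each leaf has a single use.  */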
2936 static bool
2937 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2938 struct loop *loop)
2940 gimple stmt = SSA_NAME_DEF_STMT (var);
2941 tree rhs[2];
2942 int i;
2944 if (!is_reassociable_op (stmt, code, loop))
2945 return false;
2947 rhs[0] = gimple_assign_rhs1 (stmt);
2948 rhs[1] = gimple_assign_rhs2 (stmt);
2949 gimple_set_visited (stmt, true);
2950 for (i = 0; i < 2; i++)
2951 if (TREE_CODE (rhs[i]) == SSA_NAME
2952 && !get_ops (rhs[i], code, ops, loop)
2953 && has_single_use (rhs[i]))
2955 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2957 oe->op = rhs[i];
2958 oe->rank = code;
2959 oe->id = 0;
2960 oe->count = 1;
2961 ops->safe_push (oe);
2963 return true;
2966 /* Find the ops that were added by get_ops starting from VAR, see if
2967 they were changed during update_range_test and, if so, create new
2968 stmts. */
2970 static tree
2971 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2972 unsigned int *pidx, struct loop *loop)
2974 gimple stmt = SSA_NAME_DEF_STMT (var);
2975 tree rhs[4];
2976 int i;
2978 if (!is_reassociable_op (stmt, code, loop))
2979 return NULL;
2981 rhs[0] = gimple_assign_rhs1 (stmt);
2982 rhs[1] = gimple_assign_rhs2 (stmt);
2983 rhs[2] = rhs[0];
2984 rhs[3] = rhs[1];
2985 for (i = 0; i < 2; i++)
2986 if (TREE_CODE (rhs[i]) == SSA_NAME)
2988 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2989 if (rhs[2 + i] == NULL_TREE)
2991 if (has_single_use (rhs[i]))
2992 rhs[2 + i] = ops[(*pidx)++]->op;
2993 else
2994 rhs[2 + i] = rhs[i];
2997 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2998 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3000 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3001 var = make_ssa_name (TREE_TYPE (var), NULL);
3002 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3003 var, rhs[2], rhs[3]);
3004 gimple_set_uid (g, gimple_uid (stmt));
3005 gimple_set_visited (g, true);
3006 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3008 return var;
3011 /* Structure to track the initial value passed to get_ops and
3012 the range in the ops vector for each basic block. */
3014 struct inter_bb_range_test_entry
3016 tree op;
3017 unsigned int first_idx, last_idx;
3020 /* Inter-bb range test optimization. */
3022 static void
3023 maybe_optimize_range_tests (gimple stmt)
3025 basic_block first_bb = gimple_bb (stmt);
3026 basic_block last_bb = first_bb;
3027 basic_block other_bb = NULL;
3028 basic_block bb;
3029 edge_iterator ei;
3030 edge e;
3031 auto_vec<operand_entry_t> ops;
3032 auto_vec<inter_bb_range_test_entry> bbinfo;
3033 bool any_changes = false;
3035 /* Consider only basic blocks that end with GIMPLE_COND or
3036 a cast statement satisfying final_range_test_p. All
3037 but the last bb in the first_bb .. last_bb range
3038 should end with GIMPLE_COND. */
3039 if (gimple_code (stmt) == GIMPLE_COND)
3041 if (EDGE_COUNT (first_bb->succs) != 2)
3042 return;
3044 else if (final_range_test_p (stmt))
3045 other_bb = single_succ (first_bb);
3046 else
3047 return;
3049 if (stmt_could_throw_p (stmt))
3050 return;
3052 /* As relative ordering of post-dominator sons isn't fixed,
3053 maybe_optimize_range_tests can be called first on any
3054 bb in the range we want to optimize. So, start by searching
3055 backwards, to see whether first_bb can be moved to a predecessor. */
3056 while (single_pred_p (first_bb))
3058 basic_block pred_bb = single_pred (first_bb);
3059 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3060 break;
3061 if (!no_side_effect_bb (first_bb))
3062 break;
3063 first_bb = pred_bb;
3065 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3066 Before starting forward search in last_bb successors, find
3067 out the other_bb. */
3068 if (first_bb == last_bb)
3070 other_bb = NULL;
3071 /* As non-GIMPLE_COND last stmt always terminates the range,
3072 if forward search didn't discover anything, just give up. */
3073 if (gimple_code (stmt) != GIMPLE_COND)
3074 return;
3075 /* Look at both successors. Either it ends with a GIMPLE_COND
3076 and satisfies suitable_cond_bb, or ends with a cast and
3077 other_bb is that cast's successor. */
3078 FOR_EACH_EDGE (e, ei, first_bb->succs)
3079 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3080 || e->dest == first_bb)
3081 return;
3082 else if (single_pred_p (e->dest))
3084 stmt = last_stmt (e->dest);
3085 if (stmt
3086 && gimple_code (stmt) == GIMPLE_COND
3087 && EDGE_COUNT (e->dest->succs) == 2)
3089 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3090 break;
3091 else
3092 other_bb = NULL;
3094 else if (stmt
3095 && final_range_test_p (stmt)
3096 && find_edge (first_bb, single_succ (e->dest)))
3098 other_bb = single_succ (e->dest);
3099 if (other_bb == first_bb)
3100 other_bb = NULL;
3103 if (other_bb == NULL)
3104 return;
3106 /* Now do the forward search, moving last_bb to successor bbs
3107 that aren't other_bb. */
3108 while (EDGE_COUNT (last_bb->succs) == 2)
3110 FOR_EACH_EDGE (e, ei, last_bb->succs)
3111 if (e->dest != other_bb)
3112 break;
3113 if (e == NULL)
3114 break;
3115 if (!single_pred_p (e->dest))
3116 break;
3117 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3118 break;
3119 if (!no_side_effect_bb (e->dest))
3120 break;
3121 last_bb = e->dest;
3123 if (first_bb == last_bb)
3124 return;
3125 /* Here basic blocks first_bb through last_bb's predecessor
3126 end with GIMPLE_COND, all of them have one of the edges to
3127 other_bb and another to another block in the range,
3128 all blocks except first_bb have no side-effects, and
3129 last_bb ends with either a GIMPLE_COND or a cast satisfying
3130 final_range_test_p. */
3131 for (bb = last_bb; ; bb = single_pred (bb))
3133 enum tree_code code;
3134 tree lhs, rhs;
3135 inter_bb_range_test_entry bb_ent;
3137 bb_ent.op = NULL_TREE;
3138 bb_ent.first_idx = ops.length ();
3139 bb_ent.last_idx = bb_ent.first_idx;
3140 e = find_edge (bb, other_bb);
3141 stmt = last_stmt (bb);
3142 gimple_set_visited (stmt, true);
3143 if (gimple_code (stmt) != GIMPLE_COND)
3145 use_operand_p use_p;
3146 gimple phi;
3147 edge e2;
3148 unsigned int d;
3150 lhs = gimple_assign_lhs (stmt);
3151 rhs = gimple_assign_rhs1 (stmt);
3152 gcc_assert (bb == last_bb);
3154 /* stmt is
3155 _123 = (int) _234;
3157 followed by:
3158 <bb M>:
3159 # _345 = PHI <_123(N), 1(...), 1(...)>
3161 or 0 instead of 1. If it is 0, the _234
3162 range test is anded together with all the
3163 other range tests, if it is 1, it is ored with
3164 them. */
3165 single_imm_use (lhs, &use_p, &phi);
3166 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3167 e2 = find_edge (first_bb, other_bb);
3168 d = e2->dest_idx;
3169 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3170 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3171 code = BIT_AND_EXPR;
3172 else
3174 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3175 code = BIT_IOR_EXPR;
3178 /* If _234 SSA_NAME_DEF_STMT is
3179 _234 = _567 | _789;
3180 (or &, corresponding to 1/0 in the phi arguments),
3181 push into ops the individual range test arguments
3182 of the bitwise or resp. and, recursively. */
3183 if (!get_ops (rhs, code, &ops,
3184 loop_containing_stmt (stmt))
3185 && has_single_use (rhs))
3187 /* Otherwise, push the _234 range test itself. */
3188 operand_entry_t oe
3189 = (operand_entry_t) pool_alloc (operand_entry_pool);
3191 oe->op = rhs;
3192 oe->rank = code;
3193 oe->id = 0;
3194 oe->count = 1;
3195 ops.safe_push (oe);
3196 bb_ent.last_idx++;
3198 else
3199 bb_ent.last_idx = ops.length ();
3200 bb_ent.op = rhs;
3201 bbinfo.safe_push (bb_ent);
3202 continue;
3204 /* Otherwise stmt is GIMPLE_COND. */
3205 code = gimple_cond_code (stmt);
3206 lhs = gimple_cond_lhs (stmt);
3207 rhs = gimple_cond_rhs (stmt);
3208 if (TREE_CODE (lhs) == SSA_NAME
3209 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3210 && ((code != EQ_EXPR && code != NE_EXPR)
3211 || rhs != boolean_false_node
3212 /* Either push into ops the individual bitwise
3213 or resp. and operands, depending on which
3214 edge is other_bb. */
3215 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3216 ^ (code == EQ_EXPR))
3217 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3218 loop_containing_stmt (stmt))))
3220 /* Or push the GIMPLE_COND stmt itself. */
3221 operand_entry_t oe
3222 = (operand_entry_t) pool_alloc (operand_entry_pool);
3224 oe->op = NULL;
3225 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3226 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3227 /* oe->op = NULL signals that there is no SSA_NAME
3228 for the range test, and oe->id instead is the
3229 number of the basic block at whose end the
3230 GIMPLE_COND is. */
3231 oe->id = bb->index;
3232 oe->count = 1;
3233 ops.safe_push (oe);
3234 bb_ent.op = NULL;
3235 bb_ent.last_idx++;
3237 else if (ops.length () > bb_ent.first_idx)
3239 bb_ent.op = lhs;
3240 bb_ent.last_idx = ops.length ();
3242 bbinfo.safe_push (bb_ent);
3243 if (bb == first_bb)
3244 break;
3246 if (ops.length () > 1)
3247 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3248 if (any_changes)
3250 unsigned int idx;
3251 /* update_ops relies on has_single_use predicates returning the
3252 same values as they did during get_ops earlier. Additionally it
3253 never removes statements, only adds new ones, and it should walk
3254 from the single imm use and check the predicate already before
3255 making those changes.
3256 On the other hand, the direct handling of GIMPLE_COND can turn
3257 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3258 it needs to be done in a separate loop afterwards. */
3259 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3261 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3262 && bbinfo[idx].op != NULL_TREE)
3264 tree new_op;
3266 stmt = last_stmt (bb);
3267 new_op = update_ops (bbinfo[idx].op,
3268 (enum tree_code)
3269 ops[bbinfo[idx].first_idx]->rank,
3270 ops, &bbinfo[idx].first_idx,
3271 loop_containing_stmt (stmt));
3272 if (new_op == NULL_TREE)
3274 gcc_assert (bb == last_bb);
3275 new_op = ops[bbinfo[idx].first_idx++]->op;
3277 if (bbinfo[idx].op != new_op)
3279 imm_use_iterator iter;
3280 use_operand_p use_p;
3281 gimple use_stmt, cast_stmt = NULL;
3283 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3284 if (is_gimple_debug (use_stmt))
3285 continue;
3286 else if (gimple_code (use_stmt) == GIMPLE_COND
3287 || gimple_code (use_stmt) == GIMPLE_PHI)
3288 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3289 SET_USE (use_p, new_op);
3290 else if (gimple_assign_cast_p (use_stmt))
3291 cast_stmt = use_stmt;
3292 else
3293 gcc_unreachable ();
3294 if (cast_stmt)
3296 gcc_assert (bb == last_bb);
3297 tree lhs = gimple_assign_lhs (cast_stmt);
3298 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3299 enum tree_code rhs_code
3300 = gimple_assign_rhs_code (cast_stmt);
3301 gimple g;
3302 if (is_gimple_min_invariant (new_op))
3304 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3305 g = gimple_build_assign (new_lhs, new_op);
3307 else
3308 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3309 new_op, NULL_TREE);
3310 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3311 gimple_set_uid (g, gimple_uid (cast_stmt));
3312 gimple_set_visited (g, true);
3313 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3314 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3315 if (is_gimple_debug (use_stmt))
3316 continue;
3317 else if (gimple_code (use_stmt) == GIMPLE_COND
3318 || gimple_code (use_stmt) == GIMPLE_PHI)
3319 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3320 SET_USE (use_p, new_lhs);
3321 else
3322 gcc_unreachable ();
3326 if (bb == first_bb)
3327 break;
3329 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3331 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3332 && bbinfo[idx].op == NULL_TREE
3333 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3335 stmt = last_stmt (bb);
3336 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3337 gimple_cond_make_false (stmt);
3338 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3339 gimple_cond_make_true (stmt);
3340 else
3342 gimple_cond_set_code (stmt, NE_EXPR);
3343 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3344 gimple_cond_set_rhs (stmt, boolean_false_node);
3346 update_stmt (stmt);
3348 if (bb == first_bb)
3349 break;
3354 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3355 of STMT in its operands. This is also known as a "destructive
3356 update" operation. */
3358 static bool
3359 is_phi_for_stmt (gimple stmt, tree operand)
3361 gimple def_stmt;
3362 tree lhs;
3363 use_operand_p arg_p;
3364 ssa_op_iter i;
3366 if (TREE_CODE (operand) != SSA_NAME)
3367 return false;
3369 lhs = gimple_assign_lhs (stmt);
3371 def_stmt = SSA_NAME_DEF_STMT (operand);
3372 if (gimple_code (def_stmt) != GIMPLE_PHI)
3373 return false;
3375 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3376 if (lhs == USE_FROM_PTR (arg_p))
3377 return true;
3378 return false;
3381 /* Remove def stmt of VAR if VAR has zero uses and recurse
3382 on rhs1 operand if so. */
3384 static void
3385 remove_visited_stmt_chain (tree var)
3387 gimple stmt;
3388 gimple_stmt_iterator gsi;
3390 while (1)
3392 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3393 return;
3394 stmt = SSA_NAME_DEF_STMT (var);
3395 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3397 var = gimple_assign_rhs1 (stmt);
3398 gsi = gsi_for_stmt (stmt);
3399 reassoc_remove_stmt (&gsi);
3400 release_defs (stmt);
3402 else
3403 return;
3407 /* This function checks three consecutive operands in
3408 the passed operands vector OPS starting from OPINDEX and
3409 swaps two operands if it is profitable for the binary operation
3410 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3412 We pair ops with the same rank if possible.
3414 The alternative we try is to see if STMT is a destructive
3415 update style statement, which is like:
3416 b = phi (a, ...)
3417 a = c + b;
3418 In that case, we want to use the destructive update form to
3419 expose the possible vectorizer sum reduction opportunity.
3420 In that case, the third operand will be the phi node. This
3421 check is not performed if STMT is null.
3423 We could, of course, try to be better as noted above, and do a
3424 lot of work to try to find these opportunities in >3 operand
3425 cases, but it is unlikely to be worth it. */
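/* For example (ranks invented for illustration): if the three operands
   have ranks 4, 4 and 2, the first and third are swapped so that the two
   rank-4 operands end up in the OPINDEX + 1 and OPINDEX + 2 slots that
   feed one binary statement.  */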
3427 static void
3428 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3429 unsigned int opindex, gimple stmt)
3431 operand_entry_t oe1, oe2, oe3;
3433 oe1 = ops[opindex];
3434 oe2 = ops[opindex + 1];
3435 oe3 = ops[opindex + 2];
3437 if ((oe1->rank == oe2->rank
3438 && oe2->rank != oe3->rank)
3439 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3440 && !is_phi_for_stmt (stmt, oe1->op)
3441 && !is_phi_for_stmt (stmt, oe2->op)))
3443 struct operand_entry temp = *oe3;
3444 oe3->op = oe1->op;
3445 oe3->rank = oe1->rank;
3446 oe1->op = temp.op;
3447 oe1->rank= temp.rank;
3449 else if ((oe1->rank == oe3->rank
3450 && oe2->rank != oe3->rank)
3451 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3452 && !is_phi_for_stmt (stmt, oe1->op)
3453 && !is_phi_for_stmt (stmt, oe3->op)))
3455 struct operand_entry temp = *oe2;
3456 oe2->op = oe1->op;
3457 oe2->rank = oe1->rank;
3458 oe1->op = temp.op;
3459 oe1->rank = temp.rank;
3463 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3464 two definitions, otherwise return STMT. */
3466 static inline gimple
3467 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3469 if (TREE_CODE (rhs1) == SSA_NAME
3470 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3471 stmt = SSA_NAME_DEF_STMT (rhs1);
3472 if (TREE_CODE (rhs2) == SSA_NAME
3473 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3474 stmt = SSA_NAME_DEF_STMT (rhs2);
3475 return stmt;
3478 /* Recursively rewrite our linearized statements so that the operators
3479 match those in OPS[OPINDEX], putting the computation in rank
3480 order. Return new lhs. */
3482 static tree
3483 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3484 vec<operand_entry_t> ops, bool changed)
3486 tree rhs1 = gimple_assign_rhs1 (stmt);
3487 tree rhs2 = gimple_assign_rhs2 (stmt);
3488 tree lhs = gimple_assign_lhs (stmt);
3489 operand_entry_t oe;
3491 /* The final recursion case for this function is that you have
3492 exactly two operations left.
3493 If we had exactly one op in the entire list to start with, we
3494 would have never called this function, and the tail recursion
3495 rewrites them one at a time. */
3496 if (opindex + 2 == ops.length ())
3498 operand_entry_t oe1, oe2;
3500 oe1 = ops[opindex];
3501 oe2 = ops[opindex + 1];
3503 if (rhs1 != oe1->op || rhs2 != oe2->op)
3505 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3506 unsigned int uid = gimple_uid (stmt);
3508 if (dump_file && (dump_flags & TDF_DETAILS))
3510 fprintf (dump_file, "Transforming ");
3511 print_gimple_stmt (dump_file, stmt, 0, 0);
3514 if (changed)
3516 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3517 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3518 stmt
3519 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3520 lhs, oe1->op, oe2->op);
3521 gimple_set_uid (stmt, uid);
3522 gimple_set_visited (stmt, true);
3523 if (insert_point == gsi_stmt (gsi))
3524 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3525 else
3526 insert_stmt_after (stmt, insert_point);
3528 else
3530 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3531 == stmt);
3532 gimple_assign_set_rhs1 (stmt, oe1->op);
3533 gimple_assign_set_rhs2 (stmt, oe2->op);
3534 update_stmt (stmt);
3537 if (rhs1 != oe1->op && rhs1 != oe2->op)
3538 remove_visited_stmt_chain (rhs1);
3540 if (dump_file && (dump_flags & TDF_DETAILS))
3542 fprintf (dump_file, " into ");
3543 print_gimple_stmt (dump_file, stmt, 0, 0);
3546 return lhs;
3549 /* If we hit here, we should have 3 or more ops left. */
3550 gcc_assert (opindex + 2 < ops.length ());
3552 /* Rewrite the next operator. */
3553 oe = ops[opindex];
3555 /* Recurse on the LHS of the binary operator, which is guaranteed to
3556 be the non-leaf side. */
3557 tree new_rhs1
3558 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3559 changed || oe->op != rhs2);
3561 if (oe->op != rhs2 || new_rhs1 != rhs1)
3563 if (dump_file && (dump_flags & TDF_DETAILS))
3565 fprintf (dump_file, "Transforming ");
3566 print_gimple_stmt (dump_file, stmt, 0, 0);
3569 /* If changed is false, this is either opindex == 0
3570 or all outer rhs2's were equal to corresponding oe->op,
3571 and powi_result is NULL.
3572 That means lhs is equivalent before and after reassociation.
3573 Otherwise ensure the old lhs SSA_NAME is not reused and
3574 create a new stmt as well, so that any debug stmts will be
3575 properly adjusted. */
3576 if (changed)
3578 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3579 unsigned int uid = gimple_uid (stmt);
3580 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3582 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3583 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3584 lhs, new_rhs1, oe->op);
3585 gimple_set_uid (stmt, uid);
3586 gimple_set_visited (stmt, true);
3587 if (insert_point == gsi_stmt (gsi))
3588 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3589 else
3590 insert_stmt_after (stmt, insert_point);
3592 else
3594 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3595 == stmt);
3596 gimple_assign_set_rhs1 (stmt, new_rhs1);
3597 gimple_assign_set_rhs2 (stmt, oe->op);
3598 update_stmt (stmt);
3601 if (dump_file && (dump_flags & TDF_DETAILS))
3603 fprintf (dump_file, " into ");
3604 print_gimple_stmt (dump_file, stmt, 0, 0);
3607 return lhs;
3610 /* Find out how many cycles we need to compute a statement chain.
3611 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3612 maximum number of independent statements we may execute per cycle. */
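/* For illustration: with OPS_NUM 12 and CPU_WIDTH 2 this computes
   12 / (2 * 2) = 3 initial cycles, leaving 12 - 3 * 2 = 6 operands;
   6 is not a power of two, so floor_log2 (6) + 1 = 3 more cycles are
   added, giving 6 cycles in total.  */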
3614 static int
3615 get_required_cycles (int ops_num, int cpu_width)
3617 int res;
3618 int elog;
3619 unsigned int rest;
3621 /* While we have more than 2 * cpu_width operands
3622 we may reduce the number of operands by cpu_width
3623 per cycle. */
3624 res = ops_num / (2 * cpu_width);
3626 /* The remaining operand count may be reduced twice per cycle
3627 until we have only one operand. */
3628 rest = (unsigned)(ops_num - res * cpu_width);
3629 elog = exact_log2 (rest);
3630 if (elog >= 0)
3631 res += elog;
3632 else
3633 res += floor_log2 (rest) + 1;
3635 return res;
3638 /* Returns an optimal number of registers to use for computation of
3639 given statements. */
3641 static int
3642 get_reassociation_width (int ops_num, enum tree_code opc,
3643 machine_mode mode)
3645 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3646 int width;
3647 int width_min;
3648 int cycles_best;
3650 if (param_width > 0)
3651 width = param_width;
3652 else
3653 width = targetm.sched.reassociation_width (opc, mode);
3655 if (width == 1)
3656 return width;
3658 /* Get the minimal time required for sequence computation. */
3659 cycles_best = get_required_cycles (ops_num, width);
3661 /* Check if we may use a smaller width and still compute the sequence in
3662 the same time. That would allow us to reduce register usage.
3663 get_required_cycles does not decrease as the width shrinks,
3664 so we can perform a binary search for the minimal width that still
3665 results in the optimal cycle count. */
3666 width_min = 1;
3667 while (width > width_min)
3669 int width_mid = (width + width_min) / 2;
3671 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3672 width = width_mid;
3673 else if (width_min < width_mid)
3674 width_min = width_mid;
3675 else
3676 break;
3679 return width;
3682 /* Recursively rewrite our linearized statements so that the operators
3683 match those in OPS[OPINDEX], putting the computation in rank
3684 order and trying to allow operations to be executed in
3685 parallel. */
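/* A schematic example (operand names invented for illustration): with
   width 2 and operands a, b, c, d the rewrite emits _t1 = a + b and
   _t2 = c + d, which are independent and can issue in the same cycle,
   followed by the final statement _t3 = _t1 + _t2.  */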
3687 static void
3688 rewrite_expr_tree_parallel (gimple stmt, int width,
3689 vec<operand_entry_t> ops)
3691 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3692 int op_num = ops.length ();
3693 int stmt_num = op_num - 1;
3694 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3695 int op_index = op_num - 1;
3696 int stmt_index = 0;
3697 int ready_stmts_end = 0;
3698 int i = 0;
3699 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3701 /* We start expression rewriting from the top statements.
3702 So, in this loop we create a full list of statements
3703 we will work with. */
3704 stmts[stmt_num - 1] = stmt;
3705 for (i = stmt_num - 2; i >= 0; i--)
3706 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3708 for (i = 0; i < stmt_num; i++)
3710 tree op1, op2;
3712 /* Determine whether we should use results of
3713 already handled statements or not. */
3714 if (ready_stmts_end == 0
3715 && (i - stmt_index >= width || op_index < 1))
3716 ready_stmts_end = i;
3718 /* Now we choose operands for the next statement. A nonzero
3719 value in ready_stmts_end means here that we should use
3720 the result of already generated statements as a new operand. */
3721 if (ready_stmts_end > 0)
3723 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3724 if (ready_stmts_end > stmt_index)
3725 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3726 else if (op_index >= 0)
3727 op2 = ops[op_index--]->op;
3728 else
3730 gcc_assert (stmt_index < i);
3731 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3734 if (stmt_index >= ready_stmts_end)
3735 ready_stmts_end = 0;
3737 else
3739 if (op_index > 1)
3740 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3741 op2 = ops[op_index--]->op;
3742 op1 = ops[op_index--]->op;
3745 /* If we emit the last statement then we should put
3746 operands into the last statement. It will also
3747 break the loop. */
3748 if (op_index < 0 && stmt_index == i)
3749 i = stmt_num - 1;
3751 if (dump_file && (dump_flags & TDF_DETAILS))
3753 fprintf (dump_file, "Transforming ");
3754 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3757 /* We keep original statement only for the last one. All
3758 others are recreated. */
3759 if (i == stmt_num - 1)
3761 gimple_assign_set_rhs1 (stmts[i], op1);
3762 gimple_assign_set_rhs2 (stmts[i], op2);
3763 update_stmt (stmts[i]);
3765 else
3766 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3768 if (dump_file && (dump_flags & TDF_DETAILS))
3770 fprintf (dump_file, " into ");
3771 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3775 remove_visited_stmt_chain (last_rhs1);
3778 /* Transform STMT, which is really (A + B) + (C + D), into the left
3779 linear form, ((A + B) + C) + D.
3780 Recurse on D if necessary. */
3782 static void
3783 linearize_expr (gimple stmt)
3785 gimple_stmt_iterator gsi;
3786 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3787 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3788 gimple oldbinrhs = binrhs;
3789 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3790 gimple newbinrhs = NULL;
3791 struct loop *loop = loop_containing_stmt (stmt);
3792 tree lhs = gimple_assign_lhs (stmt);
3794 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3795 && is_reassociable_op (binrhs, rhscode, loop));
3797 gsi = gsi_for_stmt (stmt);
3799 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3800 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3801 make_ssa_name (TREE_TYPE (lhs), NULL),
3802 gimple_assign_lhs (binlhs),
3803 gimple_assign_rhs2 (binrhs));
3804 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3805 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3806 gimple_set_uid (binrhs, gimple_uid (stmt));
3808 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3809 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3811 if (dump_file && (dump_flags & TDF_DETAILS))
3813 fprintf (dump_file, "Linearized: ");
3814 print_gimple_stmt (dump_file, stmt, 0, 0);
3817 reassociate_stats.linearized++;
3818 update_stmt (stmt);
3820 gsi = gsi_for_stmt (oldbinrhs);
3821 reassoc_remove_stmt (&gsi);
3822 release_defs (oldbinrhs);
3824 gimple_set_visited (stmt, true);
3825 gimple_set_visited (binlhs, true);
3826 gimple_set_visited (binrhs, true);
3828 /* Tail recurse on the new rhs if it still needs reassociation. */
3829 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3830 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3831 want to change the algorithm while converting to tuples. */
3832 linearize_expr (stmt);
3835 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3836 it. Otherwise, return NULL. */
3838 static gimple
3839 get_single_immediate_use (tree lhs)
3841 use_operand_p immuse;
3842 gimple immusestmt;
3844 if (TREE_CODE (lhs) == SSA_NAME
3845 && single_imm_use (lhs, &immuse, &immusestmt)
3846 && is_gimple_assign (immusestmt))
3847 return immusestmt;
3849 return NULL;
3852 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3853 representing the negated value. Insertions of any necessary
3854 instructions go before GSI.
3855 This function is recursive in that, if you hand it "a_5" as the
3856 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3857 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3859 static tree
3860 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3862 gimple negatedefstmt = NULL;
3863 tree resultofnegate;
3864 gimple_stmt_iterator gsi;
3865 unsigned int uid;
3867 /* If we are trying to negate a name, defined by an add, negate the
3868 add operands instead. */
3869 if (TREE_CODE (tonegate) == SSA_NAME)
3870 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3871 if (TREE_CODE (tonegate) == SSA_NAME
3872 && is_gimple_assign (negatedefstmt)
3873 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3874 && has_single_use (gimple_assign_lhs (negatedefstmt))
3875 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3877 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3878 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3879 tree lhs = gimple_assign_lhs (negatedefstmt);
3880 gimple g;
3882 gsi = gsi_for_stmt (negatedefstmt);
3883 rhs1 = negate_value (rhs1, &gsi);
3885 gsi = gsi_for_stmt (negatedefstmt);
3886 rhs2 = negate_value (rhs2, &gsi);
3888 gsi = gsi_for_stmt (negatedefstmt);
3889 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3890 gimple_set_visited (negatedefstmt, true);
3891 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3892 gimple_set_uid (g, gimple_uid (negatedefstmt));
3893 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3894 return lhs;
3897 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3898 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3899 NULL_TREE, true, GSI_SAME_STMT);
3900 gsi = *gsip;
3901 uid = gimple_uid (gsi_stmt (gsi));
3902 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3904 gimple stmt = gsi_stmt (gsi);
3905 if (gimple_uid (stmt) != 0)
3906 break;
3907 gimple_set_uid (stmt, uid);
3909 return resultofnegate;
3912 /* Return true if we should break up the subtract in STMT into an add
3913 with negate. This is true when the subtract operands are really
3914 adds, or the subtract itself is used in an add expression. In
3915 either case, breaking up the subtract into an add with negate
3916 exposes the adds to reassociation. */
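/* Illustrative example (assumed GIMPLE, not taken from a pass dump):
   given
     c = a + e;
     b = c - d;
   the first operand of the subtract is defined by a reassociable add,
   so we answer true and b = c - d is later rewritten as b = c + -d,
   exposing a + e + -d to reassociation.  */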
3918 static bool
3919 should_break_up_subtract (gimple stmt)
3921 tree lhs = gimple_assign_lhs (stmt);
3922 tree binlhs = gimple_assign_rhs1 (stmt);
3923 tree binrhs = gimple_assign_rhs2 (stmt);
3924 gimple immusestmt;
3925 struct loop *loop = loop_containing_stmt (stmt);
3927 if (TREE_CODE (binlhs) == SSA_NAME
3928 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3929 return true;
3931 if (TREE_CODE (binrhs) == SSA_NAME
3932 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3933 return true;
3935 if (TREE_CODE (lhs) == SSA_NAME
3936 && (immusestmt = get_single_immediate_use (lhs))
3937 && is_gimple_assign (immusestmt)
3938 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3939 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3940 return true;
3941 return false;
3944 /* Transform STMT from A - B into A + -B. */
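/* For example (illustrative): x = a - b becomes x = a + t after
   negate_value has materialized t = -b, or has pushed the negation
   into a single-use add that feeds b.  */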
3946 static void
3947 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3949 tree rhs1 = gimple_assign_rhs1 (stmt);
3950 tree rhs2 = gimple_assign_rhs2 (stmt);
3952 if (dump_file && (dump_flags & TDF_DETAILS))
3954 fprintf (dump_file, "Breaking up subtract ");
3955 print_gimple_stmt (dump_file, stmt, 0, 0);
3958 rhs2 = negate_value (rhs2, gsip);
3959 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3960 update_stmt (stmt);
3963 /* Determine whether STMT is a builtin call that raises an SSA name
3964 to an integer power and has only one use. If so, and this is early
3965 reassociation and unsafe math optimizations are permitted, place
3966 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3967 If any of these conditions does not hold, return FALSE. */
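/* Illustrative examples (assuming -funsafe-math-optimizations and a
   single use of the call's result): t = __builtin_pow (x, 4.0) yields
   *BASE = x and *EXPONENT = 4 and returns true; pow (x, 0.5) is
   rejected because the exponent is not an exact integer, and
   pow (x, -2.0) is rejected because negative exponents are not
   expanded here.  */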
3969 static bool
3970 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3972 tree fndecl, arg1;
3973 REAL_VALUE_TYPE c, cint;
3975 if (!first_pass_instance
3976 || !flag_unsafe_math_optimizations
3977 || !is_gimple_call (stmt)
3978 || !has_single_use (gimple_call_lhs (stmt)))
3979 return false;
3981 fndecl = gimple_call_fndecl (stmt);
3983 if (!fndecl
3984 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3985 return false;
3987 switch (DECL_FUNCTION_CODE (fndecl))
3989 CASE_FLT_FN (BUILT_IN_POW):
3990 *base = gimple_call_arg (stmt, 0);
3991 arg1 = gimple_call_arg (stmt, 1);
3993 if (TREE_CODE (arg1) != REAL_CST)
3994 return false;
3996 c = TREE_REAL_CST (arg1);
3998 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3999 return false;
4001 *exponent = real_to_integer (&c);
4002 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4003 if (!real_identical (&c, &cint))
4004 return false;
4006 break;
4008 CASE_FLT_FN (BUILT_IN_POWI):
4009 *base = gimple_call_arg (stmt, 0);
4010 arg1 = gimple_call_arg (stmt, 1);
4012 if (!tree_fits_shwi_p (arg1))
4013 return false;
4015 *exponent = tree_to_shwi (arg1);
4016 break;
4018 default:
4019 return false;
4022 /* Expanding negative exponents is generally unproductive, so we don't
4023 complicate matters with those. Exponents of zero and one should
4024 have been handled by expression folding. */
4025 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4026 return false;
4028 return true;
4031 /* Recursively linearize a binary expression that is the RHS of STMT.
4032 Place the operands of the expression tree in the vector named OPS. */
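/* Illustrative sketch (assumed GIMPLE, not a pass dump): for
     t1 = a + b;
     t2 = t1 + c;
     t3 = t2 + d;
   calling this on the definition of t3 collects a, b, c and d into
   OPS, recursing through the reassociable left operands.  */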
4034 static void
4035 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4036 bool is_associative, bool set_visited)
4038 tree binlhs = gimple_assign_rhs1 (stmt);
4039 tree binrhs = gimple_assign_rhs2 (stmt);
4040 gimple binlhsdef = NULL, binrhsdef = NULL;
4041 bool binlhsisreassoc = false;
4042 bool binrhsisreassoc = false;
4043 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4044 struct loop *loop = loop_containing_stmt (stmt);
4045 tree base = NULL_TREE;
4046 HOST_WIDE_INT exponent = 0;
4048 if (set_visited)
4049 gimple_set_visited (stmt, true);
4051 if (TREE_CODE (binlhs) == SSA_NAME)
4053 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4054 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4055 && !stmt_could_throw_p (binlhsdef));
4058 if (TREE_CODE (binrhs) == SSA_NAME)
4060 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4061 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4062 && !stmt_could_throw_p (binrhsdef));
4065 /* If the LHS is not reassociable, but the RHS is, we need to swap
4066 them. If neither is reassociable, there is nothing we can do, so
4067 just put them in the ops vector. If the LHS is reassociable,
4068 linearize it. If both are reassociable, then linearize the RHS
4069 and the LHS. */
4071 if (!binlhsisreassoc)
4073 tree temp;
4075      /* If this is not an associative operation, like division, give up.  */
4076 if (!is_associative)
4078 add_to_ops_vec (ops, binrhs);
4079 return;
4082 if (!binrhsisreassoc)
4084 if (rhscode == MULT_EXPR
4085 && TREE_CODE (binrhs) == SSA_NAME
4086 && acceptable_pow_call (binrhsdef, &base, &exponent))
4088 add_repeat_to_ops_vec (ops, base, exponent);
4089 gimple_set_visited (binrhsdef, true);
4091 else
4092 add_to_ops_vec (ops, binrhs);
4094 if (rhscode == MULT_EXPR
4095 && TREE_CODE (binlhs) == SSA_NAME
4096 && acceptable_pow_call (binlhsdef, &base, &exponent))
4098 add_repeat_to_ops_vec (ops, base, exponent);
4099 gimple_set_visited (binlhsdef, true);
4101 else
4102 add_to_ops_vec (ops, binlhs);
4104 return;
4107 if (dump_file && (dump_flags & TDF_DETAILS))
4109 fprintf (dump_file, "swapping operands of ");
4110 print_gimple_stmt (dump_file, stmt, 0, 0);
4113 swap_ssa_operands (stmt,
4114 gimple_assign_rhs1_ptr (stmt),
4115 gimple_assign_rhs2_ptr (stmt));
4116 update_stmt (stmt);
4118 if (dump_file && (dump_flags & TDF_DETAILS))
4120 fprintf (dump_file, " is now ");
4121 print_gimple_stmt (dump_file, stmt, 0, 0);
4124 /* We want to make it so the lhs is always the reassociative op,
4125 so swap. */
4126 temp = binlhs;
4127 binlhs = binrhs;
4128 binrhs = temp;
4130 else if (binrhsisreassoc)
4132 linearize_expr (stmt);
4133 binlhs = gimple_assign_rhs1 (stmt);
4134 binrhs = gimple_assign_rhs2 (stmt);
4137 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4138 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4139 rhscode, loop));
4140 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4141 is_associative, set_visited);
4143 if (rhscode == MULT_EXPR
4144 && TREE_CODE (binrhs) == SSA_NAME
4145 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4147 add_repeat_to_ops_vec (ops, base, exponent);
4148 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4150 else
4151 add_to_ops_vec (ops, binrhs);
4154 /* Repropagate the negates back into subtracts, since no other pass
4155 currently does it. */
4157 static void
4158 repropagate_negates (void)
4160 unsigned int i = 0;
4161 tree negate;
4163 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4165 gimple user = get_single_immediate_use (negate);
4167 if (!user || !is_gimple_assign (user))
4168 continue;
4170 /* The negate operand can be either operand of a PLUS_EXPR
4171 (it can be the LHS if the RHS is a constant for example).
4173 Force the negate operand to the RHS of the PLUS_EXPR, then
4174 transform the PLUS_EXPR into a MINUS_EXPR. */
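         /* E.g. (illustrative): with x = -a and its single use
            t = x + b, the operands are swapped if necessary and the
            use becomes t = b - a, leaving x = -a dead for later
            removal.  */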
4175 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4177 /* If the negated operand appears on the LHS of the
4178 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4179 to force the negated operand to the RHS of the PLUS_EXPR. */
4180 if (gimple_assign_rhs1 (user) == negate)
4182 swap_ssa_operands (user,
4183 gimple_assign_rhs1_ptr (user),
4184 gimple_assign_rhs2_ptr (user));
4187 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4188 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4189 if (gimple_assign_rhs2 (user) == negate)
4191 tree rhs1 = gimple_assign_rhs1 (user);
4192 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4193 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4194 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4195 update_stmt (user);
4198 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4200 if (gimple_assign_rhs1 (user) == negate)
4202 /* We have
4203 x = -a
4204 y = x - b
4205 which we transform into
4206 x = a + b
4207 y = -x .
4208 This pushes down the negate which we possibly can merge
4209 into some other operation, hence insert it into the
4210 plus_negates vector. */
4211 gimple feed = SSA_NAME_DEF_STMT (negate);
4212 tree a = gimple_assign_rhs1 (feed);
4213 tree b = gimple_assign_rhs2 (user);
4214 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4215 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4216 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
4217 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
4218 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4219 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
4220 user = gsi_stmt (gsi2);
4221 update_stmt (user);
4222 reassoc_remove_stmt (&gsi);
4223 release_defs (feed);
4224 plus_negates.safe_push (gimple_assign_lhs (user));
4226 else
4228 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4229 rid of one operation. */
4230 gimple feed = SSA_NAME_DEF_STMT (negate);
4231 tree a = gimple_assign_rhs1 (feed);
4232 tree rhs1 = gimple_assign_rhs1 (user);
4233 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4234 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4235 update_stmt (gsi_stmt (gsi));
4241 /* Returns true if OP is of a type for which we can do reassociation.
4242    That is for integral types with wrapping overflow, non-saturating
4243    fixed-point types, and floating-point types when associative-math is enabled.  */
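/* For example (illustrative): operands of unsigned integer type
   qualify since their overflow wraps, plain signed int without
   -fwrapv does not, and float or double operands qualify only when
   -fassociative-math is in effect.  */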
4245 static bool
4246 can_reassociate_p (tree op)
4248 tree type = TREE_TYPE (op);
4249 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4250 || NON_SAT_FIXED_POINT_TYPE_P (type)
4251 || (flag_associative_math && FLOAT_TYPE_P (type)))
4252 return true;
4253 return false;
4256 /* Break up subtract operations in block BB.
4258 We do this top down because we don't know whether the subtract is
4259 part of a possible chain of reassociation except at the top.
4261 IE given
4262 d = f + g
4263 c = a + e
4264 b = c - d
4265 q = b - r
4266 k = t - q
4268 we want to break up k = t - q, but we won't until we've transformed q
4269 = b - r, which won't be broken up until we transform b = c - d.
4271 En passant, clear the GIMPLE visited flag on every statement
4272 and set UIDs within each basic block. */
4274 static void
4275 break_up_subtract_bb (basic_block bb)
4277 gimple_stmt_iterator gsi;
4278 basic_block son;
4279 unsigned int uid = 1;
4281 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4283 gimple stmt = gsi_stmt (gsi);
4284 gimple_set_visited (stmt, false);
4285 gimple_set_uid (stmt, uid++);
4287 if (!is_gimple_assign (stmt)
4288 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4289 continue;
4291 /* Look for simple gimple subtract operations. */
4292 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4294 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4295 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4296 continue;
4298 /* Check for a subtract used only in an addition. If this
4299 is the case, transform it into add of a negate for better
4300 reassociation. IE transform C = A-B into C = A + -B if C
4301 is only used in an addition. */
4302 if (should_break_up_subtract (stmt))
4303 break_up_subtract (stmt, &gsi);
4305 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4306 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4307 plus_negates.safe_push (gimple_assign_lhs (stmt));
4309 for (son = first_dom_son (CDI_DOMINATORS, bb);
4310 son;
4311 son = next_dom_son (CDI_DOMINATORS, son))
4312 break_up_subtract_bb (son);
4315 /* Used for repeated factor analysis. */
4316 struct repeat_factor_d
4318 /* An SSA name that occurs in a multiply chain. */
4319 tree factor;
4321 /* Cached rank of the factor. */
4322 unsigned rank;
4324 /* Number of occurrences of the factor in the chain. */
4325 HOST_WIDE_INT count;
4327 /* An SSA name representing the product of this factor and
4328 all factors appearing later in the repeated factor vector. */
4329 tree repr;
4332 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4333 typedef const struct repeat_factor_d *const_repeat_factor_t;
4336 static vec<repeat_factor> repeat_factor_vec;
4338 /* Used for sorting the repeat factor vector. Sort primarily by
4339 ascending occurrence count, secondarily by descending rank. */
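/* For instance (illustrative), factors occurring 4, 2 and 3 times
   sort into the order 2, 3, 4; ties on the occurrence count are
   broken by placing the higher-ranked factor first.  */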
4341 static int
4342 compare_repeat_factors (const void *x1, const void *x2)
4344 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4345 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4347 if (rf1->count != rf2->count)
4348 return rf1->count - rf2->count;
4350 return rf2->rank - rf1->rank;
4353 /* Look for repeated operands in OPS in the multiply tree rooted at
4354 STMT. Replace them with an optimal sequence of multiplies and powi
4355 builtin calls, and remove the used operands from OPS. Return an
4356 SSA name representing the value of the replacement sequence. */
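/* Minimal illustrative example (assuming __builtin_powi is available
   for the type): if OPS represents x * x * y * y, both factors occur
   twice, so t = x * y and result = __builtin_powi (t, 2) are emitted,
   the consumed copies of x and y are removed from OPS, and result is
   returned.  */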
4358 static tree
4359 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4361 unsigned i, j, vec_len;
4362 int ii;
4363 operand_entry_t oe;
4364 repeat_factor_t rf1, rf2;
4365 repeat_factor rfnew;
4366 tree result = NULL_TREE;
4367 tree target_ssa, iter_result;
4368 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4369 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4370 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4371 gimple mul_stmt, pow_stmt;
4373 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4374 target. */
4375 if (!powi_fndecl)
4376 return NULL_TREE;
4378 /* Allocate the repeated factor vector. */
4379 repeat_factor_vec.create (10);
4381 /* Scan the OPS vector for all SSA names in the product and build
4382 up a vector of occurrence counts for each factor. */
4383 FOR_EACH_VEC_ELT (*ops, i, oe)
4385 if (TREE_CODE (oe->op) == SSA_NAME)
4387 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4389 if (rf1->factor == oe->op)
4391 rf1->count += oe->count;
4392 break;
4396 if (j >= repeat_factor_vec.length ())
4398 rfnew.factor = oe->op;
4399 rfnew.rank = oe->rank;
4400 rfnew.count = oe->count;
4401 rfnew.repr = NULL_TREE;
4402 repeat_factor_vec.safe_push (rfnew);
4407 /* Sort the repeated factor vector by (a) increasing occurrence count,
4408 and (b) decreasing rank. */
4409 repeat_factor_vec.qsort (compare_repeat_factors);
4411 /* It is generally best to combine as many base factors as possible
4412 into a product before applying __builtin_powi to the result.
4413 However, the sort order chosen for the repeated factor vector
4414 allows us to cache partial results for the product of the base
4415 factors for subsequent use. When we already have a cached partial
4416 result from a previous iteration, it is best to make use of it
4417    before looking for another __builtin_powi opportunity.
4419 As an example, consider x * x * y * y * y * z * z * z * z.
4420 We want to first compose the product x * y * z, raise it to the
4421 second power, then multiply this by y * z, and finally multiply
4422 by z. This can be done in 5 multiplies provided we cache y * z
4423 for use in both expressions:
4425 t1 = y * z
4426 t2 = t1 * x
4427 t3 = t2 * t2
4428 t4 = t1 * t3
4429 result = t4 * z
4431 If we instead ignored the cached y * z and first multiplied by
4432    the __builtin_powi opportunity z * z, we would get the inferior:
4434 t1 = y * z
4435 t2 = t1 * x
4436 t3 = t2 * t2
4437 t4 = z * z
4438 t5 = t3 * t4
4439 result = t5 * y */
4441 vec_len = repeat_factor_vec.length ();
4443 /* Repeatedly look for opportunities to create a builtin_powi call. */
4444 while (true)
4446 HOST_WIDE_INT power;
4448 /* First look for the largest cached product of factors from
4449 preceding iterations. If found, create a builtin_powi for
4450 it if the minimum occurrence count for its factors is at
4451 least 2, or just use this cached product as our next
4452 multiplicand if the minimum occurrence count is 1. */
4453 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4455 if (rf1->repr && rf1->count > 0)
4456 break;
4459 if (j < vec_len)
4461 power = rf1->count;
4463 if (power == 1)
4465 iter_result = rf1->repr;
4467 if (dump_file && (dump_flags & TDF_DETAILS))
4469 unsigned elt;
4470 repeat_factor_t rf;
4471 fputs ("Multiplying by cached product ", dump_file);
4472 for (elt = j; elt < vec_len; elt++)
4474 rf = &repeat_factor_vec[elt];
4475 print_generic_expr (dump_file, rf->factor, 0);
4476 if (elt < vec_len - 1)
4477 fputs (" * ", dump_file);
4479 fputs ("\n", dump_file);
4482 else
4484 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4485 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4486 build_int_cst (integer_type_node,
4487 power));
4488 gimple_call_set_lhs (pow_stmt, iter_result);
4489 gimple_set_location (pow_stmt, gimple_location (stmt));
4490 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4492 if (dump_file && (dump_flags & TDF_DETAILS))
4494 unsigned elt;
4495 repeat_factor_t rf;
4496 fputs ("Building __builtin_pow call for cached product (",
4497 dump_file);
4498 for (elt = j; elt < vec_len; elt++)
4500 rf = &repeat_factor_vec[elt];
4501 print_generic_expr (dump_file, rf->factor, 0);
4502 if (elt < vec_len - 1)
4503 fputs (" * ", dump_file);
4505              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
4506 power);
4510 else
4512 /* Otherwise, find the first factor in the repeated factor
4513 vector whose occurrence count is at least 2. If no such
4514 factor exists, there are no builtin_powi opportunities
4515 remaining. */
4516 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4518 if (rf1->count >= 2)
4519 break;
4522 if (j >= vec_len)
4523 break;
4525 power = rf1->count;
4527 if (dump_file && (dump_flags & TDF_DETAILS))
4529 unsigned elt;
4530 repeat_factor_t rf;
4531 fputs ("Building __builtin_pow call for (", dump_file);
4532 for (elt = j; elt < vec_len; elt++)
4534 rf = &repeat_factor_vec[elt];
4535 print_generic_expr (dump_file, rf->factor, 0);
4536 if (elt < vec_len - 1)
4537 fputs (" * ", dump_file);
4539              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
4542 reassociate_stats.pows_created++;
4544 /* Visit each element of the vector in reverse order (so that
4545 high-occurrence elements are visited first, and within the
4546 same occurrence count, lower-ranked elements are visited
4547 first). Form a linear product of all elements in this order
4548         whose occurrence count is at least that of element J.
4549 Record the SSA name representing the product of each element
4550 with all subsequent elements in the vector. */
4551 if (j == vec_len - 1)
4552 rf1->repr = rf1->factor;
4553 else
4555 for (ii = vec_len - 2; ii >= (int)j; ii--)
4557 tree op1, op2;
4559 rf1 = &repeat_factor_vec[ii];
4560 rf2 = &repeat_factor_vec[ii + 1];
4562 /* Init the last factor's representative to be itself. */
4563 if (!rf2->repr)
4564 rf2->repr = rf2->factor;
4566 op1 = rf1->factor;
4567 op2 = rf2->repr;
4569 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4570 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4571 target_ssa,
4572 op1, op2);
4573 gimple_set_location (mul_stmt, gimple_location (stmt));
4574 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4575 rf1->repr = target_ssa;
4577 /* Don't reprocess the multiply we just introduced. */
4578 gimple_set_visited (mul_stmt, true);
4582 /* Form a call to __builtin_powi for the maximum product
4583 just formed, raised to the power obtained earlier. */
4584 rf1 = &repeat_factor_vec[j];
4585 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4586 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4587 build_int_cst (integer_type_node,
4588 power));
4589 gimple_call_set_lhs (pow_stmt, iter_result);
4590 gimple_set_location (pow_stmt, gimple_location (stmt));
4591 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4594 /* If we previously formed at least one other builtin_powi call,
4595 form the product of this one and those others. */
4596 if (result)
4598 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4599 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4600 result, iter_result);
4601 gimple_set_location (mul_stmt, gimple_location (stmt));
4602 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4603 gimple_set_visited (mul_stmt, true);
4604 result = new_result;
4606 else
4607 result = iter_result;
4609 /* Decrement the occurrence count of each element in the product
4610 by the count found above, and remove this many copies of each
4611 factor from OPS. */
4612 for (i = j; i < vec_len; i++)
4614 unsigned k = power;
4615 unsigned n;
4617 rf1 = &repeat_factor_vec[i];
4618 rf1->count -= power;
4620 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4622 if (oe->op == rf1->factor)
4624 if (oe->count <= k)
4626 ops->ordered_remove (n);
4627 k -= oe->count;
4629 if (k == 0)
4630 break;
4632 else
4634 oe->count -= k;
4635 break;
4642 /* At this point all elements in the repeated factor vector have a
4643 remaining occurrence count of 0 or 1, and those with a count of 1
4644 don't have cached representatives. Re-sort the ops vector and
4645 clean up. */
4646 ops->qsort (sort_by_operand_rank);
4647 repeat_factor_vec.release ();
4649 /* Return the final product computed herein. Note that there may
4650 still be some elements with single occurrence count left in OPS;
4651 those will be handled by the normal reassociation logic. */
4652 return result;
4655 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4657 static void
4658 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4660 tree rhs1;
4662 if (dump_file && (dump_flags & TDF_DETAILS))
4664 fprintf (dump_file, "Transforming ");
4665 print_gimple_stmt (dump_file, stmt, 0, 0);
4668 rhs1 = gimple_assign_rhs1 (stmt);
4669 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4670 update_stmt (stmt);
4671 remove_visited_stmt_chain (rhs1);
4673 if (dump_file && (dump_flags & TDF_DETAILS))
4675 fprintf (dump_file, " into ");
4676 print_gimple_stmt (dump_file, stmt, 0, 0);
4680 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4682 static void
4683 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4684 tree rhs1, tree rhs2)
4686 if (dump_file && (dump_flags & TDF_DETAILS))
4688 fprintf (dump_file, "Transforming ");
4689 print_gimple_stmt (dump_file, stmt, 0, 0);
4692 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4693 update_stmt (gsi_stmt (*gsi));
4694 remove_visited_stmt_chain (rhs1);
4696 if (dump_file && (dump_flags & TDF_DETAILS))
4698 fprintf (dump_file, " into ");
4699 print_gimple_stmt (dump_file, stmt, 0, 0);
4703 /* Reassociate expressions in basic block BB and its children in the
4704    post-dominator tree.  */
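/* Illustrative sketch of the per-statement flow (assumed input, not a
   pass dump): for t1 = a + c; t2 = t1 + b; the operands a, c and b are
   linearized into the ops vector, sorted by rank and optimized, and
   the statements are then rewritten in rank order; for MULT_EXPR
   chains, repeated factors may additionally be combined into
   __builtin_powi calls when unsafe math optimizations are enabled.  */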
4706 static void
4707 reassociate_bb (basic_block bb)
4709 gimple_stmt_iterator gsi;
4710 basic_block son;
4711 gimple stmt = last_stmt (bb);
4713 if (stmt && !gimple_visited_p (stmt))
4714 maybe_optimize_range_tests (stmt);
4716 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4718 stmt = gsi_stmt (gsi);
4720 if (is_gimple_assign (stmt)
4721 && !stmt_could_throw_p (stmt))
4723 tree lhs, rhs1, rhs2;
4724 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4726 /* If this is not a gimple binary expression, there is
4727 nothing for us to do with it. */
4728 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4729 continue;
4731 /* If this was part of an already processed statement,
4732 we don't need to touch it again. */
4733 if (gimple_visited_p (stmt))
4735 /* This statement might have become dead because of previous
4736 reassociations. */
4737 if (has_zero_uses (gimple_get_lhs (stmt)))
4739 reassoc_remove_stmt (&gsi);
4740 release_defs (stmt);
4741              /* We might end up removing the last stmt above, which
4742                 leaves the iterator at the end of the sequence.
4743                 Reset it to the last stmt of the block in that case,
4744                 which might itself be the end of the sequence if we
4745                 removed the only remaining statement, in which case
4746                 we need to bail out.  */
4747 if (gsi_end_p (gsi))
4749 gsi = gsi_last_bb (bb);
4750 if (gsi_end_p (gsi))
4751 break;
4754 continue;
4757 lhs = gimple_assign_lhs (stmt);
4758 rhs1 = gimple_assign_rhs1 (stmt);
4759 rhs2 = gimple_assign_rhs2 (stmt);
4761          /* For operations other than bitwise and min/max ones we can't
4762             reassociate all types.  Verify that here.  */
4763 if (rhs_code != BIT_IOR_EXPR
4764 && rhs_code != BIT_AND_EXPR
4765 && rhs_code != BIT_XOR_EXPR
4766 && rhs_code != MIN_EXPR
4767 && rhs_code != MAX_EXPR
4768 && (!can_reassociate_p (lhs)
4769 || !can_reassociate_p (rhs1)
4770 || !can_reassociate_p (rhs2)))
4771 continue;
4773 if (associative_tree_code (rhs_code))
4775 auto_vec<operand_entry_t> ops;
4776 tree powi_result = NULL_TREE;
4778 /* There may be no immediate uses left by the time we
4779 get here because we may have eliminated them all. */
4780 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4781 continue;
4783 gimple_set_visited (stmt, true);
4784 linearize_expr_tree (&ops, stmt, true, true);
4785 ops.qsort (sort_by_operand_rank);
4786 optimize_ops_list (rhs_code, &ops);
4787 if (undistribute_ops_list (rhs_code, &ops,
4788 loop_containing_stmt (stmt)))
4790 ops.qsort (sort_by_operand_rank);
4791 optimize_ops_list (rhs_code, &ops);
4794 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4795 optimize_range_tests (rhs_code, &ops);
4797 if (first_pass_instance
4798 && rhs_code == MULT_EXPR
4799 && flag_unsafe_math_optimizations)
4800 powi_result = attempt_builtin_powi (stmt, &ops);
4802 /* If the operand vector is now empty, all operands were
4803 consumed by the __builtin_powi optimization. */
4804 if (ops.length () == 0)
4805 transform_stmt_to_copy (&gsi, stmt, powi_result);
4806 else if (ops.length () == 1)
4808 tree last_op = ops.last ()->op;
4810 if (powi_result)
4811 transform_stmt_to_multiply (&gsi, stmt, last_op,
4812 powi_result);
4813 else
4814 transform_stmt_to_copy (&gsi, stmt, last_op);
4816 else
4818 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4819 int ops_num = ops.length ();
4820 int width = get_reassociation_width (ops_num, rhs_code, mode);
4821 tree new_lhs = lhs;
4823 if (dump_file && (dump_flags & TDF_DETAILS))
4824 fprintf (dump_file,
4825 "Width = %d was chosen for reassociation\n", width);
4827 if (width > 1
4828 && ops.length () > 3)
4829 rewrite_expr_tree_parallel (stmt, width, ops);
4830 else
4832 /* When there are three operands left, we want
4833 to make sure the ones that get the double
4834 binary op are chosen wisely. */
4835 int len = ops.length ();
4836 if (len >= 3)
4837 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4839 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4840 powi_result != NULL);
4843 /* If we combined some repeated factors into a
4844 __builtin_powi call, multiply that result by the
4845 reassociated operands. */
4846 if (powi_result)
4848 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4849 tree type = TREE_TYPE (lhs);
4850 tree target_ssa = make_temp_ssa_name (type, NULL,
4851 "reassocpow");
4852 gimple_set_lhs (lhs_stmt, target_ssa);
4853 update_stmt (lhs_stmt);
4854 if (lhs != new_lhs)
4855 target_ssa = new_lhs;
4856 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4857 powi_result,
4858 target_ssa);
4859 gimple_set_location (mul_stmt, gimple_location (stmt));
4860 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4866 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4867 son;
4868 son = next_dom_son (CDI_POST_DOMINATORS, son))
4869 reassociate_bb (son);
4872 /* Add jumps around shifts for range tests turned into bit tests.
4873 For each SSA_NAME VAR we have code like:
4874 VAR = ...; // final stmt of range comparison
4875 // bit test here...;
4876 OTHERVAR = ...; // final stmt of the bit test sequence
4877 RES = VAR | OTHERVAR;
4878 Turn the above into:
4879 VAR = ...;
4880 if (VAR != 0)
4881 goto <l3>;
4882 else
4883 goto <l2>;
4884 <l2>:
4885 // bit test here...;
4886 OTHERVAR = ...;
4887 <l3>:
4888 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4890 static void
4891 branch_fixup (void)
4893 tree var;
4894 unsigned int i;
4896 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4898 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4899 gimple use_stmt;
4900 use_operand_p use;
4901 bool ok = single_imm_use (var, &use, &use_stmt);
4902 gcc_assert (ok
4903 && is_gimple_assign (use_stmt)
4904 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4905 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4907 basic_block cond_bb = gimple_bb (def_stmt);
4908 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4909 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4911 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4912 gimple g = gimple_build_cond (NE_EXPR, var,
4913 build_zero_cst (TREE_TYPE (var)),
4914 NULL_TREE, NULL_TREE);
4915 location_t loc = gimple_location (use_stmt);
4916 gimple_set_location (g, loc);
4917 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4919 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4920 etrue->probability = REG_BR_PROB_BASE / 2;
4921 etrue->count = cond_bb->count / 2;
4922 edge efalse = find_edge (cond_bb, then_bb);
4923 efalse->flags = EDGE_FALSE_VALUE;
4924 efalse->probability -= etrue->probability;
4925 efalse->count -= etrue->count;
4926 then_bb->count -= etrue->count;
4928 tree othervar = NULL_TREE;
4929 if (gimple_assign_rhs1 (use_stmt) == var)
4930 othervar = gimple_assign_rhs2 (use_stmt);
4931 else if (gimple_assign_rhs2 (use_stmt) == var)
4932 othervar = gimple_assign_rhs1 (use_stmt);
4933 else
4934 gcc_unreachable ();
4935 tree lhs = gimple_assign_lhs (use_stmt);
4936 gimple phi = create_phi_node (lhs, merge_bb);
4937 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4938 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4939 gsi = gsi_for_stmt (use_stmt);
4940 gsi_remove (&gsi, true);
4942 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4943 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4945 reassoc_branch_fixups.release ();
4948 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4949 void debug_ops_vector (vec<operand_entry_t> ops);
4951 /* Dump the operand entry vector OPS to FILE. */
4953 void
4954 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4956 operand_entry_t oe;
4957 unsigned int i;
4959 FOR_EACH_VEC_ELT (ops, i, oe)
4961 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4962 print_generic_expr (file, oe->op, 0);
4966 /* Dump the operand entry vector OPS to STDERR. */
4968 DEBUG_FUNCTION void
4969 debug_ops_vector (vec<operand_entry_t> ops)
4971 dump_ops_vector (stderr, ops);
4974 static void
4975 do_reassoc (void)
4977 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4978 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4981 /* Initialize the reassociation pass. */
4983 static void
4984 init_reassoc (void)
4986 int i;
4987 long rank = 2;
4988 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4990 /* Find the loops, so that we can prevent moving calculations in
4991 them. */
4992 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4994 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4996 operand_entry_pool = create_alloc_pool ("operand entry pool",
4997 sizeof (struct operand_entry), 30);
4998 next_operand_entry_id = 0;
5000   /* Reverse post order (RPO) will give us an ordering where
5001      deeper loops come later.  */
5002 pre_and_rev_post_order_compute (NULL, bbs, false);
5003 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5004 operand_rank = new hash_map<tree, long>;
5006 /* Give each default definition a distinct rank. This includes
5007 parameters and the static chain. Walk backwards over all
5008 SSA names so that we get proper rank ordering according
5009 to tree_swap_operands_p. */
5010 for (i = num_ssa_names - 1; i > 0; --i)
5012 tree name = ssa_name (i);
5013 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5014 insert_operand_rank (name, ++rank);
5017 /* Set up rank for each BB */
5018 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5019 bb_rank[bbs[i]] = ++rank << 16;
5021 free (bbs);
5022 calculate_dominance_info (CDI_POST_DOMINATORS);
5023 plus_negates = vNULL;
5026 /* Cleanup after the reassociation pass, and print stats if
5027 requested. */
5029 static void
5030 fini_reassoc (void)
5032 statistics_counter_event (cfun, "Linearized",
5033 reassociate_stats.linearized);
5034 statistics_counter_event (cfun, "Constants eliminated",
5035 reassociate_stats.constants_eliminated);
5036 statistics_counter_event (cfun, "Ops eliminated",
5037 reassociate_stats.ops_eliminated);
5038 statistics_counter_event (cfun, "Statements rewritten",
5039 reassociate_stats.rewritten);
5040 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5041 reassociate_stats.pows_encountered);
5042 statistics_counter_event (cfun, "Built-in powi calls created",
5043 reassociate_stats.pows_created);
5045 delete operand_rank;
5046 free_alloc_pool (operand_entry_pool);
5047 free (bb_rank);
5048 plus_negates.release ();
5049 free_dominance_info (CDI_POST_DOMINATORS);
5050 loop_optimizer_finalize ();
5053 /* Gate and execute functions for Reassociation. */
5055 static unsigned int
5056 execute_reassoc (void)
5058 init_reassoc ();
5060 do_reassoc ();
5061 repropagate_negates ();
5062 branch_fixup ();
5064 fini_reassoc ();
5065 return 0;
5068 namespace {
5070 const pass_data pass_data_reassoc =
5072 GIMPLE_PASS, /* type */
5073 "reassoc", /* name */
5074 OPTGROUP_NONE, /* optinfo_flags */
5075 TV_TREE_REASSOC, /* tv_id */
5076 ( PROP_cfg | PROP_ssa ), /* properties_required */
5077 0, /* properties_provided */
5078 0, /* properties_destroyed */
5079 0, /* todo_flags_start */
5080 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5083 class pass_reassoc : public gimple_opt_pass
5085 public:
5086 pass_reassoc (gcc::context *ctxt)
5087 : gimple_opt_pass (pass_data_reassoc, ctxt)
5090 /* opt_pass methods: */
5091 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5092 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5093 virtual unsigned int execute (function *) { return execute_reassoc (); }
5095 }; // class pass_reassoc
5097 } // anon namespace
5099 gimple_opt_pass *
5100 make_pass_reassoc (gcc::context *ctxt)
5102 return new pass_reassoc (ctxt);