/* Reassociation for trees.
   Copyright (C) 2005-2014 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "tree.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "hash-map.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "flags.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"
#include "builtins.h"
#include "gimplify.h"
#include "optabs.h"
/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_t

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op. So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
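/* A small illustration of step 5: once step 1 has turned a subtract into
   an add of a negate, a leftover pair such as

     x = -b
     y = a + x

   is, roughly, rewritten back into

     y = a - b

   so that no explicit NEGATE_EXPR survives for later passes to clean up.  */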
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;
/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;
/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
          || gimple_vdef (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          n = TREE_OPERAND_LENGTH (rhs);
          if (n == 0)
            rank = propagate_rank (rank, rhs);
          else
            {
              for (i = 0; i < n; i++)
                {
                  op = TREE_OPERAND (rhs, i);

                  if (op != NULL_TREE)
                    rank = propagate_rank (rank, op);
                }
            }
        }
      else
        {
          n = gimple_num_ops (stmt);
          for (i = 1; i < n; i++)
            {
              op = gimple_op (stmt, i);
              gcc_assert (op);
              rank = propagate_rank (rank, op);
            }
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0.  */
  return 0;
}
504 /* We want integer ones to end up last no matter what, since they are
505 the ones we can do the most with. */
506 #define INTEGER_CONST_TYPE 1 << 3
507 #define FLOAT_CONST_TYPE 1 << 2
508 #define OTHER_CONST_TYPE 1 << 1
510 /* Classify an invariant tree into integer, float, or other, so that
511 we can sort them to be near other constants of the same type. */
512 static inline int
513 constant_type (tree t)
515 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
516 return INTEGER_CONST_TYPE;
517 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
518 return FLOAT_CONST_TYPE;
519 else
520 return OTHER_CONST_TYPE;
523 /* qsort comparison function to sort operand entries PA and PB by rank
524 so that the sorted array is ordered by rank in decreasing order. */
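/* For instance, operands with ranks {b_7 (rank 5), a_5 (rank 3), 4 (rank 0)}
   already satisfy this order: higher-ranked SSA_NAMEs first, constants
   last, with ties broken by the entries' unique ids.  */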
525 static int
526 sort_by_operand_rank (const void *pa, const void *pb)
528 const operand_entry_t oea = *(const operand_entry_t *)pa;
529 const operand_entry_t oeb = *(const operand_entry_t *)pb;
531 /* It's nicer for optimize_expression if constants that are likely
532 to fold when added/multiplied/whatever are put next to each
533 other. Since all constants have rank 0, order them by type. */
534 if (oeb->rank == 0 && oea->rank == 0)
536 if (constant_type (oeb->op) != constant_type (oea->op))
537 return constant_type (oeb->op) - constant_type (oea->op);
538 else
539 /* To make sorting result stable, we use unique IDs to determine
540 order. */
541 return oeb->id - oea->id;
544 /* Lastly, make sure the versions that are the same go next to each
545 other. */
546 if ((oeb->rank - oea->rank == 0)
547 && TREE_CODE (oea->op) == SSA_NAME
548 && TREE_CODE (oeb->op) == SSA_NAME)
550 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
551 versions of removed SSA_NAMEs, so if possible, prefer to sort
552 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
553 See PR60418. */
554 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
555 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
556 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
558 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
559 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
560 basic_block bba = gimple_bb (stmta);
561 basic_block bbb = gimple_bb (stmtb);
562 if (bbb != bba)
564 if (bb_rank[bbb->index] != bb_rank[bba->index])
565 return bb_rank[bbb->index] - bb_rank[bba->index];
567 else
569 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
570 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
571 if (da != db)
572 return da ? 1 : -1;
576 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
578 else
579 return oeb->id - oea->id;
582 if (oeb->rank != oea->rank)
583 return oeb->rank - oea->rank;
584 else
585 return oeb->id - oea->id;
588 /* Add an operand entry to *OPS for the tree operand OP. */
590 static void
591 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
593 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
595 oe->op = op;
596 oe->rank = get_rank (op);
597 oe->id = next_operand_entry_id++;
598 oe->count = 1;
599 ops->safe_push (oe);
602 /* Add an operand entry to *OPS for the tree operand OP with repeat
603 count REPEAT. */
605 static void
606 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
607 HOST_WIDE_INT repeat)
609 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
611 oe->op = op;
612 oe->rank = get_rank (op);
613 oe->id = next_operand_entry_id++;
614 oe->count = repeat;
615 ops->safe_push (oe);
617 reassociate_stats.pows_encountered++;
620 /* Return true if STMT is a reassociable operation containing a binary
621 operation with tree code CODE, and is inside LOOP. */
623 static bool
624 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
626 basic_block bb = gimple_bb (stmt);
628 if (gimple_bb (stmt) == NULL)
629 return false;
631 if (!flow_bb_inside_loop_p (loop, bb))
632 return false;
634 if (is_gimple_assign (stmt)
635 && gimple_assign_rhs_code (stmt) == code
636 && has_single_use (gimple_assign_lhs (stmt)))
637 return true;
639 return false;
643 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
644 operand of that operation. Otherwise, return NULL_TREE. */
646 static tree
647 get_unary_op (tree name, enum tree_code opcode)
649 gimple stmt = SSA_NAME_DEF_STMT (name);
651 if (!is_gimple_assign (stmt))
652 return NULL_TREE;
654 if (gimple_assign_rhs_code (stmt) == opcode)
655 return gimple_assign_rhs1 (stmt);
656 return NULL_TREE;
659 /* If CURR and LAST are a pair of ops that OPCODE allows us to
660 eliminate through equivalences, do so, remove them from OPS, and
661 return true. Otherwise, return false. */
663 static bool
664 eliminate_duplicate_pair (enum tree_code opcode,
665 vec<operand_entry_t> *ops,
666 bool *all_done,
667 unsigned int i,
668 operand_entry_t curr,
669 operand_entry_t last)
672 /* If we have two of the same op, and the opcode is & |, min, or max,
673 we can eliminate one of them.
674 If we have two of the same op, and the opcode is ^, we can
675 eliminate both of them. */
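/* For example, in a & a & b the duplicated a is dropped, while in
   a ^ a ^ b both copies of a cancel and only b is left to contribute.  */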
677 if (last && last->op == curr->op)
679 switch (opcode)
681 case MAX_EXPR:
682 case MIN_EXPR:
683 case BIT_IOR_EXPR:
684 case BIT_AND_EXPR:
685 if (dump_file && (dump_flags & TDF_DETAILS))
687 fprintf (dump_file, "Equivalence: ");
688 print_generic_expr (dump_file, curr->op, 0);
689 fprintf (dump_file, " [&|minmax] ");
690 print_generic_expr (dump_file, last->op, 0);
691 fprintf (dump_file, " -> ");
692 print_generic_stmt (dump_file, last->op, 0);
695 ops->ordered_remove (i);
696 reassociate_stats.ops_eliminated ++;
698 return true;
700 case BIT_XOR_EXPR:
701 if (dump_file && (dump_flags & TDF_DETAILS))
703 fprintf (dump_file, "Equivalence: ");
704 print_generic_expr (dump_file, curr->op, 0);
705 fprintf (dump_file, " ^ ");
706 print_generic_expr (dump_file, last->op, 0);
707 fprintf (dump_file, " -> nothing\n");
710 reassociate_stats.ops_eliminated += 2;
712 if (ops->length () == 2)
714 ops->create (0);
715 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
716 *all_done = true;
718 else
720 ops->ordered_remove (i-1);
721 ops->ordered_remove (i-1);
724 return true;
726 default:
727 break;
730 return false;
733 static vec<tree> plus_negates;
735 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
736 expression, look in OPS for a corresponding positive operation to cancel
737 it out. If we find one, remove the other from OPS, replace
738 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
739 return false. */
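/* For example, given t = -b, the operand list for b + t + c collapses the
   pair to a 0 operand, and given t = ~b it collapses the pair to a -1
   operand, leaving the constant for later folding.  */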
741 static bool
742 eliminate_plus_minus_pair (enum tree_code opcode,
743 vec<operand_entry_t> *ops,
744 unsigned int currindex,
745 operand_entry_t curr)
747 tree negateop;
748 tree notop;
749 unsigned int i;
750 operand_entry_t oe;
752 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
753 return false;
755 negateop = get_unary_op (curr->op, NEGATE_EXPR);
756 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
757 if (negateop == NULL_TREE && notop == NULL_TREE)
758 return false;
760 /* Any non-negated version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
762 one, we can stop. */
764 for (i = currindex + 1;
765 ops->iterate (i, &oe)
766 && oe->rank >= curr->rank - 1 ;
767 i++)
769 if (oe->op == negateop)
772 if (dump_file && (dump_flags & TDF_DETAILS))
774 fprintf (dump_file, "Equivalence: ");
775 print_generic_expr (dump_file, negateop, 0);
776 fprintf (dump_file, " + -");
777 print_generic_expr (dump_file, oe->op, 0);
778 fprintf (dump_file, " -> 0\n");
781 ops->ordered_remove (i);
782 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
783 ops->ordered_remove (currindex);
784 reassociate_stats.ops_eliminated ++;
786 return true;
788 else if (oe->op == notop)
790 tree op_type = TREE_TYPE (oe->op);
792 if (dump_file && (dump_flags & TDF_DETAILS))
794 fprintf (dump_file, "Equivalence: ");
795 print_generic_expr (dump_file, notop, 0);
796 fprintf (dump_file, " + ~");
797 print_generic_expr (dump_file, oe->op, 0);
798 fprintf (dump_file, " -> -1\n");
801 ops->ordered_remove (i);
802 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
803 ops->ordered_remove (currindex);
804 reassociate_stats.ops_eliminated ++;
806 return true;
810 /* CURR->OP is a negate expr in a plus expr: save it for later
811 inspection in repropagate_negates(). */
812 if (negateop != NULL_TREE)
813 plus_negates.safe_push (curr->op);
815 return false;
818 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
819 bitwise not expression, look in OPS for a corresponding operand to
820 cancel it out. If we find one, remove the other from OPS, replace
821 OPS[CURRINDEX] with 0 or -1 respectively, and return true. Otherwise,
822 return false. */
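/* For example, with t = ~x, the list for x & t & y collapses to a single 0,
   and the list for x | t | y collapses to a single -1.  */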
824 static bool
825 eliminate_not_pairs (enum tree_code opcode,
826 vec<operand_entry_t> *ops,
827 unsigned int currindex,
828 operand_entry_t curr)
830 tree notop;
831 unsigned int i;
832 operand_entry_t oe;
834 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
835 || TREE_CODE (curr->op) != SSA_NAME)
836 return false;
838 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
839 if (notop == NULL_TREE)
840 return false;
842 /* Any non-not version will have a rank that is one less than
843 the current rank. So once we hit those ranks, if we don't find
844 one, we can stop. */
846 for (i = currindex + 1;
847 ops->iterate (i, &oe)
848 && oe->rank >= curr->rank - 1;
849 i++)
851 if (oe->op == notop)
853 if (dump_file && (dump_flags & TDF_DETAILS))
855 fprintf (dump_file, "Equivalence: ");
856 print_generic_expr (dump_file, notop, 0);
857 if (opcode == BIT_AND_EXPR)
858 fprintf (dump_file, " & ~");
859 else if (opcode == BIT_IOR_EXPR)
860 fprintf (dump_file, " | ~");
861 print_generic_expr (dump_file, oe->op, 0);
862 if (opcode == BIT_AND_EXPR)
863 fprintf (dump_file, " -> 0\n");
864 else if (opcode == BIT_IOR_EXPR)
865 fprintf (dump_file, " -> -1\n");
868 if (opcode == BIT_AND_EXPR)
869 oe->op = build_zero_cst (TREE_TYPE (oe->op));
870 else if (opcode == BIT_IOR_EXPR)
871 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
873 reassociate_stats.ops_eliminated += ops->length () - 1;
874 ops->truncate (0);
875 ops->quick_push (oe);
876 return true;
880 return false;
883 /* Use constant value that may be present in OPS to try to eliminate
884 operands. Note that this function is only really used when we've
885 eliminated ops for other reasons, or merged constants. Across
886 single statements, fold already does all of this, plus more. There
887 is little point in duplicating logic, so I've only included the
888 identities that I could ever construct testcases to trigger. */
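/* For instance, ignoring the floating-point caveats checked below, a
   trailing & 0, | -1 or * 0 makes every other operand redundant, while a
   trailing & -1, | 0, * 1, + 0 or ^ 0 is simply dropped.  */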
890 static void
891 eliminate_using_constants (enum tree_code opcode,
892 vec<operand_entry_t> *ops)
894 operand_entry_t oelast = ops->last ();
895 tree type = TREE_TYPE (oelast->op);
897 if (oelast->rank == 0
898 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
900 switch (opcode)
902 case BIT_AND_EXPR:
903 if (integer_zerop (oelast->op))
905 if (ops->length () != 1)
907 if (dump_file && (dump_flags & TDF_DETAILS))
908 fprintf (dump_file, "Found & 0, removing all other ops\n");
910 reassociate_stats.ops_eliminated += ops->length () - 1;
912 ops->truncate (0);
913 ops->quick_push (oelast);
914 return;
917 else if (integer_all_onesp (oelast->op))
919 if (ops->length () != 1)
921 if (dump_file && (dump_flags & TDF_DETAILS))
922 fprintf (dump_file, "Found & -1, removing\n");
923 ops->pop ();
924 reassociate_stats.ops_eliminated++;
927 break;
928 case BIT_IOR_EXPR:
929 if (integer_all_onesp (oelast->op))
931 if (ops->length () != 1)
933 if (dump_file && (dump_flags & TDF_DETAILS))
934 fprintf (dump_file, "Found | -1, removing all other ops\n");
936 reassociate_stats.ops_eliminated += ops->length () - 1;
938 ops->truncate (0);
939 ops->quick_push (oelast);
940 return;
943 else if (integer_zerop (oelast->op))
945 if (ops->length () != 1)
947 if (dump_file && (dump_flags & TDF_DETAILS))
948 fprintf (dump_file, "Found | 0, removing\n");
949 ops->pop ();
950 reassociate_stats.ops_eliminated++;
953 break;
954 case MULT_EXPR:
955 if (integer_zerop (oelast->op)
956 || (FLOAT_TYPE_P (type)
957 && !HONOR_NANS (TYPE_MODE (type))
958 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
959 && real_zerop (oelast->op)))
961 if (ops->length () != 1)
963 if (dump_file && (dump_flags & TDF_DETAILS))
964 fprintf (dump_file, "Found * 0, removing all other ops\n");
966 reassociate_stats.ops_eliminated += ops->length () - 1;
967 ops->truncate (1);
968 ops->quick_push (oelast);
969 return;
972 else if (integer_onep (oelast->op)
973 || (FLOAT_TYPE_P (type)
974 && !HONOR_SNANS (TYPE_MODE (type))
975 && real_onep (oelast->op)))
977 if (ops->length () != 1)
979 if (dump_file && (dump_flags & TDF_DETAILS))
980 fprintf (dump_file, "Found * 1, removing\n");
981 ops->pop ();
982 reassociate_stats.ops_eliminated++;
983 return;
986 break;
987 case BIT_XOR_EXPR:
988 case PLUS_EXPR:
989 case MINUS_EXPR:
990 if (integer_zerop (oelast->op)
991 || (FLOAT_TYPE_P (type)
992 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
993 && fold_real_zero_addition_p (type, oelast->op,
994 opcode == MINUS_EXPR)))
996 if (ops->length () != 1)
998 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "Found [|^+] 0, removing\n");
1000 ops->pop ();
1001 reassociate_stats.ops_eliminated++;
1002 return;
1005 break;
1006 default:
1007 break;
1013 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1014 bool, bool);
1016 /* Structure for tracking and counting operands. */
1017 typedef struct oecount_s {
1018 int cnt;
1019 int id;
1020 enum tree_code oecode;
1021 tree op;
1022 } oecount;
1025 /* The heap for the oecount hashtable and the sorted list of operands. */
1026 static vec<oecount> cvec;
1029 /* Oecount hashtable helpers. */
1031 struct oecount_hasher
1033 typedef int value_type;
1034 typedef int compare_type;
1035 typedef int store_values_directly;
1036 static inline hashval_t hash (const value_type &);
1037 static inline bool equal (const value_type &, const compare_type &);
1038 static bool is_deleted (int &v) { return v == 1; }
1039 static void mark_deleted (int &e) { e = 1; }
1040 static bool is_empty (int &v) { return v == 0; }
1041 static void mark_empty (int &e) { e = 0; }
1042 static void remove (int &) {}
1045 /* Hash function for oecount. */
1047 inline hashval_t
1048 oecount_hasher::hash (const value_type &p)
1050 const oecount *c = &cvec[p - 42];
1051 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1054 /* Comparison function for oecount. */
1056 inline bool
1057 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1059 const oecount *c1 = &cvec[p1 - 42];
1060 const oecount *c2 = &cvec[p2 - 42];
1061 return (c1->oecode == c2->oecode
1062 && c1->op == c2->op);
1065 /* Comparison function for qsort sorting oecount elements by count. */
1067 static int
1068 oecount_cmp (const void *p1, const void *p2)
1070 const oecount *c1 = (const oecount *)p1;
1071 const oecount *c2 = (const oecount *)p2;
1072 if (c1->cnt != c2->cnt)
1073 return c1->cnt - c2->cnt;
1074 else
1075 /* If counts are identical, use unique IDs to stabilize qsort. */
1076 return c1->id - c2->id;
1079 /* Return TRUE iff STMT represents a builtin call that raises OP
1080 to some exponent. */
1082 static bool
1083 stmt_is_power_of_op (gimple stmt, tree op)
1085 tree fndecl;
1087 if (!is_gimple_call (stmt))
1088 return false;
1090 fndecl = gimple_call_fndecl (stmt);
1092 if (!fndecl
1093 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1094 return false;
1096 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1098 CASE_FLT_FN (BUILT_IN_POW):
1099 CASE_FLT_FN (BUILT_IN_POWI):
1100 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1102 default:
1103 return false;
1107 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1108 in place and return the result. Assumes that stmt_is_power_of_op
1109 was previously called for STMT and returned TRUE. */
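/* For example, __builtin_powi (x, 5) is rewritten in place to
   __builtin_powi (x, 4) and 4 is returned; pow (x, 3.0) becomes
   pow (x, 2.0) with 2 returned.  */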
1111 static HOST_WIDE_INT
1112 decrement_power (gimple stmt)
1114 REAL_VALUE_TYPE c, cint;
1115 HOST_WIDE_INT power;
1116 tree arg1;
1118 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1120 CASE_FLT_FN (BUILT_IN_POW):
1121 arg1 = gimple_call_arg (stmt, 1);
1122 c = TREE_REAL_CST (arg1);
1123 power = real_to_integer (&c) - 1;
1124 real_from_integer (&cint, VOIDmode, power, SIGNED);
1125 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1126 return power;
1128 CASE_FLT_FN (BUILT_IN_POWI):
1129 arg1 = gimple_call_arg (stmt, 1);
1130 power = TREE_INT_CST_LOW (arg1) - 1;
1131 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1132 return power;
1134 default:
1135 gcc_unreachable ();
1139 /* Find the single immediate use of STMT's LHS, and replace it
1140 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1141 replace *DEF with OP as well. */
1143 static void
1144 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1146 tree lhs;
1147 gimple use_stmt;
1148 use_operand_p use;
1149 gimple_stmt_iterator gsi;
1151 if (is_gimple_call (stmt))
1152 lhs = gimple_call_lhs (stmt);
1153 else
1154 lhs = gimple_assign_lhs (stmt);
1156 gcc_assert (has_single_use (lhs));
1157 single_imm_use (lhs, &use, &use_stmt);
1158 if (lhs == *def)
1159 *def = op;
1160 SET_USE (use, op);
1161 if (TREE_CODE (op) != SSA_NAME)
1162 update_stmt (use_stmt);
1163 gsi = gsi_for_stmt (stmt);
1164 unlink_stmt_vdef (stmt);
1165 reassoc_remove_stmt (&gsi);
1166 release_defs (stmt);
1169 /* Walks the linear chain with result *DEF searching for an operation
1170 with operand OP and code OPCODE removing that from the chain. *DEF
1171 is updated if there is only one operand but no operation left. */
1173 static void
1174 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1176 gimple stmt = SSA_NAME_DEF_STMT (*def);
1180 tree name;
1182 if (opcode == MULT_EXPR
1183 && stmt_is_power_of_op (stmt, op))
1185 if (decrement_power (stmt) == 1)
1186 propagate_op_to_single_use (op, stmt, def);
1187 return;
1190 name = gimple_assign_rhs1 (stmt);
1192 /* If this is the operation we look for and one of the operands
1193 is ours simply propagate the other operand into the stmts
1194 single use. */
1195 if (gimple_assign_rhs_code (stmt) == opcode
1196 && (name == op
1197 || gimple_assign_rhs2 (stmt) == op))
1199 if (name == op)
1200 name = gimple_assign_rhs2 (stmt);
1201 propagate_op_to_single_use (name, stmt, def);
1202 return;
1205 /* We might have a multiply of two __builtin_pow* calls, and
1206 the operand might be hiding in the rightmost one. */
1207 if (opcode == MULT_EXPR
1208 && gimple_assign_rhs_code (stmt) == opcode
1209 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1210 && has_single_use (gimple_assign_rhs2 (stmt)))
1212 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1213 if (stmt_is_power_of_op (stmt2, op))
1215 if (decrement_power (stmt2) == 1)
1216 propagate_op_to_single_use (op, stmt2, def);
1217 return;
1221 /* Continue walking the chain. */
1222 gcc_assert (name != op
1223 && TREE_CODE (name) == SSA_NAME);
1224 stmt = SSA_NAME_DEF_STMT (name);
1226 while (1);
1229 /* Returns true if statement S1 dominates statement S2. Like
1230 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1232 static bool
1233 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1235 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1237 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1238 SSA_NAME. Assume it lives at the beginning of function and
1239 thus dominates everything. */
1240 if (!bb1 || s1 == s2)
1241 return true;
1243 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1244 if (!bb2)
1245 return false;
1247 if (bb1 == bb2)
1249 /* PHIs in the same basic block are assumed to be
1250 executed all in parallel, if only one stmt is a PHI,
1251 it dominates the other stmt in the same basic block. */
1252 if (gimple_code (s1) == GIMPLE_PHI)
1253 return true;
1255 if (gimple_code (s2) == GIMPLE_PHI)
1256 return false;
1258 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1260 if (gimple_uid (s1) < gimple_uid (s2))
1261 return true;
1263 if (gimple_uid (s1) > gimple_uid (s2))
1264 return false;
1266 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1267 unsigned int uid = gimple_uid (s1);
1268 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1270 gimple s = gsi_stmt (gsi);
1271 if (gimple_uid (s) != uid)
1272 break;
1273 if (s == s2)
1274 return true;
1277 return false;
1280 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1283 /* Insert STMT after INSERT_POINT. */
1285 static void
1286 insert_stmt_after (gimple stmt, gimple insert_point)
1288 gimple_stmt_iterator gsi;
1289 basic_block bb;
1291 if (gimple_code (insert_point) == GIMPLE_PHI)
1292 bb = gimple_bb (insert_point);
1293 else if (!stmt_ends_bb_p (insert_point))
1295 gsi = gsi_for_stmt (insert_point);
1296 gimple_set_uid (stmt, gimple_uid (insert_point));
1297 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1298 return;
1300 else
1301 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1302 thus if it must end a basic block, it should be a call that can
1303 throw, or some assignment that can throw. If it throws, the LHS
1304 of it will not be initialized though, so only valid places using
1305 the SSA_NAME should be dominated by the fallthru edge. */
1306 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1307 gsi = gsi_after_labels (bb);
1308 if (gsi_end_p (gsi))
1310 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1311 gimple_set_uid (stmt,
1312 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1314 else
1315 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1316 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1319 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1320 the result. Places the statement after the definition of either
1321 OP1 or OP2. Returns the new statement. */
1323 static gimple
1324 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1326 gimple op1def = NULL, op2def = NULL;
1327 gimple_stmt_iterator gsi;
1328 tree op;
1329 gimple sum;
1331 /* Create the addition statement. */
1332 op = make_ssa_name (type, NULL);
1333 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1335 /* Find an insertion place and insert. */
1336 if (TREE_CODE (op1) == SSA_NAME)
1337 op1def = SSA_NAME_DEF_STMT (op1);
1338 if (TREE_CODE (op2) == SSA_NAME)
1339 op2def = SSA_NAME_DEF_STMT (op2);
1340 if ((!op1def || gimple_nop_p (op1def))
1341 && (!op2def || gimple_nop_p (op2def)))
1343 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1344 if (gsi_end_p (gsi))
1346 gimple_stmt_iterator gsi2
1347 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1348 gimple_set_uid (sum,
1349 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1351 else
1352 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1353 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1355 else
1357 gimple insert_point;
1358 if ((!op1def || gimple_nop_p (op1def))
1359 || (op2def && !gimple_nop_p (op2def)
1360 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1361 insert_point = op2def;
1362 else
1363 insert_point = op1def;
1364 insert_stmt_after (sum, insert_point);
1366 update_stmt (sum);
1368 return sum;
1371 /* Perform un-distribution of divisions and multiplications.
1372 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1373 to (A + B) / X for real X.
1375 The algorithm is organized as follows.
1377 - First we walk the addition chain *OPS looking for summands that
1378 are defined by a multiplication or a real division. This results
1379 in the candidates bitmap with relevant indices into *OPS.
1381 - Second we build the chains of multiplications or divisions for
1382 these candidates, counting the number of occurrences of (operand, code)
1383 pairs in all of the candidates chains.
1385 - Third we sort the (operand, code) pairs by number of occurrence and
1386 process them starting with the pair with the most uses.
1388 * For each such pair we walk the candidates again to build a
1389 second candidate bitmap noting all multiplication/division chains
1390 that have at least one occurrence of (operand, code).
1392 * We build an alternate addition chain only covering these
1393 candidates with one (operand, code) operation removed from their
1394 multiplication/division chain.
1396 * The first candidate gets replaced by the alternate addition chain
1397 multiplied/divided by the operand.
1399 * All candidate chains get disabled for further processing and
1400 processing of (operand, code) pairs continues.
1402 The alternate addition chains built are re-processed by the main
1403 reassociation algorithm which allows optimizing a * x * y + b * y * x
1404 to (a + b ) * x * y in one invocation of the reassociation pass. */
1406 static bool
1407 undistribute_ops_list (enum tree_code opcode,
1408 vec<operand_entry_t> *ops, struct loop *loop)
1410 unsigned int length = ops->length ();
1411 operand_entry_t oe1;
1412 unsigned i, j;
1413 sbitmap candidates, candidates2;
1414 unsigned nr_candidates, nr_candidates2;
1415 sbitmap_iterator sbi0;
1416 vec<operand_entry_t> *subops;
1417 bool changed = false;
1418 int next_oecount_id = 0;
1420 if (length <= 1
1421 || opcode != PLUS_EXPR)
1422 return false;
1424 /* Build a list of candidates to process. */
1425 candidates = sbitmap_alloc (length);
1426 bitmap_clear (candidates);
1427 nr_candidates = 0;
1428 FOR_EACH_VEC_ELT (*ops, i, oe1)
1430 enum tree_code dcode;
1431 gimple oe1def;
1433 if (TREE_CODE (oe1->op) != SSA_NAME)
1434 continue;
1435 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1436 if (!is_gimple_assign (oe1def))
1437 continue;
1438 dcode = gimple_assign_rhs_code (oe1def);
1439 if ((dcode != MULT_EXPR
1440 && dcode != RDIV_EXPR)
1441 || !is_reassociable_op (oe1def, dcode, loop))
1442 continue;
1444 bitmap_set_bit (candidates, i);
1445 nr_candidates++;
1448 if (nr_candidates < 2)
1450 sbitmap_free (candidates);
1451 return false;
1454 if (dump_file && (dump_flags & TDF_DETAILS))
1456 fprintf (dump_file, "searching for un-distribute opportunities ");
1457 print_generic_expr (dump_file,
1458 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1459 fprintf (dump_file, " %d\n", nr_candidates);
1462 /* Build linearized sub-operand lists and the counting table. */
1463 cvec.create (0);
1465 hash_table<oecount_hasher> ctable (15);
1467 /* ??? Macro arguments cannot have multi-argument template types in
1468 them. This typedef is needed to workaround that limitation. */
1469 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1470 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1471 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1473 gimple oedef;
1474 enum tree_code oecode;
1475 unsigned j;
1477 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1478 oecode = gimple_assign_rhs_code (oedef);
1479 linearize_expr_tree (&subops[i], oedef,
1480 associative_tree_code (oecode), false);
1482 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1484 oecount c;
1485 int *slot;
1486 int idx;
1487 c.oecode = oecode;
1488 c.cnt = 1;
1489 c.id = next_oecount_id++;
1490 c.op = oe1->op;
1491 cvec.safe_push (c);
1492 idx = cvec.length () + 41;
1493 slot = ctable.find_slot (idx, INSERT);
1494 if (!*slot)
1496 *slot = idx;
1498 else
1500 cvec.pop ();
1501 cvec[*slot - 42].cnt++;
1506 /* Sort the counting table. */
1507 cvec.qsort (oecount_cmp);
1509 if (dump_file && (dump_flags & TDF_DETAILS))
1511 oecount *c;
1512 fprintf (dump_file, "Candidates:\n");
1513 FOR_EACH_VEC_ELT (cvec, j, c)
1515 fprintf (dump_file, " %u %s: ", c->cnt,
1516 c->oecode == MULT_EXPR
1517 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1518 print_generic_expr (dump_file, c->op, 0);
1519 fprintf (dump_file, "\n");
1523 /* Process the (operand, code) pairs in order of most occurrence. */
1524 candidates2 = sbitmap_alloc (length);
1525 while (!cvec.is_empty ())
1527 oecount *c = &cvec.last ();
1528 if (c->cnt < 2)
1529 break;
1531 /* Now collect the operands in the outer chain that contain
1532 the common operand in their inner chain. */
1533 bitmap_clear (candidates2);
1534 nr_candidates2 = 0;
1535 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1537 gimple oedef;
1538 enum tree_code oecode;
1539 unsigned j;
1540 tree op = (*ops)[i]->op;
1542 /* If we undistributed in this chain already this may be
1543 a constant. */
1544 if (TREE_CODE (op) != SSA_NAME)
1545 continue;
1547 oedef = SSA_NAME_DEF_STMT (op);
1548 oecode = gimple_assign_rhs_code (oedef);
1549 if (oecode != c->oecode)
1550 continue;
1552 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1554 if (oe1->op == c->op)
1556 bitmap_set_bit (candidates2, i);
1557 ++nr_candidates2;
1558 break;
1563 if (nr_candidates2 >= 2)
1565 operand_entry_t oe1, oe2;
1566 gimple prod;
1567 int first = bitmap_first_set_bit (candidates2);
1569 /* Build the new addition chain. */
1570 oe1 = (*ops)[first];
1571 if (dump_file && (dump_flags & TDF_DETAILS))
1573 fprintf (dump_file, "Building (");
1574 print_generic_expr (dump_file, oe1->op, 0);
1576 zero_one_operation (&oe1->op, c->oecode, c->op);
1577 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1579 gimple sum;
1580 oe2 = (*ops)[i];
1581 if (dump_file && (dump_flags & TDF_DETAILS))
1583 fprintf (dump_file, " + ");
1584 print_generic_expr (dump_file, oe2->op, 0);
1586 zero_one_operation (&oe2->op, c->oecode, c->op);
1587 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1588 oe1->op, oe2->op, opcode);
1589 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1590 oe2->rank = 0;
1591 oe1->op = gimple_get_lhs (sum);
1594 /* Apply the multiplication/division. */
1595 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1596 oe1->op, c->op, c->oecode);
1597 if (dump_file && (dump_flags & TDF_DETAILS))
1599 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1600 print_generic_expr (dump_file, c->op, 0);
1601 fprintf (dump_file, "\n");
1604 /* Record it in the addition chain and disable further
1605 undistribution with this op. */
1606 oe1->op = gimple_assign_lhs (prod);
1607 oe1->rank = get_rank (oe1->op);
1608 subops[first].release ();
1610 changed = true;
1613 cvec.pop ();
1616 for (i = 0; i < ops->length (); ++i)
1617 subops[i].release ();
1618 free (subops);
1619 cvec.release ();
1620 sbitmap_free (candidates);
1621 sbitmap_free (candidates2);
1623 return changed;
1626 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1627 expression, examine the other OPS to see if any of them are comparisons
1628 of the same values, which we may be able to combine or eliminate.
1629 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1631 static bool
1632 eliminate_redundant_comparison (enum tree_code opcode,
1633 vec<operand_entry_t> *ops,
1634 unsigned int currindex,
1635 operand_entry_t curr)
1637 tree op1, op2;
1638 enum tree_code lcode, rcode;
1639 gimple def1, def2;
1640 int i;
1641 operand_entry_t oe;
1643 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1644 return false;
1646 /* Check that CURR is a comparison. */
1647 if (TREE_CODE (curr->op) != SSA_NAME)
1648 return false;
1649 def1 = SSA_NAME_DEF_STMT (curr->op);
1650 if (!is_gimple_assign (def1))
1651 return false;
1652 lcode = gimple_assign_rhs_code (def1);
1653 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1654 return false;
1655 op1 = gimple_assign_rhs1 (def1);
1656 op2 = gimple_assign_rhs2 (def1);
1658 /* Now look for a similar comparison in the remaining OPS. */
1659 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1661 tree t;
1663 if (TREE_CODE (oe->op) != SSA_NAME)
1664 continue;
1665 def2 = SSA_NAME_DEF_STMT (oe->op);
1666 if (!is_gimple_assign (def2))
1667 continue;
1668 rcode = gimple_assign_rhs_code (def2);
1669 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1670 continue;
1672 /* If we got here, we have a match. See if we can combine the
1673 two comparisons. */
1674 if (opcode == BIT_IOR_EXPR)
1675 t = maybe_fold_or_comparisons (lcode, op1, op2,
1676 rcode, gimple_assign_rhs1 (def2),
1677 gimple_assign_rhs2 (def2));
1678 else
1679 t = maybe_fold_and_comparisons (lcode, op1, op2,
1680 rcode, gimple_assign_rhs1 (def2),
1681 gimple_assign_rhs2 (def2));
1682 if (!t)
1683 continue;
1685 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1686 always give us a boolean_type_node value back. If the original
1687 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1688 we need to convert. */
1689 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1690 t = fold_convert (TREE_TYPE (curr->op), t);
1692 if (TREE_CODE (t) != INTEGER_CST
1693 && !operand_equal_p (t, curr->op, 0))
1695 enum tree_code subcode;
1696 tree newop1, newop2;
1697 if (!COMPARISON_CLASS_P (t))
1698 continue;
1699 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1700 STRIP_USELESS_TYPE_CONVERSION (newop1);
1701 STRIP_USELESS_TYPE_CONVERSION (newop2);
1702 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1703 continue;
1706 if (dump_file && (dump_flags & TDF_DETAILS))
1708 fprintf (dump_file, "Equivalence: ");
1709 print_generic_expr (dump_file, curr->op, 0);
1710 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1711 print_generic_expr (dump_file, oe->op, 0);
1712 fprintf (dump_file, " -> ");
1713 print_generic_expr (dump_file, t, 0);
1714 fprintf (dump_file, "\n");
1717 /* Now we can delete oe, as it has been subsumed by the new combined
1718 expression t. */
1719 ops->ordered_remove (i);
1720 reassociate_stats.ops_eliminated ++;
1722 /* If t is the same as curr->op, we're done. Otherwise we must
1723 replace curr->op with t. Special case is if we got a constant
1724 back, in which case we add it to the end instead of in place of
1725 the current entry. */
1726 if (TREE_CODE (t) == INTEGER_CST)
1728 ops->ordered_remove (currindex);
1729 add_to_ops_vec (ops, t);
1731 else if (!operand_equal_p (t, curr->op, 0))
1733 gimple sum;
1734 enum tree_code subcode;
1735 tree newop1;
1736 tree newop2;
1737 gcc_assert (COMPARISON_CLASS_P (t));
1738 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1739 STRIP_USELESS_TYPE_CONVERSION (newop1);
1740 STRIP_USELESS_TYPE_CONVERSION (newop2);
1741 gcc_checking_assert (is_gimple_val (newop1)
1742 && is_gimple_val (newop2));
1743 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1744 curr->op = gimple_get_lhs (sum);
1746 return true;
1749 return false;
1752 /* Perform various identities and other optimizations on the list of
1753 operand entries, stored in OPS. The tree code for the binary
1754 operation between all the operands is OPCODE. */
1756 static void
1757 optimize_ops_list (enum tree_code opcode,
1758 vec<operand_entry_t> *ops)
1760 unsigned int length = ops->length ();
1761 unsigned int i;
1762 operand_entry_t oe;
1763 operand_entry_t oelast = NULL;
1764 bool iterate = false;
1766 if (length == 1)
1767 return;
1769 oelast = ops->last ();
1771 /* If the last two are constants, pop the constants off, merge them
1772 and try the next two. */
1773 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1775 operand_entry_t oelm1 = (*ops)[length - 2];
1777 if (oelm1->rank == 0
1778 && is_gimple_min_invariant (oelm1->op)
1779 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1780 TREE_TYPE (oelast->op)))
1782 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1783 oelm1->op, oelast->op);
1785 if (folded && is_gimple_min_invariant (folded))
1787 if (dump_file && (dump_flags & TDF_DETAILS))
1788 fprintf (dump_file, "Merging constants\n");
1790 ops->pop ();
1791 ops->pop ();
1793 add_to_ops_vec (ops, folded);
1794 reassociate_stats.constants_eliminated++;
1796 optimize_ops_list (opcode, ops);
1797 return;
1802 eliminate_using_constants (opcode, ops);
1803 oelast = NULL;
1805 for (i = 0; ops->iterate (i, &oe);)
1807 bool done = false;
1809 if (eliminate_not_pairs (opcode, ops, i, oe))
1810 return;
1811 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1812 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1813 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1815 if (done)
1816 return;
1817 iterate = true;
1818 oelast = NULL;
1819 continue;
1821 oelast = oe;
1822 i++;
1825 length = ops->length ();
1826 oelast = ops->last ();
1828 if (iterate)
1829 optimize_ops_list (opcode, ops);
1832 /* The following functions are subroutines to optimize_range_tests and allow
1833 it to try to change a logical combination of comparisons into a range
1834 test.
1836 For example, both
1837 X == 2 || X == 5 || X == 3 || X == 4
1838 and
1839 X >= 2 && X <= 5
1840 are converted to
1841 (unsigned) (X - 2) <= 3
1843 For more information see comments above fold_test_range in fold-const.c,
1844 this implementation is for GIMPLE. */
1846 struct range_entry
1848 tree exp;
1849 tree low;
1850 tree high;
1851 bool in_p;
1852 bool strict_overflow_p;
1853 unsigned int idx, next;
1856 /* This is similar to make_range in fold-const.c, but on top of
1857 GIMPLE instead of trees. If EXP is non-NULL, it should be
1858 an SSA_NAME and STMT argument is ignored, otherwise STMT
1859 argument should be a GIMPLE_COND. */
1861 static void
1862 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1864 int in_p;
1865 tree low, high;
1866 bool is_bool, strict_overflow_p;
1868 r->exp = NULL_TREE;
1869 r->in_p = false;
1870 r->strict_overflow_p = false;
1871 r->low = NULL_TREE;
1872 r->high = NULL_TREE;
1873 if (exp != NULL_TREE
1874 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1875 return;
1877 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1878 and see if we can refine the range. Some of the cases below may not
1879 happen, but it doesn't seem worth worrying about this. We "continue"
1880 the outer loop when we've changed something; otherwise we "break"
1881 the switch, which will "break" the while. */
1882 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1883 high = low;
1884 in_p = 0;
1885 strict_overflow_p = false;
1886 is_bool = false;
1887 if (exp == NULL_TREE)
1888 is_bool = true;
1889 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1891 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1892 is_bool = true;
1893 else
1894 return;
1896 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1897 is_bool = true;
1899 while (1)
1901 enum tree_code code;
1902 tree arg0, arg1, exp_type;
1903 tree nexp;
1904 location_t loc;
1906 if (exp != NULL_TREE)
1908 if (TREE_CODE (exp) != SSA_NAME
1909 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1910 break;
1912 stmt = SSA_NAME_DEF_STMT (exp);
1913 if (!is_gimple_assign (stmt))
1914 break;
1916 code = gimple_assign_rhs_code (stmt);
1917 arg0 = gimple_assign_rhs1 (stmt);
1918 arg1 = gimple_assign_rhs2 (stmt);
1919 exp_type = TREE_TYPE (exp);
1921 else
1923 code = gimple_cond_code (stmt);
1924 arg0 = gimple_cond_lhs (stmt);
1925 arg1 = gimple_cond_rhs (stmt);
1926 exp_type = boolean_type_node;
1929 if (TREE_CODE (arg0) != SSA_NAME)
1930 break;
1931 loc = gimple_location (stmt);
1932 switch (code)
1934 case BIT_NOT_EXPR:
1935 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1936 /* Ensure the range is either +[-,0], +[0,0],
1937 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1938 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1939 or similar expression of unconditional true or
1940 false, it should not be negated. */
1941 && ((high && integer_zerop (high))
1942 || (low && integer_onep (low))))
1944 in_p = !in_p;
1945 exp = arg0;
1946 continue;
1948 break;
1949 case SSA_NAME:
1950 exp = arg0;
1951 continue;
1952 CASE_CONVERT:
1953 if (is_bool)
1954 goto do_default;
1955 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1957 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1958 is_bool = true;
1959 else
1960 return;
1962 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1963 is_bool = true;
1964 goto do_default;
1965 case EQ_EXPR:
1966 case NE_EXPR:
1967 case LT_EXPR:
1968 case LE_EXPR:
1969 case GE_EXPR:
1970 case GT_EXPR:
1971 is_bool = true;
1972 /* FALLTHRU */
1973 default:
1974 if (!is_bool)
1975 return;
1976 do_default:
1977 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1978 &low, &high, &in_p,
1979 &strict_overflow_p);
1980 if (nexp != NULL_TREE)
1982 exp = nexp;
1983 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1984 continue;
1986 break;
1988 break;
1990 if (is_bool)
1992 r->exp = exp;
1993 r->in_p = in_p;
1994 r->low = low;
1995 r->high = high;
1996 r->strict_overflow_p = strict_overflow_p;
2000 /* Comparison function for qsort. Sort entries
2001 without SSA_NAME exp first, then with SSA_NAMEs sorted
2002 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2003 by increasing ->low and if ->low is the same, by increasing
2004 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2005 maximum. */
2007 static int
2008 range_entry_cmp (const void *a, const void *b)
2010 const struct range_entry *p = (const struct range_entry *) a;
2011 const struct range_entry *q = (const struct range_entry *) b;
2013 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2015 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2017 /* Group range_entries for the same SSA_NAME together. */
2018 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2019 return -1;
2020 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2021 return 1;
2022 /* If ->low is different, NULL low goes first, then by
2023 ascending low. */
2024 if (p->low != NULL_TREE)
2026 if (q->low != NULL_TREE)
2028 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2029 p->low, q->low);
2030 if (tem && integer_onep (tem))
2031 return -1;
2032 tem = fold_binary (GT_EXPR, boolean_type_node,
2033 p->low, q->low);
2034 if (tem && integer_onep (tem))
2035 return 1;
2037 else
2038 return 1;
2040 else if (q->low != NULL_TREE)
2041 return -1;
2042 /* If ->high is different, NULL high goes last, before that by
2043 ascending high. */
2044 if (p->high != NULL_TREE)
2046 if (q->high != NULL_TREE)
2048 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2049 p->high, q->high);
2050 if (tem && integer_onep (tem))
2051 return -1;
2052 tem = fold_binary (GT_EXPR, boolean_type_node,
2053 p->high, q->high);
2054 if (tem && integer_onep (tem))
2055 return 1;
2057 else
2058 return -1;
2060 else if (p->high != NULL_TREE)
2061 return 1;
2062 /* If both ranges are the same, sort below by ascending idx. */
2064 else
2065 return 1;
2067 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2068 return -1;
2070 if (p->idx < q->idx)
2071 return -1;
2072 else
2074 gcc_checking_assert (p->idx > q->idx);
2075 return 1;
2079 /* Helper routine of optimize_range_tests.
2080 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2081 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2082 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2083 is NULL, OTHERRANGEP must not be NULL; it then points to
2084 an array of COUNT pointers to other ranges. Return
2085 true if the range merge has been successful.
2086 If OPCODE is ERROR_MARK, this is called from within
2087 maybe_optimize_range_tests and is performing inter-bb range optimization.
2088 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2089 oe->rank. */
2091 static bool
2092 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2093 struct range_entry **otherrangep,
2094 unsigned int count, enum tree_code opcode,
2095 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2096 bool in_p, tree low, tree high, bool strict_overflow_p)
2098 operand_entry_t oe = (*ops)[range->idx];
2099 tree op = oe->op;
2100 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2101 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2102 location_t loc = gimple_location (stmt);
2103 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2104 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2105 in_p, low, high);
2106 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2107 gimple_stmt_iterator gsi;
2108 unsigned int i;
2110 if (tem == NULL_TREE)
2111 return false;
2113 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2114 warning_at (loc, OPT_Wstrict_overflow,
2115 "assuming signed overflow does not occur "
2116 "when simplifying range test");
2118 if (dump_file && (dump_flags & TDF_DETAILS))
2120 struct range_entry *r;
2121 fprintf (dump_file, "Optimizing range tests ");
2122 print_generic_expr (dump_file, range->exp, 0);
2123 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2124 print_generic_expr (dump_file, range->low, 0);
2125 fprintf (dump_file, ", ");
2126 print_generic_expr (dump_file, range->high, 0);
2127 fprintf (dump_file, "]");
2128 for (i = 0; i < count; i++)
2130 if (otherrange)
2131 r = otherrange + i;
2132 else
2133 r = otherrangep[i];
2134 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2135 print_generic_expr (dump_file, r->low, 0);
2136 fprintf (dump_file, ", ");
2137 print_generic_expr (dump_file, r->high, 0);
2138 fprintf (dump_file, "]");
2140 fprintf (dump_file, "\n into ");
2141 print_generic_expr (dump_file, tem, 0);
2142 fprintf (dump_file, "\n");
2145 if (opcode == BIT_IOR_EXPR
2146 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2147 tem = invert_truthvalue_loc (loc, tem);
2149 tem = fold_convert_loc (loc, optype, tem);
2150 gsi = gsi_for_stmt (stmt);
2151 /* In rare cases range->exp can be equal to lhs of stmt.
2152 In that case we have to insert after the stmt rather than before
2153 it. */
2154 if (op == range->exp)
2156 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2157 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2158 GSI_CONTINUE_LINKING);
2160 else
2162 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2163 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2164 GSI_SAME_STMT);
2165 gsi_prev (&gsi);
2167 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2168 if (gimple_uid (gsi_stmt (gsi)))
2169 break;
2170 else
2171 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2173 oe->op = tem;
2174 range->exp = exp;
2175 range->low = low;
2176 range->high = high;
2177 range->in_p = in_p;
2178 range->strict_overflow_p = false;
2180 for (i = 0; i < count; i++)
2182 if (otherrange)
2183 range = otherrange + i;
2184 else
2185 range = otherrangep[i];
2186 oe = (*ops)[range->idx];
2187 /* Now change all the other range test immediate uses, so that
2188 those tests will be optimized away. */
2189 if (opcode == ERROR_MARK)
2191 if (oe->op)
2192 oe->op = build_int_cst (TREE_TYPE (oe->op),
2193 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2194 else
2195 oe->op = (oe->rank == BIT_IOR_EXPR
2196 ? boolean_false_node : boolean_true_node);
2198 else
2199 oe->op = error_mark_node;
2200 range->exp = NULL_TREE;
2202 return true;
2205 /* Optimize X == CST1 || X == CST2
2206 if popcount (CST1 ^ CST2) == 1 into
2207 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2208 Similarly for ranges. E.g.
2209 X != 2 && X != 3 && X != 10 && X != 11
2210 will be transformed by the previous optimization into
2211 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2212 and this loop can transform that into
2213 !(((X & ~8) - 2U) <= 1U). */
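/* A worked instance of the formula above (values picked for this note,
   not taken from the original comment): for X == 4 || X == 6 we have
   CST1 ^ CST2 == 2, a single bit, so the pair of comparisons becomes
   (X & ~2) == 4, which accepts exactly the values 4 and 6.  */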
2215 static bool
2216 optimize_range_tests_xor (enum tree_code opcode, tree type,
2217 tree lowi, tree lowj, tree highi, tree highj,
2218 vec<operand_entry_t> *ops,
2219 struct range_entry *rangei,
2220 struct range_entry *rangej)
2222 tree lowxor, highxor, tem, exp;
2223 /* Check lowi ^ lowj == highi ^ highj and
2224 popcount (lowi ^ lowj) == 1. */
2225 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2226 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2227 return false;
2228 if (!integer_pow2p (lowxor))
2229 return false;
2230 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2231 if (!tree_int_cst_equal (lowxor, highxor))
2232 return false;
2234 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2235 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2236 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2237 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2238 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2239 NULL, rangei->in_p, lowj, highj,
2240 rangei->strict_overflow_p
2241 || rangej->strict_overflow_p))
2242 return true;
2243 return false;
2246 /* Optimize X == CST1 || X == CST2
2247 if popcount (CST2 - CST1) == 1 into
2248 ((X - CST1) & ~(CST2 - CST1)) == 0.
2249 Similarly for ranges. E.g.
2250 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2251 || X == 75 || X == 45
2252 will be transformed by the previous optimization into
2253 (X - 43U) <= 3U || (X - 75U) <= 3U
2254 and this loop can transform that into
2255 ((X - 43U) & ~(75U - 43U)) <= 3U. */
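/* A worked instance (illustrative values only): for the two ranges
   [43, 46] and [75, 78] used above, highi - lowi == highj - lowj == 3
   and lowj - lowi == 32, a power of two, so the pair of range tests
   collapses to ((X - 43U) & ~32U) <= 3U, i.e. X - 43 with bit 5
   masked off must be at most 3.  */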
2256 static bool
2257 optimize_range_tests_diff (enum tree_code opcode, tree type,
2258 tree lowi, tree lowj, tree highi, tree highj,
2259 vec<operand_entry_t> *ops,
2260 struct range_entry *rangei,
2261 struct range_entry *rangej)
2263 tree tem1, tem2, mask;
2264 /* Check highi - lowi == highj - lowj. */
2265 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2266 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2267 return false;
2268 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2269 if (!tree_int_cst_equal (tem1, tem2))
2270 return false;
2271 /* Check popcount (lowj - lowi) == 1. */
2272 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2273 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2274 return false;
2275 if (!integer_pow2p (tem1))
2276 return false;
2278 type = unsigned_type_for (type);
2279 tem1 = fold_convert (type, tem1);
2280 tem2 = fold_convert (type, tem2);
2281 lowi = fold_convert (type, lowi);
2282 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2283 tem1 = fold_binary (MINUS_EXPR, type,
2284 fold_convert (type, rangei->exp), lowi);
2285 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2286 lowj = build_int_cst (type, 0);
2287 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2288 NULL, rangei->in_p, lowj, tem2,
2289 rangei->strict_overflow_p
2290 || rangej->strict_overflow_p))
2291 return true;
2292 return false;
2295 /* Perform the checks common to optimize_range_tests_xor and
2296 optimize_range_tests_diff on each candidate pair of ranges.
2297 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2298 Else call optimize_range_tests_diff. */
2300 static bool
2301 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2302 bool optimize_xor, vec<operand_entry_t> *ops,
2303 struct range_entry *ranges)
2305 int i, j;
2306 bool any_changes = false;
2307 for (i = first; i < length; i++)
2309 tree lowi, highi, lowj, highj, type, tem;
2311 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2312 continue;
2313 type = TREE_TYPE (ranges[i].exp);
2314 if (!INTEGRAL_TYPE_P (type))
2315 continue;
2316 lowi = ranges[i].low;
2317 if (lowi == NULL_TREE)
2318 lowi = TYPE_MIN_VALUE (type);
2319 highi = ranges[i].high;
2320 if (highi == NULL_TREE)
2321 continue;
2322 for (j = i + 1; j < length && j < i + 64; j++)
2324 bool changes;
2325 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2326 continue;
2327 lowj = ranges[j].low;
2328 if (lowj == NULL_TREE)
2329 continue;
2330 highj = ranges[j].high;
2331 if (highj == NULL_TREE)
2332 highj = TYPE_MAX_VALUE (type);
2333 /* Check lowj > highi. */
2334 tem = fold_binary (GT_EXPR, boolean_type_node,
2335 lowj, highi);
2336 if (tem == NULL_TREE || !integer_onep (tem))
2337 continue;
2338 if (optimize_xor)
2339 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2340 highi, highj, ops,
2341 ranges + i, ranges + j);
2342 else
2343 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2344 highi, highj, ops,
2345 ranges + i, ranges + j);
2346 if (changes)
2348 any_changes = true;
2349 break;
2353 return any_changes;
2356 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2357 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2358 EXP on success, NULL otherwise. */
2360 static tree
2361 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2362 wide_int *mask, tree *totallowp)
2364 tree tem = int_const_binop (MINUS_EXPR, high, low);
2365 if (tem == NULL_TREE
2366 || TREE_CODE (tem) != INTEGER_CST
2367 || TREE_OVERFLOW (tem)
2368 || tree_int_cst_sgn (tem) == -1
2369 || compare_tree_int (tem, prec) != -1)
2370 return NULL_TREE;
2372 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2373 *mask = wi::shifted_mask (0, max, false, prec);
2374 if (TREE_CODE (exp) == BIT_AND_EXPR
2375 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2377 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2378 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2379 if (wi::popcount (msk) == 1
2380 && wi::ltu_p (msk, prec - max))
2382 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2383 max += msk.to_uhwi ();
2384 exp = TREE_OPERAND (exp, 0);
2385 if (integer_zerop (low)
2386 && TREE_CODE (exp) == PLUS_EXPR
2387 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2389 widest_int bias
2390 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2391 TYPE_PRECISION (TREE_TYPE (low))));
2392 tree tbias = wide_int_to_tree (TREE_TYPE (low), bias);
2393 if (totallowp)
2395 *totallowp = tbias;
2396 exp = TREE_OPERAND (exp, 0);
2397 STRIP_NOPS (exp);
2398 return exp;
2400 else if (!tree_int_cst_lt (totallow, tbias))
2401 return NULL_TREE;
2402 bias -= wi::to_widest (totallow);
2403 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2405 *mask = wi::lshift (*mask, bias);
2406 exp = TREE_OPERAND (exp, 0);
2407 STRIP_NOPS (exp);
2408 return exp;
2413 if (totallowp)
2414 return exp;
2415 if (!tree_int_cst_lt (totallow, low))
2416 return exp;
2417 tem = int_const_binop (MINUS_EXPR, low, totallow);
2418 if (tem == NULL_TREE
2419 || TREE_CODE (tem) != INTEGER_CST
2420 || TREE_OVERFLOW (tem)
2421 || compare_tree_int (tem, prec - max) == 1)
2422 return NULL_TREE;
2424 *mask = wi::lshift (*mask, wi::to_widest (tem));
2425 return exp;
2428 /* Attempt to optimize small range tests using bit test.
2429 E.g.
2430 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2431 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2432 has been optimized by earlier optimizations into:
2433 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2434 As the whole 43 through 82 range spans fewer than 64 values,
2435 for 64-bit word targets optimize that into:
2436 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
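/* Illustrative breakdown of the example above (not part of the original
   comment): relative to 43 the tested values 43-46, 49, 75-78 and 82
   become 0-3, 6, 32-35 and 39, so the mask has bits 0-3 (0xFULL),
   bit 6 (0x40ULL), bits 32-35 (0xF00000000ULL) and bit 39
   (0x8000000000ULL) set, which sums to 0x8F0000004FULL; the
   (X - 43U) > 40U guard covers everything outside the window handled
   by the 64-bit mask.  */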
2438 static bool
2439 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2440 vec<operand_entry_t> *ops,
2441 struct range_entry *ranges)
2443 int i, j;
2444 bool any_changes = false;
2445 int prec = GET_MODE_BITSIZE (word_mode);
2446 auto_vec<struct range_entry *, 64> candidates;
2448 for (i = first; i < length - 2; i++)
2450 tree lowi, highi, lowj, highj, type;
2452 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2453 continue;
2454 type = TREE_TYPE (ranges[i].exp);
2455 if (!INTEGRAL_TYPE_P (type))
2456 continue;
2457 lowi = ranges[i].low;
2458 if (lowi == NULL_TREE)
2459 lowi = TYPE_MIN_VALUE (type);
2460 highi = ranges[i].high;
2461 if (highi == NULL_TREE)
2462 continue;
2463 wide_int mask;
2464 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2465 highi, &mask, &lowi);
2466 if (exp == NULL_TREE)
2467 continue;
2468 bool strict_overflow_p = ranges[i].strict_overflow_p;
2469 candidates.truncate (0);
2470 int end = MIN (i + 64, length);
2471 for (j = i + 1; j < end; j++)
2473 tree exp2;
2474 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2475 continue;
2476 if (ranges[j].exp == exp)
2478 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2480 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2481 if (exp2 == exp)
2483 else if (TREE_CODE (exp2) == PLUS_EXPR)
2485 exp2 = TREE_OPERAND (exp2, 0);
2486 STRIP_NOPS (exp2);
2487 if (exp2 != exp)
2488 continue;
2490 else
2491 continue;
2493 else
2494 continue;
2495 lowj = ranges[j].low;
2496 if (lowj == NULL_TREE)
2497 continue;
2498 highj = ranges[j].high;
2499 if (highj == NULL_TREE)
2500 highj = TYPE_MAX_VALUE (type);
2501 wide_int mask2;
2502 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2503 highj, &mask2, NULL);
2504 if (exp2 != exp)
2505 continue;
2506 mask |= mask2;
2507 strict_overflow_p |= ranges[j].strict_overflow_p;
2508 candidates.safe_push (&ranges[j]);
2511 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2512 if (candidates.length () >= 2)
2514 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2515 wi::to_widest (lowi)
2516 + prec - wi::clz (mask));
2517 operand_entry_t oe = (*ops)[ranges[i].idx];
2518 tree op = oe->op;
2519 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2520 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2521 location_t loc = gimple_location (stmt);
2522 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2524 /* See if it isn't cheaper to pretend the minimum value of the
2525 range is 0, if the maximum value is small enough.
2526 We can then avoid the subtraction of the minimum value, but the
2527 mask constant could perhaps be more expensive. */
2528 if (compare_tree_int (lowi, 0) > 0
2529 && compare_tree_int (high, prec) < 0)
2531 int cost_diff;
2532 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2533 rtx reg = gen_raw_REG (word_mode, 10000);
2534 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2535 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2536 GEN_INT (-m)), speed_p);
2537 rtx r = immed_wide_int_const (mask, word_mode);
2538 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2539 speed_p);
2540 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2541 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2542 speed_p);
2543 if (cost_diff > 0)
2545 mask = wi::lshift (mask, m);
2546 lowi = build_zero_cst (TREE_TYPE (lowi));
2550 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2551 false, lowi, high);
2552 if (tem == NULL_TREE || is_gimple_val (tem))
2553 continue;
2554 tree etype = unsigned_type_for (TREE_TYPE (exp));
2555 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2556 fold_convert_loc (loc, etype, exp),
2557 fold_convert_loc (loc, etype, lowi));
2558 exp = fold_convert_loc (loc, integer_type_node, exp);
2559 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2560 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2561 build_int_cst (word_type, 1), exp);
2562 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2563 wide_int_to_tree (word_type, mask));
2564 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2565 build_zero_cst (word_type));
2566 if (is_gimple_val (exp))
2567 continue;
2569 /* The shift might have undefined behavior if TEM is true,
2570 but reassociate_bb isn't prepared to have basic blocks
2571 split when it is running. So, temporarily emit code
2572 with BIT_IOR_EXPR instead of &&, and fix it up in
2573 branch_fixup. */
2574 gimple_seq seq;
2575 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2576 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2577 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2578 gimple_seq seq2;
2579 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2580 gimple_seq_add_seq_without_update (&seq, seq2);
2581 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2582 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2583 gimple g
2584 = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2585 make_ssa_name (optype, NULL),
2586 tem, exp);
2587 gimple_set_location (g, loc);
2588 gimple_seq_add_stmt_without_update (&seq, g);
2589 exp = gimple_assign_lhs (g);
2590 tree val = build_zero_cst (optype);
2591 if (update_range_test (&ranges[i], NULL, candidates.address (),
2592 candidates.length (), opcode, ops, exp,
2593 seq, false, val, val, strict_overflow_p))
2595 any_changes = true;
2596 reassoc_branch_fixups.safe_push (tem);
2598 else
2599 gimple_seq_discard (seq);
2602 return any_changes;
2605 /* Optimize range tests, similarly to how fold_range_test optimizes
2606 them on trees. The tree code for the binary
2607 operation between all the operands is OPCODE.
2608 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2609 maybe_optimize_range_tests for inter-bb range optimization.
2610 In that case if oe->op is NULL, oe->id is bb->index whose
2611 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2612 the actual opcode. */
2614 static bool
2615 optimize_range_tests (enum tree_code opcode,
2616 vec<operand_entry_t> *ops)
2618 unsigned int length = ops->length (), i, j, first;
2619 operand_entry_t oe;
2620 struct range_entry *ranges;
2621 bool any_changes = false;
2623 if (length == 1)
2624 return false;
2626 ranges = XNEWVEC (struct range_entry, length);
2627 for (i = 0; i < length; i++)
2629 oe = (*ops)[i];
2630 ranges[i].idx = i;
2631 init_range_entry (ranges + i, oe->op,
2632 oe->op ? NULL :
2633 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2634 /* For | invert it now, we will invert it again before emitting
2635 the optimized expression. */
2636 if (opcode == BIT_IOR_EXPR
2637 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2638 ranges[i].in_p = !ranges[i].in_p;
2641 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2642 for (i = 0; i < length; i++)
2643 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2644 break;
2646 /* Try to merge ranges. */
2647 for (first = i; i < length; i++)
2649 tree low = ranges[i].low;
2650 tree high = ranges[i].high;
2651 int in_p = ranges[i].in_p;
2652 bool strict_overflow_p = ranges[i].strict_overflow_p;
2653 int update_fail_count = 0;
2655 for (j = i + 1; j < length; j++)
2657 if (ranges[i].exp != ranges[j].exp)
2658 break;
2659 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2660 ranges[j].in_p, ranges[j].low, ranges[j].high))
2661 break;
2662 strict_overflow_p |= ranges[j].strict_overflow_p;
2665 if (j == i + 1)
2666 continue;
2668 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2669 opcode, ops, ranges[i].exp, NULL, in_p,
2670 low, high, strict_overflow_p))
2672 i = j - 1;
2673 any_changes = true;
2675 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2676 while update_range_test would fail. */
2677 else if (update_fail_count == 64)
2678 i = j - 1;
2679 else
2680 ++update_fail_count;
2683 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2684 ops, ranges);
2686 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2687 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2688 ops, ranges);
2689 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2690 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2691 ops, ranges);
2693 if (any_changes && opcode != ERROR_MARK)
2695 j = 0;
2696 FOR_EACH_VEC_ELT (*ops, i, oe)
2698 if (oe->op == error_mark_node)
2699 continue;
2700 else if (i != j)
2701 (*ops)[j] = oe;
2702 j++;
2704 ops->truncate (j);
2707 XDELETEVEC (ranges);
2708 return any_changes;
2711 /* Return true if STMT is a cast like:
2712 <bb N>:
2714 _123 = (int) _234;
2716 <bb M>:
2717 # _345 = PHI <_123(N), 1(...), 1(...)>
2718 where _234 has bool type, _123 has a single use and
2719 bb N has a single successor M. This is commonly used in
2720 the last block of a range test. */
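/* For illustration (hypothetical source, not from the original comment),
   a function such as
     int f (int a) { return a == 3 || a == 7; }
   typically ends its range test with a block that computes the boolean
   result of the last comparison and casts it, _123 = (int) _234,
   feeding the result PHI in the successor block; that trailing cast is
   what this predicate recognizes.  */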
2722 static bool
2723 final_range_test_p (gimple stmt)
2725 basic_block bb, rhs_bb;
2726 edge e;
2727 tree lhs, rhs;
2728 use_operand_p use_p;
2729 gimple use_stmt;
2731 if (!gimple_assign_cast_p (stmt))
2732 return false;
2733 bb = gimple_bb (stmt);
2734 if (!single_succ_p (bb))
2735 return false;
2736 e = single_succ_edge (bb);
2737 if (e->flags & EDGE_COMPLEX)
2738 return false;
2740 lhs = gimple_assign_lhs (stmt);
2741 rhs = gimple_assign_rhs1 (stmt);
2742 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2743 || TREE_CODE (rhs) != SSA_NAME
2744 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2745 return false;
2747 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2748 if (!single_imm_use (lhs, &use_p, &use_stmt))
2749 return false;
2751 if (gimple_code (use_stmt) != GIMPLE_PHI
2752 || gimple_bb (use_stmt) != e->dest)
2753 return false;
2755 /* And that the rhs is defined in the same loop. */
2756 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2757 if (rhs_bb == NULL
2758 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2759 return false;
2761 return true;
2764 /* Return true if BB is a suitable basic block for inter-bb range test
2765 optimization. If BACKWARD is true, BB should be the only predecessor
2766 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2767 or compared against to find the common basic block to which all
2768 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
2769 should be the only predecessor of BB. */
2771 static bool
2772 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2773 bool backward)
2775 edge_iterator ei, ei2;
2776 edge e, e2;
2777 gimple stmt;
2778 gimple_stmt_iterator gsi;
2779 bool other_edge_seen = false;
2780 bool is_cond;
2782 if (test_bb == bb)
2783 return false;
2784 /* Check last stmt first. */
2785 stmt = last_stmt (bb);
2786 if (stmt == NULL
2787 || (gimple_code (stmt) != GIMPLE_COND
2788 && (backward || !final_range_test_p (stmt)))
2789 || gimple_visited_p (stmt)
2790 || stmt_could_throw_p (stmt)
2791 || *other_bb == bb)
2792 return false;
2793 is_cond = gimple_code (stmt) == GIMPLE_COND;
2794 if (is_cond)
2796 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2797 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2798 to *OTHER_BB (if not set yet, try to find it out). */
2799 if (EDGE_COUNT (bb->succs) != 2)
2800 return false;
2801 FOR_EACH_EDGE (e, ei, bb->succs)
2803 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2804 return false;
2805 if (e->dest == test_bb)
2807 if (backward)
2808 continue;
2809 else
2810 return false;
2812 if (e->dest == bb)
2813 return false;
2814 if (*other_bb == NULL)
2816 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2817 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2818 return false;
2819 else if (e->dest == e2->dest)
2820 *other_bb = e->dest;
2821 if (*other_bb == NULL)
2822 return false;
2824 if (e->dest == *other_bb)
2825 other_edge_seen = true;
2826 else if (backward)
2827 return false;
2829 if (*other_bb == NULL || !other_edge_seen)
2830 return false;
2832 else if (single_succ (bb) != *other_bb)
2833 return false;
2835 /* Now check all PHIs of *OTHER_BB. */
2836 e = find_edge (bb, *other_bb);
2837 e2 = find_edge (test_bb, *other_bb);
2838 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2840 gimple phi = gsi_stmt (gsi);
2841 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2842 corresponding to BB and TEST_BB predecessor must be the same. */
2843 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2844 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2846 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2847 one of the PHIs should have the lhs of the last stmt in
2848 that block as PHI arg and that PHI should have 0 or 1
2849 corresponding to it in all other range test basic blocks
2850 considered. */
2851 if (!is_cond)
2853 if (gimple_phi_arg_def (phi, e->dest_idx)
2854 == gimple_assign_lhs (stmt)
2855 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2856 || integer_onep (gimple_phi_arg_def (phi,
2857 e2->dest_idx))))
2858 continue;
2860 else
2862 gimple test_last = last_stmt (test_bb);
2863 if (gimple_code (test_last) != GIMPLE_COND
2864 && gimple_phi_arg_def (phi, e2->dest_idx)
2865 == gimple_assign_lhs (test_last)
2866 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2867 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2868 continue;
2871 return false;
2874 return true;
2877 /* Return true if BB doesn't have side-effects that would disallow
2878 range test optimization, all SSA_NAMEs set in the bb are consumed
2879 in the bb and there are no PHIs. */
2881 static bool
2882 no_side_effect_bb (basic_block bb)
2884 gimple_stmt_iterator gsi;
2885 gimple last;
2887 if (!gimple_seq_empty_p (phi_nodes (bb)))
2888 return false;
2889 last = last_stmt (bb);
2890 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2892 gimple stmt = gsi_stmt (gsi);
2893 tree lhs;
2894 imm_use_iterator imm_iter;
2895 use_operand_p use_p;
2897 if (is_gimple_debug (stmt))
2898 continue;
2899 if (gimple_has_side_effects (stmt))
2900 return false;
2901 if (stmt == last)
2902 return true;
2903 if (!is_gimple_assign (stmt))
2904 return false;
2905 lhs = gimple_assign_lhs (stmt);
2906 if (TREE_CODE (lhs) != SSA_NAME)
2907 return false;
2908 if (gimple_assign_rhs_could_trap_p (stmt))
2909 return false;
2910 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2912 gimple use_stmt = USE_STMT (use_p);
2913 if (is_gimple_debug (use_stmt))
2914 continue;
2915 if (gimple_bb (use_stmt) != bb)
2916 return false;
2919 return false;
2922 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2923 return true and fill in *OPS recursively. */
2925 static bool
2926 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2927 struct loop *loop)
2929 gimple stmt = SSA_NAME_DEF_STMT (var);
2930 tree rhs[2];
2931 int i;
2933 if (!is_reassociable_op (stmt, code, loop))
2934 return false;
2936 rhs[0] = gimple_assign_rhs1 (stmt);
2937 rhs[1] = gimple_assign_rhs2 (stmt);
2938 gimple_set_visited (stmt, true);
2939 for (i = 0; i < 2; i++)
2940 if (TREE_CODE (rhs[i]) == SSA_NAME
2941 && !get_ops (rhs[i], code, ops, loop)
2942 && has_single_use (rhs[i]))
2944 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2946 oe->op = rhs[i];
2947 oe->rank = code;
2948 oe->id = 0;
2949 oe->count = 1;
2950 ops->safe_push (oe);
2952 return true;
2955 /* Find the ops that were added by get_ops starting from VAR, see if
2956 they were changed during update_range_test and if yes, create new
2957 stmts. */
2959 static tree
2960 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2961 unsigned int *pidx, struct loop *loop)
2963 gimple stmt = SSA_NAME_DEF_STMT (var);
2964 tree rhs[4];
2965 int i;
2967 if (!is_reassociable_op (stmt, code, loop))
2968 return NULL;
2970 rhs[0] = gimple_assign_rhs1 (stmt);
2971 rhs[1] = gimple_assign_rhs2 (stmt);
2972 rhs[2] = rhs[0];
2973 rhs[3] = rhs[1];
2974 for (i = 0; i < 2; i++)
2975 if (TREE_CODE (rhs[i]) == SSA_NAME)
2977 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2978 if (rhs[2 + i] == NULL_TREE)
2980 if (has_single_use (rhs[i]))
2981 rhs[2 + i] = ops[(*pidx)++]->op;
2982 else
2983 rhs[2 + i] = rhs[i];
2986 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2987 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2989 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2990 var = make_ssa_name (TREE_TYPE (var), NULL);
2991 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2992 var, rhs[2], rhs[3]);
2993 gimple_set_uid (g, gimple_uid (stmt));
2994 gimple_set_visited (g, true);
2995 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2997 return var;
3000 /* Structure to track the initial value passed to get_ops and
3001 the range in the ops vector for each basic block. */
3003 struct inter_bb_range_test_entry
3005 tree op;
3006 unsigned int first_idx, last_idx;
3009 /* Inter-bb range test optimization. */
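/* Illustrative shape this routine handles (made-up example): for
     if (a == 3 || a == 5)
       foo ();
   GIMPLE splits the short-circuit || across basic blocks, each ending
   in a GIMPLE_COND that branches to a common destination; the per-block
   tests are collected into one ops vector and handed to
   optimize_range_tests with opcode ERROR_MARK, so oe->rank records
   whether each test is &&-ed or ||-ed into the whole.  */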
3011 static void
3012 maybe_optimize_range_tests (gimple stmt)
3014 basic_block first_bb = gimple_bb (stmt);
3015 basic_block last_bb = first_bb;
3016 basic_block other_bb = NULL;
3017 basic_block bb;
3018 edge_iterator ei;
3019 edge e;
3020 auto_vec<operand_entry_t> ops;
3021 auto_vec<inter_bb_range_test_entry> bbinfo;
3022 bool any_changes = false;
3024 /* Consider only basic blocks that end with GIMPLE_COND or
3025 a cast statement satisfying final_range_test_p. All
3026 but the last bb in the first_bb .. last_bb range
3027 should end with GIMPLE_COND. */
3028 if (gimple_code (stmt) == GIMPLE_COND)
3030 if (EDGE_COUNT (first_bb->succs) != 2)
3031 return;
3033 else if (final_range_test_p (stmt))
3034 other_bb = single_succ (first_bb);
3035 else
3036 return;
3038 if (stmt_could_throw_p (stmt))
3039 return;
3041 /* As relative ordering of post-dominator sons isn't fixed,
3042 maybe_optimize_range_tests can be called first on any
3043 bb in the range we want to optimize. So, start searching
3044 backwards, if first_bb can be set to a predecessor. */
3045 while (single_pred_p (first_bb))
3047 basic_block pred_bb = single_pred (first_bb);
3048 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3049 break;
3050 if (!no_side_effect_bb (first_bb))
3051 break;
3052 first_bb = pred_bb;
3054 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3055 Before starting forward search in last_bb successors, find
3056 out the other_bb. */
3057 if (first_bb == last_bb)
3059 other_bb = NULL;
3060 /* As non-GIMPLE_COND last stmt always terminates the range,
3061 if forward search didn't discover anything, just give up. */
3062 if (gimple_code (stmt) != GIMPLE_COND)
3063 return;
3064 /* Look at both successors. Either it ends with a GIMPLE_COND
3065 and satisfies suitable_cond_bb, or ends with a cast and
3066 other_bb is that cast's successor. */
3067 FOR_EACH_EDGE (e, ei, first_bb->succs)
3068 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3069 || e->dest == first_bb)
3070 return;
3071 else if (single_pred_p (e->dest))
3073 stmt = last_stmt (e->dest);
3074 if (stmt
3075 && gimple_code (stmt) == GIMPLE_COND
3076 && EDGE_COUNT (e->dest->succs) == 2)
3078 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3079 break;
3080 else
3081 other_bb = NULL;
3083 else if (stmt
3084 && final_range_test_p (stmt)
3085 && find_edge (first_bb, single_succ (e->dest)))
3087 other_bb = single_succ (e->dest);
3088 if (other_bb == first_bb)
3089 other_bb = NULL;
3092 if (other_bb == NULL)
3093 return;
3095 /* Now do the forward search, moving last_bb to successor bbs
3096 that aren't other_bb. */
3097 while (EDGE_COUNT (last_bb->succs) == 2)
3099 FOR_EACH_EDGE (e, ei, last_bb->succs)
3100 if (e->dest != other_bb)
3101 break;
3102 if (e == NULL)
3103 break;
3104 if (!single_pred_p (e->dest))
3105 break;
3106 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3107 break;
3108 if (!no_side_effect_bb (e->dest))
3109 break;
3110 last_bb = e->dest;
3112 if (first_bb == last_bb)
3113 return;
3114 /* Here basic blocks first_bb through last_bb's predecessor
3115 end with GIMPLE_COND, all of them have one of the edges to
3116 other_bb and another to another block in the range,
3117 all blocks except first_bb don't have side-effects and
3118 last_bb ends with either GIMPLE_COND, or cast satisfying
3119 final_range_test_p. */
3120 for (bb = last_bb; ; bb = single_pred (bb))
3122 enum tree_code code;
3123 tree lhs, rhs;
3124 inter_bb_range_test_entry bb_ent;
3126 bb_ent.op = NULL_TREE;
3127 bb_ent.first_idx = ops.length ();
3128 bb_ent.last_idx = bb_ent.first_idx;
3129 e = find_edge (bb, other_bb);
3130 stmt = last_stmt (bb);
3131 gimple_set_visited (stmt, true);
3132 if (gimple_code (stmt) != GIMPLE_COND)
3134 use_operand_p use_p;
3135 gimple phi;
3136 edge e2;
3137 unsigned int d;
3139 lhs = gimple_assign_lhs (stmt);
3140 rhs = gimple_assign_rhs1 (stmt);
3141 gcc_assert (bb == last_bb);
3143 /* stmt is
3144 _123 = (int) _234;
3146 followed by:
3147 <bb M>:
3148 # _345 = PHI <_123(N), 1(...), 1(...)>
3150 or 0 instead of 1. If it is 0, the _234
3151 range test is anded together with all the
3152 other range tests, if it is 1, it is ored with
3153 them. */
3154 single_imm_use (lhs, &use_p, &phi);
3155 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3156 e2 = find_edge (first_bb, other_bb);
3157 d = e2->dest_idx;
3158 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3159 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3160 code = BIT_AND_EXPR;
3161 else
3163 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3164 code = BIT_IOR_EXPR;
3167 /* If _234 SSA_NAME_DEF_STMT is
3168 _234 = _567 | _789;
3169 (or &, corresponding to 1/0 in the phi arguments),
3170 push into ops the individual range test arguments
3171 of the bitwise or resp. and, recursively. */
3172 if (!get_ops (rhs, code, &ops,
3173 loop_containing_stmt (stmt))
3174 && has_single_use (rhs))
3176 /* Otherwise, push the _234 range test itself. */
3177 operand_entry_t oe
3178 = (operand_entry_t) pool_alloc (operand_entry_pool);
3180 oe->op = rhs;
3181 oe->rank = code;
3182 oe->id = 0;
3183 oe->count = 1;
3184 ops.safe_push (oe);
3185 bb_ent.last_idx++;
3187 else
3188 bb_ent.last_idx = ops.length ();
3189 bb_ent.op = rhs;
3190 bbinfo.safe_push (bb_ent);
3191 continue;
3193 /* Otherwise stmt is GIMPLE_COND. */
3194 code = gimple_cond_code (stmt);
3195 lhs = gimple_cond_lhs (stmt);
3196 rhs = gimple_cond_rhs (stmt);
3197 if (TREE_CODE (lhs) == SSA_NAME
3198 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3199 && ((code != EQ_EXPR && code != NE_EXPR)
3200 || rhs != boolean_false_node
3201 /* Either push into ops the individual bitwise
3202 or resp. and operands, depending on which
3203 edge is other_bb. */
3204 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3205 ^ (code == EQ_EXPR))
3206 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3207 loop_containing_stmt (stmt))))
3209 /* Or push the GIMPLE_COND stmt itself. */
3210 operand_entry_t oe
3211 = (operand_entry_t) pool_alloc (operand_entry_pool);
3213 oe->op = NULL;
3214 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3215 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3216 /* oe->op = NULL signals that there is no SSA_NAME
3217 for the range test, and oe->id instead is the
3218 basic block number, at whose end the GIMPLE_COND
3219 is. */
3220 oe->id = bb->index;
3221 oe->count = 1;
3222 ops.safe_push (oe);
3223 bb_ent.op = NULL;
3224 bb_ent.last_idx++;
3226 else if (ops.length () > bb_ent.first_idx)
3228 bb_ent.op = lhs;
3229 bb_ent.last_idx = ops.length ();
3231 bbinfo.safe_push (bb_ent);
3232 if (bb == first_bb)
3233 break;
3235 if (ops.length () > 1)
3236 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3237 if (any_changes)
3239 unsigned int idx;
3240 /* update_ops relies on has_single_use predicates returning the
3241 same values as they did during get_ops earlier. Additionally it
3242 never removes statements, only adds new ones and it should walk
3243 from the single imm use and check the predicate already before
3244 making those changes.
3245 On the other side, the handling of GIMPLE_COND directly can turn
3246 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3247 it needs to be done in a separate loop afterwards. */
3248 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3250 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3251 && bbinfo[idx].op != NULL_TREE)
3253 tree new_op;
3255 stmt = last_stmt (bb);
3256 new_op = update_ops (bbinfo[idx].op,
3257 (enum tree_code)
3258 ops[bbinfo[idx].first_idx]->rank,
3259 ops, &bbinfo[idx].first_idx,
3260 loop_containing_stmt (stmt));
3261 if (new_op == NULL_TREE)
3263 gcc_assert (bb == last_bb);
3264 new_op = ops[bbinfo[idx].first_idx++]->op;
3266 if (bbinfo[idx].op != new_op)
3268 imm_use_iterator iter;
3269 use_operand_p use_p;
3270 gimple use_stmt, cast_stmt = NULL;
3272 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3273 if (is_gimple_debug (use_stmt))
3274 continue;
3275 else if (gimple_code (use_stmt) == GIMPLE_COND
3276 || gimple_code (use_stmt) == GIMPLE_PHI)
3277 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3278 SET_USE (use_p, new_op);
3279 else if (gimple_assign_cast_p (use_stmt))
3280 cast_stmt = use_stmt;
3281 else
3282 gcc_unreachable ();
3283 if (cast_stmt)
3285 gcc_assert (bb == last_bb);
3286 tree lhs = gimple_assign_lhs (cast_stmt);
3287 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3288 enum tree_code rhs_code
3289 = gimple_assign_rhs_code (cast_stmt);
3290 gimple g;
3291 if (is_gimple_min_invariant (new_op))
3293 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3294 g = gimple_build_assign (new_lhs, new_op);
3296 else
3297 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3298 new_op, NULL_TREE);
3299 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3300 gimple_set_uid (g, gimple_uid (cast_stmt));
3301 gimple_set_visited (g, true);
3302 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3303 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3304 if (is_gimple_debug (use_stmt))
3305 continue;
3306 else if (gimple_code (use_stmt) == GIMPLE_COND
3307 || gimple_code (use_stmt) == GIMPLE_PHI)
3308 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3309 SET_USE (use_p, new_lhs);
3310 else
3311 gcc_unreachable ();
3315 if (bb == first_bb)
3316 break;
3318 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3320 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3321 && bbinfo[idx].op == NULL_TREE
3322 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3324 stmt = last_stmt (bb);
3325 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3326 gimple_cond_make_false (stmt);
3327 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3328 gimple_cond_make_true (stmt);
3329 else
3331 gimple_cond_set_code (stmt, NE_EXPR);
3332 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3333 gimple_cond_set_rhs (stmt, boolean_false_node);
3335 update_stmt (stmt);
3337 if (bb == first_bb)
3338 break;
3343 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3344 of STMT in it's operands. This is also known as a "destructive
3345 update" operation. */
3347 static bool
3348 is_phi_for_stmt (gimple stmt, tree operand)
3350 gimple def_stmt;
3351 tree lhs;
3352 use_operand_p arg_p;
3353 ssa_op_iter i;
3355 if (TREE_CODE (operand) != SSA_NAME)
3356 return false;
3358 lhs = gimple_assign_lhs (stmt);
3360 def_stmt = SSA_NAME_DEF_STMT (operand);
3361 if (gimple_code (def_stmt) != GIMPLE_PHI)
3362 return false;
3364 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3365 if (lhs == USE_FROM_PTR (arg_p))
3366 return true;
3367 return false;
3370 /* Remove def stmt of VAR if VAR has zero uses and recurse
3371 on rhs1 operand if so. */
3373 static void
3374 remove_visited_stmt_chain (tree var)
3376 gimple stmt;
3377 gimple_stmt_iterator gsi;
3379 while (1)
3381 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3382 return;
3383 stmt = SSA_NAME_DEF_STMT (var);
3384 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3386 var = gimple_assign_rhs1 (stmt);
3387 gsi = gsi_for_stmt (stmt);
3388 reassoc_remove_stmt (&gsi);
3389 release_defs (stmt);
3391 else
3392 return;
3396 /* This function checks three consecutive operands in
3397 the passed operands vector OPS starting from OPINDEX and
3398 swaps two operands if it is profitable for the binary operation
3399 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3401 We pair ops with the same rank if possible.
3403 The alternative we try is to see if STMT is a destructive
3404 update style statement, which is like:
3405 b = phi (a, ...)
3406 a = c + b;
3407 In that case, we want to use the destructive update form to
3408 expose the possible vectorizer sum reduction opportunity.
3409 In that case, the third operand will be the phi node. This
3410 check is not performed if STMT is null.
3412 We could, of course, try to be better as noted above, and do a
3413 lot of work to try to find these opportunities in >3 operand
3414 cases, but it is unlikely to be worth it. */
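/* A small worked case (ranks invented for this note): with operand
   ranks {3, 3, 5} at OPINDEX, OPINDEX + 1 and OPINDEX + 2, the first
   and third entries are swapped to give {5, 3, 3}, so the binary
   statement consuming the last two operands pairs the two rank-3
   operands.  */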
3416 static void
3417 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3418 unsigned int opindex, gimple stmt)
3420 operand_entry_t oe1, oe2, oe3;
3422 oe1 = ops[opindex];
3423 oe2 = ops[opindex + 1];
3424 oe3 = ops[opindex + 2];
3426 if ((oe1->rank == oe2->rank
3427 && oe2->rank != oe3->rank)
3428 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3429 && !is_phi_for_stmt (stmt, oe1->op)
3430 && !is_phi_for_stmt (stmt, oe2->op)))
3432 struct operand_entry temp = *oe3;
3433 oe3->op = oe1->op;
3434 oe3->rank = oe1->rank;
3435 oe1->op = temp.op;
3436 oe1->rank= temp.rank;
3438 else if ((oe1->rank == oe3->rank
3439 && oe2->rank != oe3->rank)
3440 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3441 && !is_phi_for_stmt (stmt, oe1->op)
3442 && !is_phi_for_stmt (stmt, oe3->op)))
3444 struct operand_entry temp = *oe2;
3445 oe2->op = oe1->op;
3446 oe2->rank = oe1->rank;
3447 oe1->op = temp.op;
3448 oe1->rank = temp.rank;
3452 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3453 two definitions, otherwise return STMT. */
3455 static inline gimple
3456 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3458 if (TREE_CODE (rhs1) == SSA_NAME
3459 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3460 stmt = SSA_NAME_DEF_STMT (rhs1);
3461 if (TREE_CODE (rhs2) == SSA_NAME
3462 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3463 stmt = SSA_NAME_DEF_STMT (rhs2);
3464 return stmt;
3467 /* Recursively rewrite our linearized statements so that the operators
3468 match those in OPS[OPINDEX], putting the computation in rank
3469 order. Return new lhs. */
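/* For illustration (hypothetical SSA names): with OPS == {a_1, b_2, c_3}
   and the linearized chain
     t_4 = x_5 + y_6;
     t_7 = t_4 + z_8;
   the recursion rewrites the inner statement to compute b_2 + c_3 and
   the outer one to add a_1 (ops[0]) as its second operand, giving
   (b_2 + c_3) + a_1 in rank order.  */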
3471 static tree
3472 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3473 vec<operand_entry_t> ops, bool changed)
3475 tree rhs1 = gimple_assign_rhs1 (stmt);
3476 tree rhs2 = gimple_assign_rhs2 (stmt);
3477 tree lhs = gimple_assign_lhs (stmt);
3478 operand_entry_t oe;
3480 /* The final recursion case for this function is that you have
3481 exactly two operations left.
3482 If we had exactly one op in the entire list to start with, we
3483 would have never called this function, and the tail recursion
3484 rewrites them one at a time. */
3485 if (opindex + 2 == ops.length ())
3487 operand_entry_t oe1, oe2;
3489 oe1 = ops[opindex];
3490 oe2 = ops[opindex + 1];
3492 if (rhs1 != oe1->op || rhs2 != oe2->op)
3494 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3495 unsigned int uid = gimple_uid (stmt);
3497 if (dump_file && (dump_flags & TDF_DETAILS))
3499 fprintf (dump_file, "Transforming ");
3500 print_gimple_stmt (dump_file, stmt, 0, 0);
3503 if (changed)
3505 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3506 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3507 stmt
3508 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3509 lhs, oe1->op, oe2->op);
3510 gimple_set_uid (stmt, uid);
3511 gimple_set_visited (stmt, true);
3512 if (insert_point == gsi_stmt (gsi))
3513 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3514 else
3515 insert_stmt_after (stmt, insert_point);
3517 else
3519 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3520 == stmt);
3521 gimple_assign_set_rhs1 (stmt, oe1->op);
3522 gimple_assign_set_rhs2 (stmt, oe2->op);
3523 update_stmt (stmt);
3526 if (rhs1 != oe1->op && rhs1 != oe2->op)
3527 remove_visited_stmt_chain (rhs1);
3529 if (dump_file && (dump_flags & TDF_DETAILS))
3531 fprintf (dump_file, " into ");
3532 print_gimple_stmt (dump_file, stmt, 0, 0);
3535 return lhs;
3538 /* If we hit here, we should have 3 or more ops left. */
3539 gcc_assert (opindex + 2 < ops.length ());
3541 /* Rewrite the next operator. */
3542 oe = ops[opindex];
3544 /* Recurse on the LHS of the binary operator, which is guaranteed to
3545 be the non-leaf side. */
3546 tree new_rhs1
3547 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3548 changed || oe->op != rhs2);
3550 if (oe->op != rhs2 || new_rhs1 != rhs1)
3552 if (dump_file && (dump_flags & TDF_DETAILS))
3554 fprintf (dump_file, "Transforming ");
3555 print_gimple_stmt (dump_file, stmt, 0, 0);
3558 /* If changed is false, this is either opindex == 0
3559 or all outer rhs2's were equal to corresponding oe->op,
3560 and powi_result is NULL.
3561 That means lhs is equivalent before and after reassociation.
3562 Otherwise ensure the old lhs SSA_NAME is not reused and
3563 create a new stmt as well, so that any debug stmts will be
3564 properly adjusted. */
3565 if (changed)
3567 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3568 unsigned int uid = gimple_uid (stmt);
3569 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3571 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3572 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3573 lhs, new_rhs1, oe->op);
3574 gimple_set_uid (stmt, uid);
3575 gimple_set_visited (stmt, true);
3576 if (insert_point == gsi_stmt (gsi))
3577 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3578 else
3579 insert_stmt_after (stmt, insert_point);
3581 else
3583 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3584 == stmt);
3585 gimple_assign_set_rhs1 (stmt, new_rhs1);
3586 gimple_assign_set_rhs2 (stmt, oe->op);
3587 update_stmt (stmt);
3590 if (dump_file && (dump_flags & TDF_DETAILS))
3592 fprintf (dump_file, " into ");
3593 print_gimple_stmt (dump_file, stmt, 0, 0);
3596 return lhs;
3599 /* Find out how many cycles we need to compute a statement chain.
3600 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3601 maximum number of independent statements we may execute per cycle. */
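/* Worked example (numbers chosen for this note): get_required_cycles
   (10, 2) first spends 10 / (2 * 2) == 2 cycles of width-limited
   reduction, leaving rest == 10 - 2 * 2 == 6; 6 is not a power of two,
   so floor_log2 (6) + 1 == 3 further cycles are added, for a total
   of 5.  */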
3603 static int
3604 get_required_cycles (int ops_num, int cpu_width)
3606 int res;
3607 int elog;
3608 unsigned int rest;
3610 /* While we have more than 2 * cpu_width operands
3611 we may reduce number of operands by cpu_width
3612 per cycle. */
3613 res = ops_num / (2 * cpu_width);
3615 /* The count of remaining operands may be halved each cycle
3616 until we have only one operand. */
3617 rest = (unsigned)(ops_num - res * cpu_width);
3618 elog = exact_log2 (rest);
3619 if (elog >= 0)
3620 res += elog;
3621 else
3622 res += floor_log2 (rest) + 1;
3624 return res;
3627 /* Returns an optimal number of registers to use for computation of
3628 given statements. */
3630 static int
3631 get_reassociation_width (int ops_num, enum tree_code opc,
3632 enum machine_mode mode)
3634 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3635 int width;
3636 int width_min;
3637 int cycles_best;
3639 if (param_width > 0)
3640 width = param_width;
3641 else
3642 width = targetm.sched.reassociation_width (opc, mode);
3644 if (width == 1)
3645 return width;
3647 /* Get the minimal time required for sequence computation. */
3648 cycles_best = get_required_cycles (ops_num, width);
3650 /* Check if we may use a smaller width and still compute the sequence
3651 in the same time. That will allow us to reduce register usage.
3652 get_required_cycles is monotonically increasing with lower width
3653 so we can perform a binary search for the minimal width that still
3654 results in the optimal cycle count. */
3655 width_min = 1;
3656 while (width > width_min)
3658 int width_mid = (width + width_min) / 2;
3660 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3661 width = width_mid;
3662 else if (width_min < width_mid)
3663 width_min = width_mid;
3664 else
3665 break;
3668 return width;
3671 /* Recursively rewrite our linearized statements so that the operators
3672 match those in OPS[OPINDEX], putting the computation in rank
3673 order and trying to allow operations to be executed in
3674 parallel. */
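/* Illustration (hypothetical operands): for a + b + c + d with width 2
   the rewrite emits roughly
     t1 = c + d;  t2 = a + b;  t3 = t1 + t2;
   so the two leading additions are independent and may issue in the
   same cycle, unlike the serial chain ((a + b) + c) + d.  */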
3676 static void
3677 rewrite_expr_tree_parallel (gimple stmt, int width,
3678 vec<operand_entry_t> ops)
3680 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3681 int op_num = ops.length ();
3682 int stmt_num = op_num - 1;
3683 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3684 int op_index = op_num - 1;
3685 int stmt_index = 0;
3686 int ready_stmts_end = 0;
3687 int i = 0;
3688 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3690 /* We start expression rewriting from the top statements.
3691 So, in this loop we create a full list of statements
3692 we will work with. */
3693 stmts[stmt_num - 1] = stmt;
3694 for (i = stmt_num - 2; i >= 0; i--)
3695 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3697 for (i = 0; i < stmt_num; i++)
3699 tree op1, op2;
3701 /* Determine whether we should use results of
3702 already handled statements or not. */
3703 if (ready_stmts_end == 0
3704 && (i - stmt_index >= width || op_index < 1))
3705 ready_stmts_end = i;
3707 /* Now we choose operands for the next statement. A non-zero
3708 value in ready_stmts_end here means that we should use
3709 the results of already generated statements as new operands. */
3710 if (ready_stmts_end > 0)
3712 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3713 if (ready_stmts_end > stmt_index)
3714 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3715 else if (op_index >= 0)
3716 op2 = ops[op_index--]->op;
3717 else
3719 gcc_assert (stmt_index < i);
3720 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3723 if (stmt_index >= ready_stmts_end)
3724 ready_stmts_end = 0;
3726 else
3728 if (op_index > 1)
3729 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3730 op2 = ops[op_index--]->op;
3731 op1 = ops[op_index--]->op;
3734 /* If we emit the last statement then we should put
3735 operands into the last statement. It will also
3736 break the loop. */
3737 if (op_index < 0 && stmt_index == i)
3738 i = stmt_num - 1;
3740 if (dump_file && (dump_flags & TDF_DETAILS))
3742 fprintf (dump_file, "Transforming ");
3743 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3746 /* We keep original statement only for the last one. All
3747 others are recreated. */
3748 if (i == stmt_num - 1)
3750 gimple_assign_set_rhs1 (stmts[i], op1);
3751 gimple_assign_set_rhs2 (stmts[i], op2);
3752 update_stmt (stmts[i]);
3754 else
3755 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3757 if (dump_file && (dump_flags & TDF_DETAILS))
3759 fprintf (dump_file, " into ");
3760 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3764 remove_visited_stmt_chain (last_rhs1);
3767 /* Transform STMT, which is really (A + B) + (C + D), into the left
3768 linear form, ((A+B)+C)+D.
3769 Recurse on D if necessary. */
3771 static void
3772 linearize_expr (gimple stmt)
3774 gimple_stmt_iterator gsi;
3775 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3776 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3777 gimple oldbinrhs = binrhs;
3778 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3779 gimple newbinrhs = NULL;
3780 struct loop *loop = loop_containing_stmt (stmt);
3781 tree lhs = gimple_assign_lhs (stmt);
3783 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3784 && is_reassociable_op (binrhs, rhscode, loop));
3786 gsi = gsi_for_stmt (stmt);
3788 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3789 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3790 make_ssa_name (TREE_TYPE (lhs), NULL),
3791 gimple_assign_lhs (binlhs),
3792 gimple_assign_rhs2 (binrhs));
3793 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3794 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3795 gimple_set_uid (binrhs, gimple_uid (stmt));
3797 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3798 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3800 if (dump_file && (dump_flags & TDF_DETAILS))
3802 fprintf (dump_file, "Linearized: ");
3803 print_gimple_stmt (dump_file, stmt, 0, 0);
3806 reassociate_stats.linearized++;
3807 update_stmt (stmt);
3809 gsi = gsi_for_stmt (oldbinrhs);
3810 reassoc_remove_stmt (&gsi);
3811 release_defs (oldbinrhs);
3813 gimple_set_visited (stmt, true);
3814 gimple_set_visited (binlhs, true);
3815 gimple_set_visited (binrhs, true);
3817 /* Tail recurse on the new rhs if it still needs reassociation. */
3818 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3819 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3820 want to change the algorithm while converting to tuples. */
3821 linearize_expr (stmt);
3824 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3825 it. Otherwise, return NULL. */
3827 static gimple
3828 get_single_immediate_use (tree lhs)
3830 use_operand_p immuse;
3831 gimple immusestmt;
3833 if (TREE_CODE (lhs) == SSA_NAME
3834 && single_imm_use (lhs, &immuse, &immusestmt)
3835 && is_gimple_assign (immusestmt))
3836 return immusestmt;
3838 return NULL;
3841 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3842 representing the negated value. Insertions of any necessary
3843 instructions go before GSI.
3844 This function is recursive in that, if you hand it "a_5" as the
3845 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3846 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3848 static tree
3849 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3851 gimple negatedefstmt = NULL;
3852 tree resultofnegate;
3853 gimple_stmt_iterator gsi;
3854 unsigned int uid;
3856 /* If we are trying to negate a name, defined by an add, negate the
3857 add operands instead. */
3858 if (TREE_CODE (tonegate) == SSA_NAME)
3859 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3860 if (TREE_CODE (tonegate) == SSA_NAME
3861 && is_gimple_assign (negatedefstmt)
3862 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3863 && has_single_use (gimple_assign_lhs (negatedefstmt))
3864 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3866 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3867 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3868 tree lhs = gimple_assign_lhs (negatedefstmt);
3869 gimple g;
3871 gsi = gsi_for_stmt (negatedefstmt);
3872 rhs1 = negate_value (rhs1, &gsi);
3874 gsi = gsi_for_stmt (negatedefstmt);
3875 rhs2 = negate_value (rhs2, &gsi);
3877 gsi = gsi_for_stmt (negatedefstmt);
3878 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3879 gimple_set_visited (negatedefstmt, true);
3880 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3881 gimple_set_uid (g, gimple_uid (negatedefstmt));
3882 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3883 return lhs;
3886 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3887 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3888 NULL_TREE, true, GSI_SAME_STMT);
3889 gsi = *gsip;
3890 uid = gimple_uid (gsi_stmt (gsi));
3891 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3893 gimple stmt = gsi_stmt (gsi);
3894 if (gimple_uid (stmt) != 0)
3895 break;
3896 gimple_set_uid (stmt, uid);
3898 return resultofnegate;
3901 /* Return true if we should break up the subtract in STMT into an add
3902 with negate. This is true when the subtract's operands are really
3903 adds, or the subtract itself is used in an add expression. In
3904 either case, breaking up the subtract into an add with negate
3905 exposes the adds to reassociation. */
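/* Illustration (made-up names): in
     x = a + b;
     y = x - c;
     z = y + d;
   the subtract both consumes an add and feeds one, so it is rewritten
   as y = x + (-c), exposing the whole computation as a single chain of
   additions to the reassociation machinery.  */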
3907 static bool
3908 should_break_up_subtract (gimple stmt)
3910 tree lhs = gimple_assign_lhs (stmt);
3911 tree binlhs = gimple_assign_rhs1 (stmt);
3912 tree binrhs = gimple_assign_rhs2 (stmt);
3913 gimple immusestmt;
3914 struct loop *loop = loop_containing_stmt (stmt);
3916 if (TREE_CODE (binlhs) == SSA_NAME
3917 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3918 return true;
3920 if (TREE_CODE (binrhs) == SSA_NAME
3921 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3922 return true;
3924 if (TREE_CODE (lhs) == SSA_NAME
3925 && (immusestmt = get_single_immediate_use (lhs))
3926 && is_gimple_assign (immusestmt)
3927 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3928 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3929 return true;
3930 return false;
3933 /* Transform STMT from A - B into A + -B. */
3935 static void
3936 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3938 tree rhs1 = gimple_assign_rhs1 (stmt);
3939 tree rhs2 = gimple_assign_rhs2 (stmt);
3941 if (dump_file && (dump_flags & TDF_DETAILS))
3943 fprintf (dump_file, "Breaking up subtract ");
3944 print_gimple_stmt (dump_file, stmt, 0, 0);
3947 rhs2 = negate_value (rhs2, gsip);
3948 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3949 update_stmt (stmt);
3952 /* Determine whether STMT is a builtin call that raises an SSA name
3953 to an integer power and has only one use. If so, and this is early
3954 reassociation and unsafe math optimizations are permitted, place
3955 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3956 If any of these conditions does not hold, return FALSE. */
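/* For example, under -funsafe-math-optimizations a single-use call
     t_3 = __builtin_pow (x_1, 4.0);
   yields *BASE = x_1 and *EXPONENT = 4, whereas pow (x_1, 0.5) and
   pow (x_1, -2.0) are rejected (non-integral resp. negative exponent),
   as is any call whose first argument is not an SSA name.  */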
3958 static bool
3959 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3961 tree fndecl, arg1;
3962 REAL_VALUE_TYPE c, cint;
3964 if (!first_pass_instance
3965 || !flag_unsafe_math_optimizations
3966 || !is_gimple_call (stmt)
3967 || !has_single_use (gimple_call_lhs (stmt)))
3968 return false;
3970 fndecl = gimple_call_fndecl (stmt);
3972 if (!fndecl
3973 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3974 return false;
3976 switch (DECL_FUNCTION_CODE (fndecl))
3978 CASE_FLT_FN (BUILT_IN_POW):
3979 *base = gimple_call_arg (stmt, 0);
3980 arg1 = gimple_call_arg (stmt, 1);
3982 if (TREE_CODE (arg1) != REAL_CST)
3983 return false;
3985 c = TREE_REAL_CST (arg1);
3987 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3988 return false;
3990 *exponent = real_to_integer (&c);
3991 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3992 if (!real_identical (&c, &cint))
3993 return false;
3995 break;
3997 CASE_FLT_FN (BUILT_IN_POWI):
3998 *base = gimple_call_arg (stmt, 0);
3999 arg1 = gimple_call_arg (stmt, 1);
4001 if (!tree_fits_shwi_p (arg1))
4002 return false;
4004 *exponent = tree_to_shwi (arg1);
4005 break;
4007 default:
4008 return false;
4011 /* Expanding negative exponents is generally unproductive, so we don't
4012 complicate matters with those. Exponents of zero and one should
4013 have been handled by expression folding. */
4014 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4015 return false;
4017 return true;
4020 /* Recursively linearize a binary expression that is the RHS of STMT.
4021 Place the operands of the expression tree in the vector named OPS. */
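/* E.g. for w_6 = (a_2 + b_3) + (c_4 + d_5) the walk leaves
   OPS = { a_2, b_3, c_4, d_5 }.  Within a MULT_EXPR chain, an operand
   defined by an acceptable pow/powi call (see acceptable_pow_call) is
   recorded as a repeated factor via add_repeat_to_ops_vec rather than
   as a plain operand.  */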
4023 static void
4024 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4025 bool is_associative, bool set_visited)
4027 tree binlhs = gimple_assign_rhs1 (stmt);
4028 tree binrhs = gimple_assign_rhs2 (stmt);
4029 gimple binlhsdef = NULL, binrhsdef = NULL;
4030 bool binlhsisreassoc = false;
4031 bool binrhsisreassoc = false;
4032 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4033 struct loop *loop = loop_containing_stmt (stmt);
4034 tree base = NULL_TREE;
4035 HOST_WIDE_INT exponent = 0;
4037 if (set_visited)
4038 gimple_set_visited (stmt, true);
4040 if (TREE_CODE (binlhs) == SSA_NAME)
4042 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4043 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4044 && !stmt_could_throw_p (binlhsdef));
4047 if (TREE_CODE (binrhs) == SSA_NAME)
4049 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4050 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4051 && !stmt_could_throw_p (binrhsdef));
4054 /* If the LHS is not reassociable, but the RHS is, we need to swap
4055 them. If neither is reassociable, there is nothing we can do, so
4056 just put them in the ops vector. If the LHS is reassociable,
4057 linearize it. If both are reassociable, then linearize the RHS
4058 and the LHS. */
4060 if (!binlhsisreassoc)
4062 tree temp;
4064 /* If this is not an associative operation (division, for example), give up. */
4065 if (!is_associative)
4067 add_to_ops_vec (ops, binrhs);
4068 return;
4071 if (!binrhsisreassoc)
4073 if (rhscode == MULT_EXPR
4074 && TREE_CODE (binrhs) == SSA_NAME
4075 && acceptable_pow_call (binrhsdef, &base, &exponent))
4077 add_repeat_to_ops_vec (ops, base, exponent);
4078 gimple_set_visited (binrhsdef, true);
4080 else
4081 add_to_ops_vec (ops, binrhs);
4083 if (rhscode == MULT_EXPR
4084 && TREE_CODE (binlhs) == SSA_NAME
4085 && acceptable_pow_call (binlhsdef, &base, &exponent))
4087 add_repeat_to_ops_vec (ops, base, exponent);
4088 gimple_set_visited (binlhsdef, true);
4090 else
4091 add_to_ops_vec (ops, binlhs);
4093 return;
4096 if (dump_file && (dump_flags & TDF_DETAILS))
4098 fprintf (dump_file, "swapping operands of ");
4099 print_gimple_stmt (dump_file, stmt, 0, 0);
4102 swap_ssa_operands (stmt,
4103 gimple_assign_rhs1_ptr (stmt),
4104 gimple_assign_rhs2_ptr (stmt));
4105 update_stmt (stmt);
4107 if (dump_file && (dump_flags & TDF_DETAILS))
4109 fprintf (dump_file, " is now ");
4110 print_gimple_stmt (dump_file, stmt, 0, 0);
4113 /* We want to make it so the lhs is always the reassociative op,
4114 so swap. */
4115 temp = binlhs;
4116 binlhs = binrhs;
4117 binrhs = temp;
4119 else if (binrhsisreassoc)
4121 linearize_expr (stmt);
4122 binlhs = gimple_assign_rhs1 (stmt);
4123 binrhs = gimple_assign_rhs2 (stmt);
4126 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4127 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4128 rhscode, loop));
4129 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4130 is_associative, set_visited);
4132 if (rhscode == MULT_EXPR
4133 && TREE_CODE (binrhs) == SSA_NAME
4134 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4136 add_repeat_to_ops_vec (ops, base, exponent);
4137 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4139 else
4140 add_to_ops_vec (ops, binrhs);
4143 /* Repropagate the negates back into subtracts, since no other pass
4144 currently does it. */
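/* Three shapes are handled below, sketched on plain SSA names:
     x = -a;  y = b + x    ==>   y = b - a
     x = -a;  y = x - b    ==>   x' = a + b;  y = -x'
     x = -a;  y = b - x    ==>   y = b + a
   The middle form pushes the negate down and re-queues it in
   plus_negates so a later iteration may merge it further.  */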
4146 static void
4147 repropagate_negates (void)
4149 unsigned int i = 0;
4150 tree negate;
4152 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4154 gimple user = get_single_immediate_use (negate);
4156 if (!user || !is_gimple_assign (user))
4157 continue;
4159 /* The negate operand can be either operand of a PLUS_EXPR
4160 (it can be the LHS if the RHS is a constant for example).
4162 Force the negate operand to the RHS of the PLUS_EXPR, then
4163 transform the PLUS_EXPR into a MINUS_EXPR. */
4164 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4166 /* If the negated operand appears on the LHS of the
4167 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4168 to force the negated operand to the RHS of the PLUS_EXPR. */
4169 if (gimple_assign_rhs1 (user) == negate)
4171 swap_ssa_operands (user,
4172 gimple_assign_rhs1_ptr (user),
4173 gimple_assign_rhs2_ptr (user));
4176 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4177 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4178 if (gimple_assign_rhs2 (user) == negate)
4180 tree rhs1 = gimple_assign_rhs1 (user);
4181 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4182 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4183 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4184 update_stmt (user);
4187 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4189 if (gimple_assign_rhs1 (user) == negate)
4191 /* We have
4192 x = -a
4193 y = x - b
4194 which we transform into
4195 x = a + b
4196 y = -x .
4197 This pushes down the negate which we possibly can merge
4198 into some other operation, hence insert it into the
4199 plus_negates vector. */
4200 gimple feed = SSA_NAME_DEF_STMT (negate);
4201 tree a = gimple_assign_rhs1 (feed);
4202 tree b = gimple_assign_rhs2 (user);
4203 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4204 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4205 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
4206 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
4207 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4208 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
4209 user = gsi_stmt (gsi2);
4210 update_stmt (user);
4211 reassoc_remove_stmt (&gsi);
4212 release_defs (feed);
4213 plus_negates.safe_push (gimple_assign_lhs (user));
4215 else
4217 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4218 rid of one operation. */
4219 gimple feed = SSA_NAME_DEF_STMT (negate);
4220 tree a = gimple_assign_rhs1 (feed);
4221 tree rhs1 = gimple_assign_rhs1 (user);
4222 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4223 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4224 update_stmt (gsi_stmt (gsi));
4230 /* Returns true if OP is of a type for which we can do reassociation.
4231 That is for integral types with wrapping overflow, for non-saturating
4232 fixed-point types, and for floating point types when associative-math is enabled. */
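/* So, for example, an unsigned int (wrapping overflow) qualifies, a
   plain signed int with undefined overflow does not, and double only
   qualifies when -fassociative-math is in effect.  */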
4234 static bool
4235 can_reassociate_p (tree op)
4237 tree type = TREE_TYPE (op);
4238 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4239 || NON_SAT_FIXED_POINT_TYPE_P (type)
4240 || (flag_associative_math && FLOAT_TYPE_P (type)))
4241 return true;
4242 return false;
4245 /* Break up subtract operations in block BB.
4247 We do this top down because we don't know whether the subtract is
4248 part of a possible chain of reassociation except at the top.
4250 IE given
4251 d = f + g
4252 c = a + e
4253 b = c - d
4254 q = b - r
4255 k = t - q
4257 we want to break up k = t - q, but we won't until we've transformed q
4258 = b - r, which won't be broken up until we transform b = c - d.
4260 En passant, clear the GIMPLE visited flag on every statement
4261 and set UIDs within each basic block. */
4263 static void
4264 break_up_subtract_bb (basic_block bb)
4266 gimple_stmt_iterator gsi;
4267 basic_block son;
4268 unsigned int uid = 1;
4270 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4272 gimple stmt = gsi_stmt (gsi);
4273 gimple_set_visited (stmt, false);
4274 gimple_set_uid (stmt, uid++);
4276 if (!is_gimple_assign (stmt)
4277 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4278 continue;
4280 /* Look for simple gimple subtract operations. */
4281 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4283 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4284 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4285 continue;
4287 /* Check for a subtract used only in an addition. If this
4288 is the case, transform it into add of a negate for better
4289 reassociation. IE transform C = A-B into C = A + -B if C
4290 is only used in an addition. */
4291 if (should_break_up_subtract (stmt))
4292 break_up_subtract (stmt, &gsi);
4294 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4295 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4296 plus_negates.safe_push (gimple_assign_lhs (stmt));
4298 for (son = first_dom_son (CDI_DOMINATORS, bb);
4299 son;
4300 son = next_dom_son (CDI_DOMINATORS, son))
4301 break_up_subtract_bb (son);
4304 /* Used for repeated factor analysis. */
4305 struct repeat_factor_d
4307 /* An SSA name that occurs in a multiply chain. */
4308 tree factor;
4310 /* Cached rank of the factor. */
4311 unsigned rank;
4313 /* Number of occurrences of the factor in the chain. */
4314 HOST_WIDE_INT count;
4316 /* An SSA name representing the product of this factor and
4317 all factors appearing later in the repeated factor vector. */
4318 tree repr;
4321 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4322 typedef const struct repeat_factor_d *const_repeat_factor_t;
4325 static vec<repeat_factor> repeat_factor_vec;
4327 /* Used for sorting the repeat factor vector. Sort primarily by
4328 ascending occurrence count, secondarily by descending rank. */
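/* E.g. the factors of x*x * y*y*y * z*z*z*z sort to (x,2) (y,3) (z,4):
   least-repeated factor first, so partial products over the tail of
   the vector can be cached and reused (see the comment in
   attempt_builtin_powi below).  */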
4330 static int
4331 compare_repeat_factors (const void *x1, const void *x2)
4333 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4334 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4336 if (rf1->count != rf2->count)
4337 return rf1->count - rf2->count;
4339 return rf2->rank - rf1->rank;
4342 /* Look for repeated operands in OPS in the multiply tree rooted at
4343 STMT. Replace them with an optimal sequence of multiplies and powi
4344 builtin calls, and remove the used operands from OPS. Return an
4345 SSA name representing the value of the replacement sequence. */
4347 static tree
4348 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4350 unsigned i, j, vec_len;
4351 int ii;
4352 operand_entry_t oe;
4353 repeat_factor_t rf1, rf2;
4354 repeat_factor rfnew;
4355 tree result = NULL_TREE;
4356 tree target_ssa, iter_result;
4357 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4358 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4359 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4360 gimple mul_stmt, pow_stmt;
4362 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4363 target. */
4364 if (!powi_fndecl)
4365 return NULL_TREE;
4367 /* Allocate the repeated factor vector. */
4368 repeat_factor_vec.create (10);
4370 /* Scan the OPS vector for all SSA names in the product and build
4371 up a vector of occurrence counts for each factor. */
4372 FOR_EACH_VEC_ELT (*ops, i, oe)
4374 if (TREE_CODE (oe->op) == SSA_NAME)
4376 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4378 if (rf1->factor == oe->op)
4380 rf1->count += oe->count;
4381 break;
4385 if (j >= repeat_factor_vec.length ())
4387 rfnew.factor = oe->op;
4388 rfnew.rank = oe->rank;
4389 rfnew.count = oe->count;
4390 rfnew.repr = NULL_TREE;
4391 repeat_factor_vec.safe_push (rfnew);
4396 /* Sort the repeated factor vector by (a) increasing occurrence count,
4397 and (b) decreasing rank. */
4398 repeat_factor_vec.qsort (compare_repeat_factors);
4400 /* It is generally best to combine as many base factors as possible
4401 into a product before applying __builtin_powi to the result.
4402 However, the sort order chosen for the repeated factor vector
4403 allows us to cache partial results for the product of the base
4404 factors for subsequent use. When we already have a cached partial
4405 result from a previous iteration, it is best to make use of it
4406 before looking for another __builtin_pow opportunity.
4408 As an example, consider x * x * y * y * y * z * z * z * z.
4409 We want to first compose the product x * y * z, raise it to the
4410 second power, then multiply this by y * z, and finally multiply
4411 by z. This can be done in 5 multiplies provided we cache y * z
4412 for use in both expressions:
4414 t1 = y * z
4415 t2 = t1 * x
4416 t3 = t2 * t2
4417 t4 = t1 * t3
4418 result = t4 * z
4420 If we instead ignored the cached y * z and first multiplied by
4421 the __builtin_pow opportunity z * z, we would get the inferior:
4423 t1 = y * z
4424 t2 = t1 * x
4425 t3 = t2 * t2
4426 t4 = z * z
4427 t5 = t3 * t4
4428 result = t5 * y */
4430 vec_len = repeat_factor_vec.length ();
4432 /* Repeatedly look for opportunities to create a builtin_powi call. */
4433 while (true)
4435 HOST_WIDE_INT power;
4437 /* First look for the largest cached product of factors from
4438 preceding iterations. If found, create a builtin_powi for
4439 it if the minimum occurrence count for its factors is at
4440 least 2, or just use this cached product as our next
4441 multiplicand if the minimum occurrence count is 1. */
4442 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4444 if (rf1->repr && rf1->count > 0)
4445 break;
4448 if (j < vec_len)
4450 power = rf1->count;
4452 if (power == 1)
4454 iter_result = rf1->repr;
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4458 unsigned elt;
4459 repeat_factor_t rf;
4460 fputs ("Multiplying by cached product ", dump_file);
4461 for (elt = j; elt < vec_len; elt++)
4463 rf = &repeat_factor_vec[elt];
4464 print_generic_expr (dump_file, rf->factor, 0);
4465 if (elt < vec_len - 1)
4466 fputs (" * ", dump_file);
4468 fputs ("\n", dump_file);
4471 else
4473 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4474 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4475 build_int_cst (integer_type_node,
4476 power));
4477 gimple_call_set_lhs (pow_stmt, iter_result);
4478 gimple_set_location (pow_stmt, gimple_location (stmt));
4479 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4481 if (dump_file && (dump_flags & TDF_DETAILS))
4483 unsigned elt;
4484 repeat_factor_t rf;
4485 fputs ("Building __builtin_pow call for cached product (",
4486 dump_file);
4487 for (elt = j; elt < vec_len; elt++)
4489 rf = &repeat_factor_vec[elt];
4490 print_generic_expr (dump_file, rf->factor, 0);
4491 if (elt < vec_len - 1)
4492 fputs (" * ", dump_file);
4494 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4495 power);
4499 else
4501 /* Otherwise, find the first factor in the repeated factor
4502 vector whose occurrence count is at least 2. If no such
4503 factor exists, there are no builtin_powi opportunities
4504 remaining. */
4505 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4507 if (rf1->count >= 2)
4508 break;
4511 if (j >= vec_len)
4512 break;
4514 power = rf1->count;
4516 if (dump_file && (dump_flags & TDF_DETAILS))
4518 unsigned elt;
4519 repeat_factor_t rf;
4520 fputs ("Building __builtin_pow call for (", dump_file);
4521 for (elt = j; elt < vec_len; elt++)
4523 rf = &repeat_factor_vec[elt];
4524 print_generic_expr (dump_file, rf->factor, 0);
4525 if (elt < vec_len - 1)
4526 fputs (" * ", dump_file);
4528 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4531 reassociate_stats.pows_created++;
4533 /* Visit each element of the vector in reverse order (so that
4534 high-occurrence elements are visited first, and within the
4535 same occurrence count, lower-ranked elements are visited
4536 first). Form a linear product of all elements in this order
4537 whose occurrence count is at least that of element J.
4538 Record the SSA name representing the product of each element
4539 with all subsequent elements in the vector. */
4540 if (j == vec_len - 1)
4541 rf1->repr = rf1->factor;
4542 else
4544 for (ii = vec_len - 2; ii >= (int)j; ii--)
4546 tree op1, op2;
4548 rf1 = &repeat_factor_vec[ii];
4549 rf2 = &repeat_factor_vec[ii + 1];
4551 /* Init the last factor's representative to be itself. */
4552 if (!rf2->repr)
4553 rf2->repr = rf2->factor;
4555 op1 = rf1->factor;
4556 op2 = rf2->repr;
4558 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4559 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4560 target_ssa,
4561 op1, op2);
4562 gimple_set_location (mul_stmt, gimple_location (stmt));
4563 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4564 rf1->repr = target_ssa;
4566 /* Don't reprocess the multiply we just introduced. */
4567 gimple_set_visited (mul_stmt, true);
4571 /* Form a call to __builtin_powi for the maximum product
4572 just formed, raised to the power obtained earlier. */
4573 rf1 = &repeat_factor_vec[j];
4574 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4575 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4576 build_int_cst (integer_type_node,
4577 power));
4578 gimple_call_set_lhs (pow_stmt, iter_result);
4579 gimple_set_location (pow_stmt, gimple_location (stmt));
4580 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4583 /* If we previously formed at least one other builtin_powi call,
4584 form the product of this one and those others. */
4585 if (result)
4587 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4588 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4589 result, iter_result);
4590 gimple_set_location (mul_stmt, gimple_location (stmt));
4591 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4592 gimple_set_visited (mul_stmt, true);
4593 result = new_result;
4595 else
4596 result = iter_result;
4598 /* Decrement the occurrence count of each element in the product
4599 by the count found above, and remove this many copies of each
4600 factor from OPS. */
4601 for (i = j; i < vec_len; i++)
4603 unsigned k = power;
4604 unsigned n;
4606 rf1 = &repeat_factor_vec[i];
4607 rf1->count -= power;
4609 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4611 if (oe->op == rf1->factor)
4613 if (oe->count <= k)
4615 ops->ordered_remove (n);
4616 k -= oe->count;
4618 if (k == 0)
4619 break;
4621 else
4623 oe->count -= k;
4624 break;
4631 /* At this point all elements in the repeated factor vector have a
4632 remaining occurrence count of 0 or 1, and those with a count of 1
4633 don't have cached representatives. Re-sort the ops vector and
4634 clean up. */
4635 ops->qsort (sort_by_operand_rank);
4636 repeat_factor_vec.release ();
4638 /* Return the final product computed herein. Note that there may
4639 still be some elements with single occurrence count left in OPS;
4640 those will be handled by the normal reassociation logic. */
4641 return result;
4644 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4646 static void
4647 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4649 tree rhs1;
4651 if (dump_file && (dump_flags & TDF_DETAILS))
4653 fprintf (dump_file, "Transforming ");
4654 print_gimple_stmt (dump_file, stmt, 0, 0);
4657 rhs1 = gimple_assign_rhs1 (stmt);
4658 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4659 update_stmt (stmt);
4660 remove_visited_stmt_chain (rhs1);
4662 if (dump_file && (dump_flags & TDF_DETAILS))
4664 fprintf (dump_file, " into ");
4665 print_gimple_stmt (dump_file, stmt, 0, 0);
4669 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4671 static void
4672 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4673 tree rhs1, tree rhs2)
4675 if (dump_file && (dump_flags & TDF_DETAILS))
4677 fprintf (dump_file, "Transforming ");
4678 print_gimple_stmt (dump_file, stmt, 0, 0);
4681 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4682 update_stmt (gsi_stmt (*gsi));
4683 remove_visited_stmt_chain (rhs1);
4685 if (dump_file && (dump_flags & TDF_DETAILS))
4687 fprintf (dump_file, " into ");
4688 print_gimple_stmt (dump_file, stmt, 0, 0);
4692 /* Reassociate expressions in basic block BB and its post-dominator
4693 children. */
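/* Roughly, each associative assignment handled below is linearized
   into an operand vector, the vector is sorted by rank and run through
   the operand-list optimizations (including undistribution and
   range-test folding), repeated factors may be combined into a
   __builtin_powi call, and the statement chain is finally rewritten
   either serially or, when a reassociation width greater than one is
   profitable, in parallel form via rewrite_expr_tree_parallel.  */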
4695 static void
4696 reassociate_bb (basic_block bb)
4698 gimple_stmt_iterator gsi;
4699 basic_block son;
4700 gimple stmt = last_stmt (bb);
4702 if (stmt && !gimple_visited_p (stmt))
4703 maybe_optimize_range_tests (stmt);
4705 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4707 stmt = gsi_stmt (gsi);
4709 if (is_gimple_assign (stmt)
4710 && !stmt_could_throw_p (stmt))
4712 tree lhs, rhs1, rhs2;
4713 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4715 /* If this is not a gimple binary expression, there is
4716 nothing for us to do with it. */
4717 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4718 continue;
4720 /* If this was part of an already processed statement,
4721 we don't need to touch it again. */
4722 if (gimple_visited_p (stmt))
4724 /* This statement might have become dead because of previous
4725 reassociations. */
4726 if (has_zero_uses (gimple_get_lhs (stmt)))
4728 reassoc_remove_stmt (&gsi);
4729 release_defs (stmt);
4730 /* We might end up removing the last stmt above, which
4731 leaves the iterator at the end of the sequence.  Reset
4732 it to the last stmt of this block in that case; if that
4733 is still the end of the sequence (because we removed the
4734 block's last statement), there is nothing left to
4735 process, so bail out. */
4736 if (gsi_end_p (gsi))
4738 gsi = gsi_last_bb (bb);
4739 if (gsi_end_p (gsi))
4740 break;
4743 continue;
4746 lhs = gimple_assign_lhs (stmt);
4747 rhs1 = gimple_assign_rhs1 (stmt);
4748 rhs2 = gimple_assign_rhs2 (stmt);
4750 /* For operations other than bitwise ones and min/max we can't
4751 associate all types. Verify that here. */
4752 if (rhs_code != BIT_IOR_EXPR
4753 && rhs_code != BIT_AND_EXPR
4754 && rhs_code != BIT_XOR_EXPR
4755 && rhs_code != MIN_EXPR
4756 && rhs_code != MAX_EXPR
4757 && (!can_reassociate_p (lhs)
4758 || !can_reassociate_p (rhs1)
4759 || !can_reassociate_p (rhs2)))
4760 continue;
4762 if (associative_tree_code (rhs_code))
4764 auto_vec<operand_entry_t> ops;
4765 tree powi_result = NULL_TREE;
4767 /* There may be no immediate uses left by the time we
4768 get here because we may have eliminated them all. */
4769 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4770 continue;
4772 gimple_set_visited (stmt, true);
4773 linearize_expr_tree (&ops, stmt, true, true);
4774 ops.qsort (sort_by_operand_rank);
4775 optimize_ops_list (rhs_code, &ops);
4776 if (undistribute_ops_list (rhs_code, &ops,
4777 loop_containing_stmt (stmt)))
4779 ops.qsort (sort_by_operand_rank);
4780 optimize_ops_list (rhs_code, &ops);
4783 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4784 optimize_range_tests (rhs_code, &ops);
4786 if (first_pass_instance
4787 && rhs_code == MULT_EXPR
4788 && flag_unsafe_math_optimizations)
4789 powi_result = attempt_builtin_powi (stmt, &ops);
4791 /* If the operand vector is now empty, all operands were
4792 consumed by the __builtin_powi optimization. */
4793 if (ops.length () == 0)
4794 transform_stmt_to_copy (&gsi, stmt, powi_result);
4795 else if (ops.length () == 1)
4797 tree last_op = ops.last ()->op;
4799 if (powi_result)
4800 transform_stmt_to_multiply (&gsi, stmt, last_op,
4801 powi_result);
4802 else
4803 transform_stmt_to_copy (&gsi, stmt, last_op);
4805 else
4807 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4808 int ops_num = ops.length ();
4809 int width = get_reassociation_width (ops_num, rhs_code, mode);
4810 tree new_lhs = lhs;
4812 if (dump_file && (dump_flags & TDF_DETAILS))
4813 fprintf (dump_file,
4814 "Width = %d was chosen for reassociation\n", width);
4816 if (width > 1
4817 && ops.length () > 3)
4818 rewrite_expr_tree_parallel (stmt, width, ops);
4819 else
4821 /* When there are three operands left, we want
4822 to make sure the ones that get the double
4823 binary op are chosen wisely. */
4824 int len = ops.length ();
4825 if (len >= 3)
4826 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4828 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4829 powi_result != NULL);
4832 /* If we combined some repeated factors into a
4833 __builtin_powi call, multiply that result by the
4834 reassociated operands. */
4835 if (powi_result)
4837 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4838 tree type = TREE_TYPE (lhs);
4839 tree target_ssa = make_temp_ssa_name (type, NULL,
4840 "reassocpow");
4841 gimple_set_lhs (lhs_stmt, target_ssa);
4842 update_stmt (lhs_stmt);
4843 if (lhs != new_lhs)
4844 target_ssa = new_lhs;
4845 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4846 powi_result,
4847 target_ssa);
4848 gimple_set_location (mul_stmt, gimple_location (stmt));
4849 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4855 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4856 son;
4857 son = next_dom_son (CDI_POST_DOMINATORS, son))
4858 reassociate_bb (son);
4861 /* Add jumps around shifts for range tests turned into bit tests.
4862 For each SSA_NAME VAR we have code like:
4863 VAR = ...; // final stmt of range comparison
4864 // bit test here...;
4865 OTHERVAR = ...; // final stmt of the bit test sequence
4866 RES = VAR | OTHERVAR;
4867 Turn the above into:
4868 VAR = ...;
4869 if (VAR != 0)
4870 goto <l3>;
4871 else
4872 goto <l2>;
4873 <l2>:
4874 // bit test here...;
4875 OTHERVAR = ...;
4876 <l3>:
4877 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4879 static void
4880 branch_fixup (void)
4882 tree var;
4883 unsigned int i;
4885 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4887 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4888 gimple use_stmt;
4889 use_operand_p use;
4890 bool ok = single_imm_use (var, &use, &use_stmt);
4891 gcc_assert (ok
4892 && is_gimple_assign (use_stmt)
4893 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4894 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4896 basic_block cond_bb = gimple_bb (def_stmt);
4897 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4898 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4900 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4901 gimple g = gimple_build_cond (NE_EXPR, var,
4902 build_zero_cst (TREE_TYPE (var)),
4903 NULL_TREE, NULL_TREE);
4904 location_t loc = gimple_location (use_stmt);
4905 gimple_set_location (g, loc);
4906 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4908 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4909 etrue->probability = REG_BR_PROB_BASE / 2;
4910 etrue->count = cond_bb->count / 2;
4911 edge efalse = find_edge (cond_bb, then_bb);
4912 efalse->flags = EDGE_FALSE_VALUE;
4913 efalse->probability -= etrue->probability;
4914 efalse->count -= etrue->count;
4915 then_bb->count -= etrue->count;
4917 tree othervar = NULL_TREE;
4918 if (gimple_assign_rhs1 (use_stmt) == var)
4919 othervar = gimple_assign_rhs2 (use_stmt);
4920 else if (gimple_assign_rhs2 (use_stmt) == var)
4921 othervar = gimple_assign_rhs1 (use_stmt);
4922 else
4923 gcc_unreachable ();
4924 tree lhs = gimple_assign_lhs (use_stmt);
4925 gimple phi = create_phi_node (lhs, merge_bb);
4926 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4927 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4928 gsi = gsi_for_stmt (use_stmt);
4929 gsi_remove (&gsi, true);
4931 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4932 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4934 reassoc_branch_fixups.release ();
4937 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4938 void debug_ops_vector (vec<operand_entry_t> ops);
4940 /* Dump the operand entry vector OPS to FILE. */
4942 void
4943 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4945 operand_entry_t oe;
4946 unsigned int i;
4948 FOR_EACH_VEC_ELT (ops, i, oe)
4950 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4951 print_generic_expr (file, oe->op, 0);
4955 /* Dump the operand entry vector OPS to STDERR. */
4957 DEBUG_FUNCTION void
4958 debug_ops_vector (vec<operand_entry_t> ops)
4960 dump_ops_vector (stderr, ops);
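/* Drive the two phases: subtracts are broken up walking the dominator
   tree top-down from the entry block, while the rewriting itself walks
   the post-dominator tree starting from the exit block.  */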
4963 static void
4964 do_reassoc (void)
4966 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4967 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4970 /* Initialize the reassociation pass. */
4972 static void
4973 init_reassoc (void)
4975 int i;
4976 long rank = 2;
4977 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4979 /* Find the loops, so that we can prevent moving calculations in
4980 them. */
4981 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4983 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4985 operand_entry_pool = create_alloc_pool ("operand entry pool",
4986 sizeof (struct operand_entry), 30);
4987 next_operand_entry_id = 0;
4989 /* Reverse RPO (Reverse Post Order) will give us something where
4990 deeper loops come later. */
4991 pre_and_rev_post_order_compute (NULL, bbs, false);
4992 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4993 operand_rank = new hash_map<tree, long>;
4995 /* Give each default definition a distinct rank. This includes
4996 parameters and the static chain. Walk backwards over all
4997 SSA names so that we get proper rank ordering according
4998 to tree_swap_operands_p. */
4999 for (i = num_ssa_names - 1; i > 0; --i)
5001 tree name = ssa_name (i);
5002 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5003 insert_operand_rank (name, ++rank);
5006 /* Set up rank for each BB */
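/* (The shift by 16 below appears to leave the low bits free for the
   per-statement ranks derived from a block's rank; that is an
   inference from how bb_rank is used elsewhere in this pass, not
   something this function spells out.)  */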
5007 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5008 bb_rank[bbs[i]] = ++rank << 16;
5010 free (bbs);
5011 calculate_dominance_info (CDI_POST_DOMINATORS);
5012 plus_negates = vNULL;
5015 /* Cleanup after the reassociation pass, and print stats if
5016 requested. */
5018 static void
5019 fini_reassoc (void)
5021 statistics_counter_event (cfun, "Linearized",
5022 reassociate_stats.linearized);
5023 statistics_counter_event (cfun, "Constants eliminated",
5024 reassociate_stats.constants_eliminated);
5025 statistics_counter_event (cfun, "Ops eliminated",
5026 reassociate_stats.ops_eliminated);
5027 statistics_counter_event (cfun, "Statements rewritten",
5028 reassociate_stats.rewritten);
5029 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5030 reassociate_stats.pows_encountered);
5031 statistics_counter_event (cfun, "Built-in powi calls created",
5032 reassociate_stats.pows_created);
5034 delete operand_rank;
5035 free_alloc_pool (operand_entry_pool);
5036 free (bb_rank);
5037 plus_negates.release ();
5038 free_dominance_info (CDI_POST_DOMINATORS);
5039 loop_optimizer_finalize ();
5042 /* Gate and execute functions for Reassociation. */
5044 static unsigned int
5045 execute_reassoc (void)
5047 init_reassoc ();
5049 do_reassoc ();
5050 repropagate_negates ();
5051 branch_fixup ();
5053 fini_reassoc ();
5054 return 0;
5057 namespace {
5059 const pass_data pass_data_reassoc =
5061 GIMPLE_PASS, /* type */
5062 "reassoc", /* name */
5063 OPTGROUP_NONE, /* optinfo_flags */
5064 TV_TREE_REASSOC, /* tv_id */
5065 ( PROP_cfg | PROP_ssa ), /* properties_required */
5066 0, /* properties_provided */
5067 0, /* properties_destroyed */
5068 0, /* todo_flags_start */
5069 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5072 class pass_reassoc : public gimple_opt_pass
5074 public:
5075 pass_reassoc (gcc::context *ctxt)
5076 : gimple_opt_pass (pass_data_reassoc, ctxt)
5079 /* opt_pass methods: */
5080 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5081 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5082 virtual unsigned int execute (function *) { return execute_reassoc (); }
5084 }; // class pass_reassoc
5086 } // anon namespace
5088 gimple_opt_pass *
5089 make_pass_reassoc (gcc::context *ctxt)
5091 return new pass_reassoc (ctxt);