* doc/extend.texi (Size of an asm): Really move node to its position.
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob357ac08381cccaa6c4d09c25c4d687b75b9b7e64
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
64 /* This is a simple global reassociation pass. It is, in part, based
65 on the LLVM pass of the same name (They do some things more/less
66 than we do, in different orders, etc).
68 It consists of five steps:
70 1. Breaking up subtract operations into addition + negate, where
71 it would promote the reassociation of adds.
73 2. Left linearization of the expression trees, so that (A+B)+(C+D)
74 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
75 During linearization, we place the operands of the binary
76 expressions into a vector of operand_entry_t
78 3. Optimization of the operand lists, eliminating things like a +
79 -a, a & a, etc.
81 3a. Combine repeated factors with the same occurrence counts
82 into a __builtin_powi call that will later be optimized into
83 an optimal number of multiplies.
85 4. Rewrite the expression trees we linearized and optimized so
86 they are in proper rank order.
88 5. Repropagate negates, as nothing else will clean it up ATM.
90 A bit of theory on #4, since nobody seems to write anything down
91 about why it makes sense to do it the way they do it:
93 We could do this much nicer theoretically, but don't (for reasons
94 explained after how to do it theoretically nice :P).
96 In order to promote the most redundancy elimination, you want
97 binary expressions whose operands are the same rank (or
98 preferably, the same value) exposed to the redundancy eliminator,
99 for possible elimination.
101 So the way to do this if we really cared, is to build the new op
102 tree from the leaves to the roots, merging as you go, and putting the
103 new op on the end of the worklist, until you are left with one
104 thing on the worklist.
106 IE if you have to rewrite the following set of operands (listed with
107 rank in parentheses), with opcode PLUS_EXPR:
109 a (1), b (1), c (1), d (2), e (2)
112 We start with our merge worklist empty, and the ops list with all of
113 those on it.
115 You want to first merge all leaves of the same rank, as much as
116 possible.
118 So first build a binary op of
120 mergetmp = a + b, and put "mergetmp" on the merge worklist.
122 Because there is no three operand form of PLUS_EXPR, c is not going to
123 be exposed to redundancy elimination as a rank 1 operand.
125 So you might as well throw it on the merge worklist (you could also
126 consider it to now be a rank two operand, and merge it with d and e,
127 but in this case, you then have evicted e from a binary op. So at
128 least in this situation, you can't win.)
130 Then build a binary op of d + e
131 mergetmp2 = d + e
133 and put mergetmp2 on the merge worklist.
135 so merge worklist = {mergetmp, c, mergetmp2}
137 Continue building binary ops of these operations until you have only
138 one operation left on the worklist.
140 So we have
142 build binary op
143 mergetmp3 = mergetmp + c
145 worklist = {mergetmp2, mergetmp3}
147 mergetmp4 = mergetmp2 + mergetmp3
149 worklist = {mergetmp4}
151 because we have one operation left, we can now just set the original
152 statement equal to the result of that operation.
154 This will at least expose a + b and d + e to redundancy elimination
155 as binary operations.
157 For extra points, you can reuse the old statements to build the
158 mergetmps, since you shouldn't run out.
160 So why don't we do this?
162 Because it's expensive, and rarely will help. Most trees we are
163 reassociating have 3 or less ops. If they have 2 ops, they already
164 will be written into a nice single binary op. If you have 3 ops, a
165 single simple check suffices to tell you whether the first two are of the
166 same rank. If so, you know to order it
168 mergetmp = op1 + op2
169 newstmt = mergetmp + op3
171 instead of
172 mergetmp = op2 + op3
173 newstmt = mergetmp + op1
175 If all three are of the same rank, you can't expose them all in a
176 single binary operator anyway, so the above is *still* the best you
177 can do.
179 Thus, this is what we do. When we have three ops left, we check to see
180 what order to put them in, and call it a day. As a nod to vector sum
181 reduction, we check if any of the ops are really a phi node that is a
182 destructive update for the associating op, and keep the destructive
183 update together for vector sum reduction recognition. */
186 /* Statistics */
187 static struct
189 int linearized;
190 int constants_eliminated;
191 int ops_eliminated;
192 int rewritten;
193 int pows_encountered;
194 int pows_created;
195 } reassociate_stats;
197 /* Operator, rank pair. */
198 typedef struct operand_entry
200 unsigned int rank;
201 int id;
202 tree op;
203 unsigned int count;
204 } *operand_entry_t;
206 static alloc_pool operand_entry_pool;
208 /* This is used to assign a unique ID to each struct operand_entry
209 so that qsort results are identical on different hosts. */
210 static int next_operand_entry_id;
212 /* Starting rank number for a given basic block, so that we can rank
213 operations using unmovable instructions in that BB based on the bb
214 depth. */
215 static long *bb_rank;
217 /* Operand->rank hashtable. */
218 static struct pointer_map_t *operand_rank;
220 /* Forward decls. */
221 static long get_rank (tree);
222 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
224 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
225 possibly added by gsi_remove. */
227 bool
228 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
230 gimple stmt = gsi_stmt (*gsi);
232 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
233 return gsi_remove (gsi, true);
235 gimple_stmt_iterator prev = *gsi;
236 gsi_prev (&prev);
237 unsigned uid = gimple_uid (stmt);
238 basic_block bb = gimple_bb (stmt);
239 bool ret = gsi_remove (gsi, true);
240 if (!gsi_end_p (prev))
241 gsi_next (&prev);
242 else
243 prev = gsi_start_bb (bb);
244 gimple end_stmt = gsi_stmt (*gsi);
245 while ((stmt = gsi_stmt (prev)) != end_stmt)
247 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
248 gimple_set_uid (stmt, uid);
249 gsi_next (&prev);
251 return ret;
254 /* Bias amount for loop-carried phis. We want this to be larger than
255 the depth of any reassociation tree we can see, but not larger than
256 the rank difference between two blocks. */
257 #define PHI_LOOP_BIAS (1 << 15)
259 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
260 an innermost loop, and the phi has only a single use which is inside
261 the loop, then the rank is the block rank of the loop latch plus an
262 extra bias for the loop-carried dependence. This causes expressions
263 calculated into an accumulator variable to be independent for each
264 iteration of the loop. If STMT is some other phi, the rank is the
265 block rank of its containing block. */
266 static long
267 phi_rank (gimple stmt)
269 basic_block bb = gimple_bb (stmt);
270 struct loop *father = bb->loop_father;
271 tree res;
272 unsigned i;
273 use_operand_p use;
274 gimple use_stmt;
276 /* We only care about real loops (those with a latch). */
277 if (!father->latch)
278 return bb_rank[bb->index];
280 /* Interesting phis must be in headers of innermost loops. */
281 if (bb != father->header
282 || father->inner)
283 return bb_rank[bb->index];
285 /* Ignore virtual SSA_NAMEs. */
286 res = gimple_phi_result (stmt);
287 if (virtual_operand_p (res))
288 return bb_rank[bb->index];
290 /* The phi definition must have a single use, and that use must be
291 within the loop. Otherwise this isn't an accumulator pattern. */
292 if (!single_imm_use (res, &use, &use_stmt)
293 || gimple_bb (use_stmt)->loop_father != father)
294 return bb_rank[bb->index];
296 /* Look for phi arguments from within the loop. If found, bias this phi. */
297 for (i = 0; i < gimple_phi_num_args (stmt); i++)
299 tree arg = gimple_phi_arg_def (stmt, i);
300 if (TREE_CODE (arg) == SSA_NAME
301 && !SSA_NAME_IS_DEFAULT_DEF (arg))
303 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
304 if (gimple_bb (def_stmt)->loop_father == father)
305 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
309 /* Must be an uninteresting phi. */
310 return bb_rank[bb->index];
313 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
314 loop-carried dependence of an innermost loop, return TRUE; else
315 return FALSE. */
316 static bool
317 loop_carried_phi (tree exp)
319 gimple phi_stmt;
320 long block_rank;
322 if (TREE_CODE (exp) != SSA_NAME
323 || SSA_NAME_IS_DEFAULT_DEF (exp))
324 return false;
326 phi_stmt = SSA_NAME_DEF_STMT (exp);
328 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
329 return false;
331 /* Non-loop-carried phis have block rank. Loop-carried phis have
332 an additional bias added in. If this phi doesn't have block rank,
333 it's biased and should not be propagated. */
334 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
336 if (phi_rank (phi_stmt) != block_rank)
337 return true;
339 return false;
342 /* Return the maximum of RANK and the rank that should be propagated
343 from expression OP. For most operands, this is just the rank of OP.
344 For loop-carried phis, the value is zero to avoid undoing the bias
345 in favor of the phi. */
346 static long
347 propagate_rank (long rank, tree op)
349 long op_rank;
351 if (loop_carried_phi (op))
352 return rank;
354 op_rank = get_rank (op);
356 return MAX (rank, op_rank);
359 /* Look up the operand rank structure for expression E. */
361 static inline long
362 find_operand_rank (tree e)
364 void **slot = pointer_map_contains (operand_rank, e);
365 return slot ? (long) (intptr_t) *slot : -1;
368 /* Insert {E,RANK} into the operand rank hashtable. */
370 static inline void
371 insert_operand_rank (tree e, long rank)
373 void **slot;
374 gcc_assert (rank > 0);
375 slot = pointer_map_insert (operand_rank, e);
376 gcc_assert (!*slot);
377 *slot = (void *) (intptr_t) rank;
380 /* Given an expression E, return the rank of the expression. */
382 static long
383 get_rank (tree e)
385 /* Constants have rank 0. */
386 if (is_gimple_min_invariant (e))
387 return 0;
389 /* SSA_NAME's have the rank of the expression they are the result
391 For globals and uninitialized values, the rank is 0.
392 For function arguments, use the pre-setup rank.
393 For PHI nodes, stores, asm statements, etc, we use the rank of
394 the BB.
395 For simple operations, the rank is the maximum rank of any of
396 its operands, or the bb_rank, whichever is less.
397 I make no claims that this is optimal, however, it gives good
398 results. */
400 /* We make an exception to the normal ranking system to break
401 dependences of accumulator variables in loops. Suppose we
402 have a simple one-block loop containing:
404 x_1 = phi(x_0, x_2)
405 b = a + x_1
406 c = b + d
407 x_2 = c + e
409 As shown, each iteration of the calculation into x is fully
410 dependent upon the iteration before it. We would prefer to
411 see this in the form:
413 x_1 = phi(x_0, x_2)
414 b = a + d
415 c = b + e
416 x_2 = c + x_1
418 If the loop is unrolled, the calculations of b and c from
419 different iterations can be interleaved.
421 To obtain this result during reassociation, we bias the rank
422 of the phi definition x_1 upward, when it is recognized as an
423 accumulator pattern. The artificial rank causes it to be
424 added last, providing the desired independence. */
426 if (TREE_CODE (e) == SSA_NAME)
428 gimple stmt;
429 long rank;
430 int i, n;
431 tree op;
433 if (SSA_NAME_IS_DEFAULT_DEF (e))
434 return find_operand_rank (e);
436 stmt = SSA_NAME_DEF_STMT (e);
437 if (gimple_code (stmt) == GIMPLE_PHI)
438 return phi_rank (stmt);
440 if (!is_gimple_assign (stmt)
441 || gimple_vdef (stmt))
442 return bb_rank[gimple_bb (stmt)->index];
444 /* If we already have a rank for this expression, use that. */
445 rank = find_operand_rank (e);
446 if (rank != -1)
447 return rank;
449 /* Otherwise, find the maximum rank for the operands. As an
450 exception, remove the bias from loop-carried phis when propagating
451 the rank so that dependent operations are not also biased. */
452 rank = 0;
453 if (gimple_assign_single_p (stmt))
455 tree rhs = gimple_assign_rhs1 (stmt);
456 n = TREE_OPERAND_LENGTH (rhs);
457 if (n == 0)
458 rank = propagate_rank (rank, rhs);
459 else
461 for (i = 0; i < n; i++)
463 op = TREE_OPERAND (rhs, i);
465 if (op != NULL_TREE)
466 rank = propagate_rank (rank, op);
470 else
472 n = gimple_num_ops (stmt);
473 for (i = 1; i < n; i++)
475 op = gimple_op (stmt, i);
476 gcc_assert (op);
477 rank = propagate_rank (rank, op);
481 if (dump_file && (dump_flags & TDF_DETAILS))
483 fprintf (dump_file, "Rank for ");
484 print_generic_expr (dump_file, e, 0);
485 fprintf (dump_file, " is %ld\n", (rank + 1));
488 /* Note the rank in the hashtable so we don't recompute it. */
489 insert_operand_rank (e, (rank + 1));
490 return (rank + 1);
493 /* Globals, etc, are rank 0 */
494 return 0;
498 /* We want integer ones to end up last no matter what, since they are
499 the ones we can do the most with. */
500 #define INTEGER_CONST_TYPE 1 << 3
501 #define FLOAT_CONST_TYPE 1 << 2
502 #define OTHER_CONST_TYPE 1 << 1
504 /* Classify an invariant tree into integer, float, or other, so that
505 we can sort them to be near other constants of the same type. */
506 static inline int
507 constant_type (tree t)
509 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
510 return INTEGER_CONST_TYPE;
511 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
512 return FLOAT_CONST_TYPE;
513 else
514 return OTHER_CONST_TYPE;
517 /* qsort comparison function to sort operand entries PA and PB by rank
518 so that the sorted array is ordered by rank in decreasing order. */
519 static int
520 sort_by_operand_rank (const void *pa, const void *pb)
522 const operand_entry_t oea = *(const operand_entry_t *)pa;
523 const operand_entry_t oeb = *(const operand_entry_t *)pb;
525 /* It's nicer for optimize_expression if constants that are likely
526 to fold when added/multiplied//whatever are put next to each
527 other. Since all constants have rank 0, order them by type. */
528 if (oeb->rank == 0 && oea->rank == 0)
530 if (constant_type (oeb->op) != constant_type (oea->op))
531 return constant_type (oeb->op) - constant_type (oea->op);
532 else
533 /* To make sorting result stable, we use unique IDs to determine
534 order. */
535 return oeb->id - oea->id;
538 /* Lastly, make sure the versions that are the same go next to each
539 other. */
540 if ((oeb->rank - oea->rank == 0)
541 && TREE_CODE (oea->op) == SSA_NAME
542 && TREE_CODE (oeb->op) == SSA_NAME)
544 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
545 versions of removed SSA_NAMEs, so if possible, prefer to sort
546 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
547 See PR60418. */
548 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
549 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
550 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
552 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
553 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
554 basic_block bba = gimple_bb (stmta);
555 basic_block bbb = gimple_bb (stmtb);
556 if (bbb != bba)
558 if (bb_rank[bbb->index] != bb_rank[bba->index])
559 return bb_rank[bbb->index] - bb_rank[bba->index];
561 else
563 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
564 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
565 if (da != db)
566 return da ? 1 : -1;
570 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
571 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
572 else
573 return oeb->id - oea->id;
576 if (oeb->rank != oea->rank)
577 return oeb->rank - oea->rank;
578 else
579 return oeb->id - oea->id;
582 /* Add an operand entry to *OPS for the tree operand OP. */
584 static void
585 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
587 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
589 oe->op = op;
590 oe->rank = get_rank (op);
591 oe->id = next_operand_entry_id++;
592 oe->count = 1;
593 ops->safe_push (oe);
596 /* Add an operand entry to *OPS for the tree operand OP with repeat
597 count REPEAT. */
599 static void
600 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
601 HOST_WIDE_INT repeat)
603 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
605 oe->op = op;
606 oe->rank = get_rank (op);
607 oe->id = next_operand_entry_id++;
608 oe->count = repeat;
609 ops->safe_push (oe);
611 reassociate_stats.pows_encountered++;
614 /* Return true if STMT is reassociable operation containing a binary
615 operation with tree code CODE, and is inside LOOP. */
617 static bool
618 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
620 basic_block bb = gimple_bb (stmt);
622 if (gimple_bb (stmt) == NULL)
623 return false;
625 if (!flow_bb_inside_loop_p (loop, bb))
626 return false;
628 if (is_gimple_assign (stmt)
629 && gimple_assign_rhs_code (stmt) == code
630 && has_single_use (gimple_assign_lhs (stmt)))
631 return true;
633 return false;
637 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
638 operand of the negate operation. Otherwise, return NULL. */
640 static tree
641 get_unary_op (tree name, enum tree_code opcode)
643 gimple stmt = SSA_NAME_DEF_STMT (name);
645 if (!is_gimple_assign (stmt))
646 return NULL_TREE;
648 if (gimple_assign_rhs_code (stmt) == opcode)
649 return gimple_assign_rhs1 (stmt);
650 return NULL_TREE;
653 /* If CURR and LAST are a pair of ops that OPCODE allows us to
654 eliminate through equivalences, do so, remove them from OPS, and
655 return true. Otherwise, return false. */
657 static bool
658 eliminate_duplicate_pair (enum tree_code opcode,
659 vec<operand_entry_t> *ops,
660 bool *all_done,
661 unsigned int i,
662 operand_entry_t curr,
663 operand_entry_t last)
666 /* If we have two of the same op, and the opcode is & |, min, or max,
667 we can eliminate one of them.
668 If we have two of the same op, and the opcode is ^, we can
669 eliminate both of them. */
671 if (last && last->op == curr->op)
673 switch (opcode)
675 case MAX_EXPR:
676 case MIN_EXPR:
677 case BIT_IOR_EXPR:
678 case BIT_AND_EXPR:
679 if (dump_file && (dump_flags & TDF_DETAILS))
681 fprintf (dump_file, "Equivalence: ");
682 print_generic_expr (dump_file, curr->op, 0);
683 fprintf (dump_file, " [&|minmax] ");
684 print_generic_expr (dump_file, last->op, 0);
685 fprintf (dump_file, " -> ");
686 print_generic_stmt (dump_file, last->op, 0);
689 ops->ordered_remove (i);
690 reassociate_stats.ops_eliminated ++;
692 return true;
694 case BIT_XOR_EXPR:
695 if (dump_file && (dump_flags & TDF_DETAILS))
697 fprintf (dump_file, "Equivalence: ");
698 print_generic_expr (dump_file, curr->op, 0);
699 fprintf (dump_file, " ^ ");
700 print_generic_expr (dump_file, last->op, 0);
701 fprintf (dump_file, " -> nothing\n");
704 reassociate_stats.ops_eliminated += 2;
706 if (ops->length () == 2)
708 ops->create (0);
709 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
710 *all_done = true;
712 else
714 ops->ordered_remove (i-1);
715 ops->ordered_remove (i-1);
718 return true;
720 default:
721 break;
724 return false;
727 static vec<tree> plus_negates;
729 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
730 expression, look in OPS for a corresponding positive operation to cancel
731 it out. If we find one, remove the other from OPS, replace
732 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
733 return false. */
735 static bool
736 eliminate_plus_minus_pair (enum tree_code opcode,
737 vec<operand_entry_t> *ops,
738 unsigned int currindex,
739 operand_entry_t curr)
741 tree negateop;
742 tree notop;
743 unsigned int i;
744 operand_entry_t oe;
746 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
747 return false;
749 negateop = get_unary_op (curr->op, NEGATE_EXPR);
750 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
751 if (negateop == NULL_TREE && notop == NULL_TREE)
752 return false;
754 /* Any non-negated version will have a rank that is one less than
755 the current rank. So once we hit those ranks, if we don't find
756 one, we can stop. */
758 for (i = currindex + 1;
759 ops->iterate (i, &oe)
760 && oe->rank >= curr->rank - 1 ;
761 i++)
763 if (oe->op == negateop)
766 if (dump_file && (dump_flags & TDF_DETAILS))
768 fprintf (dump_file, "Equivalence: ");
769 print_generic_expr (dump_file, negateop, 0);
770 fprintf (dump_file, " + -");
771 print_generic_expr (dump_file, oe->op, 0);
772 fprintf (dump_file, " -> 0\n");
775 ops->ordered_remove (i);
776 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
777 ops->ordered_remove (currindex);
778 reassociate_stats.ops_eliminated ++;
780 return true;
782 else if (oe->op == notop)
784 tree op_type = TREE_TYPE (oe->op);
786 if (dump_file && (dump_flags & TDF_DETAILS))
788 fprintf (dump_file, "Equivalence: ");
789 print_generic_expr (dump_file, notop, 0);
790 fprintf (dump_file, " + ~");
791 print_generic_expr (dump_file, oe->op, 0);
792 fprintf (dump_file, " -> -1\n");
795 ops->ordered_remove (i);
796 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
797 ops->ordered_remove (currindex);
798 reassociate_stats.ops_eliminated ++;
800 return true;
804 /* CURR->OP is a negate expr in a plus expr: save it for later
805 inspection in repropagate_negates(). */
806 if (negateop != NULL_TREE)
807 plus_negates.safe_push (curr->op);
809 return false;
812 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
813 bitwise not expression, look in OPS for a corresponding operand to
814 cancel it out. If we find one, remove the other from OPS, replace
815 OPS[CURRINDEX] with 0, and return true. Otherwise, return
816 false. */
818 static bool
819 eliminate_not_pairs (enum tree_code opcode,
820 vec<operand_entry_t> *ops,
821 unsigned int currindex,
822 operand_entry_t curr)
824 tree notop;
825 unsigned int i;
826 operand_entry_t oe;
828 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
829 || TREE_CODE (curr->op) != SSA_NAME)
830 return false;
832 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
833 if (notop == NULL_TREE)
834 return false;
836 /* Any non-not version will have a rank that is one less than
837 the current rank. So once we hit those ranks, if we don't find
838 one, we can stop. */
840 for (i = currindex + 1;
841 ops->iterate (i, &oe)
842 && oe->rank >= curr->rank - 1;
843 i++)
845 if (oe->op == notop)
847 if (dump_file && (dump_flags & TDF_DETAILS))
849 fprintf (dump_file, "Equivalence: ");
850 print_generic_expr (dump_file, notop, 0);
851 if (opcode == BIT_AND_EXPR)
852 fprintf (dump_file, " & ~");
853 else if (opcode == BIT_IOR_EXPR)
854 fprintf (dump_file, " | ~");
855 print_generic_expr (dump_file, oe->op, 0);
856 if (opcode == BIT_AND_EXPR)
857 fprintf (dump_file, " -> 0\n");
858 else if (opcode == BIT_IOR_EXPR)
859 fprintf (dump_file, " -> -1\n");
862 if (opcode == BIT_AND_EXPR)
863 oe->op = build_zero_cst (TREE_TYPE (oe->op));
864 else if (opcode == BIT_IOR_EXPR)
865 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
867 reassociate_stats.ops_eliminated += ops->length () - 1;
868 ops->truncate (0);
869 ops->quick_push (oe);
870 return true;
874 return false;
877 /* Use constant value that may be present in OPS to try to eliminate
878 operands. Note that this function is only really used when we've
879 eliminated ops for other reasons, or merged constants. Across
880 single statements, fold already does all of this, plus more. There
881 is little point in duplicating logic, so I've only included the
882 identities that I could ever construct testcases to trigger. */
884 static void
885 eliminate_using_constants (enum tree_code opcode,
886 vec<operand_entry_t> *ops)
888 operand_entry_t oelast = ops->last ();
889 tree type = TREE_TYPE (oelast->op);
891 if (oelast->rank == 0
892 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
894 switch (opcode)
896 case BIT_AND_EXPR:
897 if (integer_zerop (oelast->op))
899 if (ops->length () != 1)
901 if (dump_file && (dump_flags & TDF_DETAILS))
902 fprintf (dump_file, "Found & 0, removing all other ops\n");
904 reassociate_stats.ops_eliminated += ops->length () - 1;
906 ops->truncate (0);
907 ops->quick_push (oelast);
908 return;
911 else if (integer_all_onesp (oelast->op))
913 if (ops->length () != 1)
915 if (dump_file && (dump_flags & TDF_DETAILS))
916 fprintf (dump_file, "Found & -1, removing\n");
917 ops->pop ();
918 reassociate_stats.ops_eliminated++;
921 break;
922 case BIT_IOR_EXPR:
923 if (integer_all_onesp (oelast->op))
925 if (ops->length () != 1)
927 if (dump_file && (dump_flags & TDF_DETAILS))
928 fprintf (dump_file, "Found | -1, removing all other ops\n");
930 reassociate_stats.ops_eliminated += ops->length () - 1;
932 ops->truncate (0);
933 ops->quick_push (oelast);
934 return;
937 else if (integer_zerop (oelast->op))
939 if (ops->length () != 1)
941 if (dump_file && (dump_flags & TDF_DETAILS))
942 fprintf (dump_file, "Found | 0, removing\n");
943 ops->pop ();
944 reassociate_stats.ops_eliminated++;
947 break;
948 case MULT_EXPR:
949 if (integer_zerop (oelast->op)
950 || (FLOAT_TYPE_P (type)
951 && !HONOR_NANS (TYPE_MODE (type))
952 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
953 && real_zerop (oelast->op)))
955 if (ops->length () != 1)
957 if (dump_file && (dump_flags & TDF_DETAILS))
958 fprintf (dump_file, "Found * 0, removing all other ops\n");
960 reassociate_stats.ops_eliminated += ops->length () - 1;
961 ops->truncate (1);
962 ops->quick_push (oelast);
963 return;
966 else if (integer_onep (oelast->op)
967 || (FLOAT_TYPE_P (type)
968 && !HONOR_SNANS (TYPE_MODE (type))
969 && real_onep (oelast->op)))
971 if (ops->length () != 1)
973 if (dump_file && (dump_flags & TDF_DETAILS))
974 fprintf (dump_file, "Found * 1, removing\n");
975 ops->pop ();
976 reassociate_stats.ops_eliminated++;
977 return;
980 break;
981 case BIT_XOR_EXPR:
982 case PLUS_EXPR:
983 case MINUS_EXPR:
984 if (integer_zerop (oelast->op)
985 || (FLOAT_TYPE_P (type)
986 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
987 && fold_real_zero_addition_p (type, oelast->op,
988 opcode == MINUS_EXPR)))
990 if (ops->length () != 1)
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 fprintf (dump_file, "Found [|^+] 0, removing\n");
994 ops->pop ();
995 reassociate_stats.ops_eliminated++;
996 return;
999 break;
1000 default:
1001 break;
1007 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1008 bool, bool);
1010 /* Structure for tracking and counting operands. */
1011 typedef struct oecount_s {
1012 int cnt;
1013 int id;
1014 enum tree_code oecode;
1015 tree op;
1016 } oecount;
1019 /* The heap for the oecount hashtable and the sorted list of operands. */
1020 static vec<oecount> cvec;
1023 /* Oecount hashtable helpers. */
1025 struct oecount_hasher : typed_noop_remove <void>
1027 /* Note that this hash table stores integers, not pointers.
1028 So, observe the casting in the member functions. */
1029 typedef void value_type;
1030 typedef void compare_type;
1031 static inline hashval_t hash (const value_type *);
1032 static inline bool equal (const value_type *, const compare_type *);
1035 /* Hash function for oecount. */
1037 inline hashval_t
1038 oecount_hasher::hash (const value_type *p)
1040 const oecount *c = &cvec[(size_t)p - 42];
1041 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1044 /* Comparison function for oecount. */
1046 inline bool
1047 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
1049 const oecount *c1 = &cvec[(size_t)p1 - 42];
1050 const oecount *c2 = &cvec[(size_t)p2 - 42];
1051 return (c1->oecode == c2->oecode
1052 && c1->op == c2->op);
1055 /* Comparison function for qsort sorting oecount elements by count. */
1057 static int
1058 oecount_cmp (const void *p1, const void *p2)
1060 const oecount *c1 = (const oecount *)p1;
1061 const oecount *c2 = (const oecount *)p2;
1062 if (c1->cnt != c2->cnt)
1063 return c1->cnt - c2->cnt;
1064 else
1065 /* If counts are identical, use unique IDs to stabilize qsort. */
1066 return c1->id - c2->id;
1069 /* Return TRUE iff STMT represents a builtin call that raises OP
1070 to some exponent. */
1072 static bool
1073 stmt_is_power_of_op (gimple stmt, tree op)
1075 tree fndecl;
1077 if (!is_gimple_call (stmt))
1078 return false;
1080 fndecl = gimple_call_fndecl (stmt);
1082 if (!fndecl
1083 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1084 return false;
1086 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1088 CASE_FLT_FN (BUILT_IN_POW):
1089 CASE_FLT_FN (BUILT_IN_POWI):
1090 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1092 default:
1093 return false;
1097 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1098 in place and return the result. Assumes that stmt_is_power_of_op
1099 was previously called for STMT and returned TRUE. */
1101 static HOST_WIDE_INT
1102 decrement_power (gimple stmt)
1104 REAL_VALUE_TYPE c, cint;
1105 HOST_WIDE_INT power;
1106 tree arg1;
1108 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1110 CASE_FLT_FN (BUILT_IN_POW):
1111 arg1 = gimple_call_arg (stmt, 1);
1112 c = TREE_REAL_CST (arg1);
1113 power = real_to_integer (&c) - 1;
1114 real_from_integer (&cint, VOIDmode, power, SIGNED);
1115 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1116 return power;
1118 CASE_FLT_FN (BUILT_IN_POWI):
1119 arg1 = gimple_call_arg (stmt, 1);
1120 power = TREE_INT_CST_LOW (arg1) - 1;
1121 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1122 return power;
1124 default:
1125 gcc_unreachable ();
1129 /* Find the single immediate use of STMT's LHS, and replace it
1130 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1131 replace *DEF with OP as well. */
1133 static void
1134 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1136 tree lhs;
1137 gimple use_stmt;
1138 use_operand_p use;
1139 gimple_stmt_iterator gsi;
1141 if (is_gimple_call (stmt))
1142 lhs = gimple_call_lhs (stmt);
1143 else
1144 lhs = gimple_assign_lhs (stmt);
1146 gcc_assert (has_single_use (lhs));
1147 single_imm_use (lhs, &use, &use_stmt);
1148 if (lhs == *def)
1149 *def = op;
1150 SET_USE (use, op);
1151 if (TREE_CODE (op) != SSA_NAME)
1152 update_stmt (use_stmt);
1153 gsi = gsi_for_stmt (stmt);
1154 unlink_stmt_vdef (stmt);
1155 reassoc_remove_stmt (&gsi);
1156 release_defs (stmt);
1159 /* Walks the linear chain with result *DEF searching for an operation
1160 with operand OP and code OPCODE removing that from the chain. *DEF
1161 is updated if there is only one operand but no operation left. */
1163 static void
1164 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1166 gimple stmt = SSA_NAME_DEF_STMT (*def);
1170 tree name;
1172 if (opcode == MULT_EXPR
1173 && stmt_is_power_of_op (stmt, op))
1175 if (decrement_power (stmt) == 1)
1176 propagate_op_to_single_use (op, stmt, def);
1177 return;
1180 name = gimple_assign_rhs1 (stmt);
1182 /* If this is the operation we look for and one of the operands
1183 is ours simply propagate the other operand into the stmts
1184 single use. */
1185 if (gimple_assign_rhs_code (stmt) == opcode
1186 && (name == op
1187 || gimple_assign_rhs2 (stmt) == op))
1189 if (name == op)
1190 name = gimple_assign_rhs2 (stmt);
1191 propagate_op_to_single_use (name, stmt, def);
1192 return;
1195 /* We might have a multiply of two __builtin_pow* calls, and
1196 the operand might be hiding in the rightmost one. */
1197 if (opcode == MULT_EXPR
1198 && gimple_assign_rhs_code (stmt) == opcode
1199 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1200 && has_single_use (gimple_assign_rhs2 (stmt)))
1202 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1203 if (stmt_is_power_of_op (stmt2, op))
1205 if (decrement_power (stmt2) == 1)
1206 propagate_op_to_single_use (op, stmt2, def);
1207 return;
1211 /* Continue walking the chain. */
1212 gcc_assert (name != op
1213 && TREE_CODE (name) == SSA_NAME);
1214 stmt = SSA_NAME_DEF_STMT (name);
1216 while (1);
1219 /* Returns true if statement S1 dominates statement S2. Like
1220 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1222 static bool
1223 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1225 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1227 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1228 SSA_NAME. Assume it lives at the beginning of function and
1229 thus dominates everything. */
1230 if (!bb1 || s1 == s2)
1231 return true;
1233 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1234 if (!bb2)
1235 return false;
1237 if (bb1 == bb2)
1239 /* PHIs in the same basic block are assumed to be
1240 executed all in parallel, if only one stmt is a PHI,
1241 it dominates the other stmt in the same basic block. */
1242 if (gimple_code (s1) == GIMPLE_PHI)
1243 return true;
1245 if (gimple_code (s2) == GIMPLE_PHI)
1246 return false;
1248 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1250 if (gimple_uid (s1) < gimple_uid (s2))
1251 return true;
1253 if (gimple_uid (s1) > gimple_uid (s2))
1254 return false;
1256 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1257 unsigned int uid = gimple_uid (s1);
1258 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1260 gimple s = gsi_stmt (gsi);
1261 if (gimple_uid (s) != uid)
1262 break;
1263 if (s == s2)
1264 return true;
1267 return false;
1270 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1273 /* Insert STMT after INSERT_POINT. */
1275 static void
1276 insert_stmt_after (gimple stmt, gimple insert_point)
1278 gimple_stmt_iterator gsi;
1279 basic_block bb;
1281 if (gimple_code (insert_point) == GIMPLE_PHI)
1282 bb = gimple_bb (insert_point);
1283 else if (!stmt_ends_bb_p (insert_point))
1285 gsi = gsi_for_stmt (insert_point);
1286 gimple_set_uid (stmt, gimple_uid (insert_point));
1287 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1288 return;
1290 else
1291 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1292 thus if it must end a basic block, it should be a call that can
1293 throw, or some assignment that can throw. If it throws, the LHS
1294 of it will not be initialized though, so only valid places using
1295 the SSA_NAME should be dominated by the fallthru edge. */
1296 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1297 gsi = gsi_after_labels (bb);
1298 if (gsi_end_p (gsi))
1300 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1301 gimple_set_uid (stmt,
1302 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1304 else
1305 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1306 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1309 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1310 the result. Places the statement after the definition of either
1311 OP1 or OP2. Returns the new statement. */
1313 static gimple
1314 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1316 gimple op1def = NULL, op2def = NULL;
1317 gimple_stmt_iterator gsi;
1318 tree op;
1319 gimple sum;
1321 /* Create the addition statement. */
1322 op = make_ssa_name (type, NULL);
1323 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1325 /* Find an insertion place and insert. */
1326 if (TREE_CODE (op1) == SSA_NAME)
1327 op1def = SSA_NAME_DEF_STMT (op1);
1328 if (TREE_CODE (op2) == SSA_NAME)
1329 op2def = SSA_NAME_DEF_STMT (op2);
1330 if ((!op1def || gimple_nop_p (op1def))
1331 && (!op2def || gimple_nop_p (op2def)))
1333 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1334 if (gsi_end_p (gsi))
1336 gimple_stmt_iterator gsi2
1337 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1338 gimple_set_uid (sum,
1339 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1341 else
1342 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1343 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1345 else
1347 gimple insert_point;
1348 if ((!op1def || gimple_nop_p (op1def))
1349 || (op2def && !gimple_nop_p (op2def)
1350 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1351 insert_point = op2def;
1352 else
1353 insert_point = op1def;
1354 insert_stmt_after (sum, insert_point);
1356 update_stmt (sum);
1358 return sum;
1361 /* Perform un-distribution of divisions and multiplications.
1362 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1363 to (A + B) / X for real X.
1365 The algorithm is organized as follows.
1367 - First we walk the addition chain *OPS looking for summands that
1368 are defined by a multiplication or a real division. This results
1369 in the candidates bitmap with relevant indices into *OPS.
1371 - Second we build the chains of multiplications or divisions for
1372 these candidates, counting the number of occurrences of (operand, code)
1373 pairs in all of the candidates chains.
1375 - Third we sort the (operand, code) pairs by number of occurrence and
1376 process them starting with the pair with the most uses.
1378 * For each such pair we walk the candidates again to build a
1379 second candidate bitmap noting all multiplication/division chains
1380 that have at least one occurrence of (operand, code).
1382 * We build an alternate addition chain only covering these
1383 candidates with one (operand, code) operation removed from their
1384 multiplication/division chain.
1386 * The first candidate gets replaced by the alternate addition chain
1387 multiplied/divided by the operand.
1389 * All candidate chains get disabled for further processing and
1390 processing of (operand, code) pairs continues.
1392 The alternate addition chains built are re-processed by the main
1393 reassociation algorithm which allows optimizing a * x * y + b * y * x
1394 to (a + b ) * x * y in one invocation of the reassociation pass. */
1396 static bool
1397 undistribute_ops_list (enum tree_code opcode,
1398 vec<operand_entry_t> *ops, struct loop *loop)
1400 unsigned int length = ops->length ();
1401 operand_entry_t oe1;
1402 unsigned i, j;
1403 sbitmap candidates, candidates2;
1404 unsigned nr_candidates, nr_candidates2;
1405 sbitmap_iterator sbi0;
1406 vec<operand_entry_t> *subops;
1407 hash_table <oecount_hasher> ctable;
1408 bool changed = false;
1409 int next_oecount_id = 0;
1411 if (length <= 1
1412 || opcode != PLUS_EXPR)
1413 return false;
1415 /* Build a list of candidates to process. */
1416 candidates = sbitmap_alloc (length);
1417 bitmap_clear (candidates);
1418 nr_candidates = 0;
1419 FOR_EACH_VEC_ELT (*ops, i, oe1)
1421 enum tree_code dcode;
1422 gimple oe1def;
1424 if (TREE_CODE (oe1->op) != SSA_NAME)
1425 continue;
1426 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1427 if (!is_gimple_assign (oe1def))
1428 continue;
1429 dcode = gimple_assign_rhs_code (oe1def);
1430 if ((dcode != MULT_EXPR
1431 && dcode != RDIV_EXPR)
1432 || !is_reassociable_op (oe1def, dcode, loop))
1433 continue;
1435 bitmap_set_bit (candidates, i);
1436 nr_candidates++;
1439 if (nr_candidates < 2)
1441 sbitmap_free (candidates);
1442 return false;
1445 if (dump_file && (dump_flags & TDF_DETAILS))
1447 fprintf (dump_file, "searching for un-distribute opportunities ");
1448 print_generic_expr (dump_file,
1449 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1450 fprintf (dump_file, " %d\n", nr_candidates);
1453 /* Build linearized sub-operand lists and the counting table. */
1454 cvec.create (0);
1455 ctable.create (15);
1456 /* ??? Macro arguments cannot have multi-argument template types in
1457 them. This typedef is needed to workaround that limitation. */
1458 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1459 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1460 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1462 gimple oedef;
1463 enum tree_code oecode;
1464 unsigned j;
1466 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1467 oecode = gimple_assign_rhs_code (oedef);
1468 linearize_expr_tree (&subops[i], oedef,
1469 associative_tree_code (oecode), false);
1471 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1473 oecount c;
1474 void **slot;
1475 size_t idx;
1476 c.oecode = oecode;
1477 c.cnt = 1;
1478 c.id = next_oecount_id++;
1479 c.op = oe1->op;
1480 cvec.safe_push (c);
1481 idx = cvec.length () + 41;
1482 slot = ctable.find_slot ((void *)idx, INSERT);
1483 if (!*slot)
1485 *slot = (void *)idx;
1487 else
1489 cvec.pop ();
1490 cvec[(size_t)*slot - 42].cnt++;
1494 ctable.dispose ();
1496 /* Sort the counting table. */
1497 cvec.qsort (oecount_cmp);
1499 if (dump_file && (dump_flags & TDF_DETAILS))
1501 oecount *c;
1502 fprintf (dump_file, "Candidates:\n");
1503 FOR_EACH_VEC_ELT (cvec, j, c)
1505 fprintf (dump_file, " %u %s: ", c->cnt,
1506 c->oecode == MULT_EXPR
1507 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1508 print_generic_expr (dump_file, c->op, 0);
1509 fprintf (dump_file, "\n");
1513 /* Process the (operand, code) pairs in order of most occurrence. */
1514 candidates2 = sbitmap_alloc (length);
1515 while (!cvec.is_empty ())
1517 oecount *c = &cvec.last ();
1518 if (c->cnt < 2)
1519 break;
1521 /* Now collect the operands in the outer chain that contain
1522 the common operand in their inner chain. */
1523 bitmap_clear (candidates2);
1524 nr_candidates2 = 0;
1525 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1527 gimple oedef;
1528 enum tree_code oecode;
1529 unsigned j;
1530 tree op = (*ops)[i]->op;
1532 /* If we undistributed in this chain already this may be
1533 a constant. */
1534 if (TREE_CODE (op) != SSA_NAME)
1535 continue;
1537 oedef = SSA_NAME_DEF_STMT (op);
1538 oecode = gimple_assign_rhs_code (oedef);
1539 if (oecode != c->oecode)
1540 continue;
1542 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1544 if (oe1->op == c->op)
1546 bitmap_set_bit (candidates2, i);
1547 ++nr_candidates2;
1548 break;
1553 if (nr_candidates2 >= 2)
1555 operand_entry_t oe1, oe2;
1556 gimple prod;
1557 int first = bitmap_first_set_bit (candidates2);
1559 /* Build the new addition chain. */
1560 oe1 = (*ops)[first];
1561 if (dump_file && (dump_flags & TDF_DETAILS))
1563 fprintf (dump_file, "Building (");
1564 print_generic_expr (dump_file, oe1->op, 0);
1566 zero_one_operation (&oe1->op, c->oecode, c->op);
1567 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1569 gimple sum;
1570 oe2 = (*ops)[i];
1571 if (dump_file && (dump_flags & TDF_DETAILS))
1573 fprintf (dump_file, " + ");
1574 print_generic_expr (dump_file, oe2->op, 0);
1576 zero_one_operation (&oe2->op, c->oecode, c->op);
1577 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1578 oe1->op, oe2->op, opcode);
1579 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1580 oe2->rank = 0;
1581 oe1->op = gimple_get_lhs (sum);
1584 /* Apply the multiplication/division. */
1585 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1586 oe1->op, c->op, c->oecode);
1587 if (dump_file && (dump_flags & TDF_DETAILS))
1589 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1590 print_generic_expr (dump_file, c->op, 0);
1591 fprintf (dump_file, "\n");
1594 /* Record it in the addition chain and disable further
1595 undistribution with this op. */
1596 oe1->op = gimple_assign_lhs (prod);
1597 oe1->rank = get_rank (oe1->op);
1598 subops[first].release ();
1600 changed = true;
1603 cvec.pop ();
1606 for (i = 0; i < ops->length (); ++i)
1607 subops[i].release ();
1608 free (subops);
1609 cvec.release ();
1610 sbitmap_free (candidates);
1611 sbitmap_free (candidates2);
1613 return changed;
1616 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1617 expression, examine the other OPS to see if any of them are comparisons
1618 of the same values, which we may be able to combine or eliminate.
1619 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1621 static bool
1622 eliminate_redundant_comparison (enum tree_code opcode,
1623 vec<operand_entry_t> *ops,
1624 unsigned int currindex,
1625 operand_entry_t curr)
1627 tree op1, op2;
1628 enum tree_code lcode, rcode;
1629 gimple def1, def2;
1630 int i;
1631 operand_entry_t oe;
1633 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1634 return false;
1636 /* Check that CURR is a comparison. */
1637 if (TREE_CODE (curr->op) != SSA_NAME)
1638 return false;
1639 def1 = SSA_NAME_DEF_STMT (curr->op);
1640 if (!is_gimple_assign (def1))
1641 return false;
1642 lcode = gimple_assign_rhs_code (def1);
1643 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1644 return false;
1645 op1 = gimple_assign_rhs1 (def1);
1646 op2 = gimple_assign_rhs2 (def1);
1648 /* Now look for a similar comparison in the remaining OPS. */
1649 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1651 tree t;
1653 if (TREE_CODE (oe->op) != SSA_NAME)
1654 continue;
1655 def2 = SSA_NAME_DEF_STMT (oe->op);
1656 if (!is_gimple_assign (def2))
1657 continue;
1658 rcode = gimple_assign_rhs_code (def2);
1659 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1660 continue;
1662 /* If we got here, we have a match. See if we can combine the
1663 two comparisons. */
1664 if (opcode == BIT_IOR_EXPR)
1665 t = maybe_fold_or_comparisons (lcode, op1, op2,
1666 rcode, gimple_assign_rhs1 (def2),
1667 gimple_assign_rhs2 (def2));
1668 else
1669 t = maybe_fold_and_comparisons (lcode, op1, op2,
1670 rcode, gimple_assign_rhs1 (def2),
1671 gimple_assign_rhs2 (def2));
1672 if (!t)
1673 continue;
1675 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1676 always give us a boolean_type_node value back. If the original
1677 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1678 we need to convert. */
1679 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1680 t = fold_convert (TREE_TYPE (curr->op), t);
1682 if (TREE_CODE (t) != INTEGER_CST
1683 && !operand_equal_p (t, curr->op, 0))
1685 enum tree_code subcode;
1686 tree newop1, newop2;
1687 if (!COMPARISON_CLASS_P (t))
1688 continue;
1689 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1690 STRIP_USELESS_TYPE_CONVERSION (newop1);
1691 STRIP_USELESS_TYPE_CONVERSION (newop2);
1692 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1693 continue;
1696 if (dump_file && (dump_flags & TDF_DETAILS))
1698 fprintf (dump_file, "Equivalence: ");
1699 print_generic_expr (dump_file, curr->op, 0);
1700 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1701 print_generic_expr (dump_file, oe->op, 0);
1702 fprintf (dump_file, " -> ");
1703 print_generic_expr (dump_file, t, 0);
1704 fprintf (dump_file, "\n");
1707 /* Now we can delete oe, as it has been subsumed by the new combined
1708 expression t. */
1709 ops->ordered_remove (i);
1710 reassociate_stats.ops_eliminated ++;
1712 /* If t is the same as curr->op, we're done. Otherwise we must
1713 replace curr->op with t. Special case is if we got a constant
1714 back, in which case we add it to the end instead of in place of
1715 the current entry. */
1716 if (TREE_CODE (t) == INTEGER_CST)
1718 ops->ordered_remove (currindex);
1719 add_to_ops_vec (ops, t);
1721 else if (!operand_equal_p (t, curr->op, 0))
1723 gimple sum;
1724 enum tree_code subcode;
1725 tree newop1;
1726 tree newop2;
1727 gcc_assert (COMPARISON_CLASS_P (t));
1728 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1729 STRIP_USELESS_TYPE_CONVERSION (newop1);
1730 STRIP_USELESS_TYPE_CONVERSION (newop2);
1731 gcc_checking_assert (is_gimple_val (newop1)
1732 && is_gimple_val (newop2));
1733 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1734 curr->op = gimple_get_lhs (sum);
1736 return true;
1739 return false;
1742 /* Perform various identities and other optimizations on the list of
1743 operand entries, stored in OPS. The tree code for the binary
1744 operation between all the operands is OPCODE. */
1746 static void
1747 optimize_ops_list (enum tree_code opcode,
1748 vec<operand_entry_t> *ops)
1750 unsigned int length = ops->length ();
1751 unsigned int i;
1752 operand_entry_t oe;
1753 operand_entry_t oelast = NULL;
1754 bool iterate = false;
1756 if (length == 1)
1757 return;
1759 oelast = ops->last ();
1761 /* If the last two are constants, pop the constants off, merge them
1762 and try the next two. */
1763 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1765 operand_entry_t oelm1 = (*ops)[length - 2];
1767 if (oelm1->rank == 0
1768 && is_gimple_min_invariant (oelm1->op)
1769 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1770 TREE_TYPE (oelast->op)))
1772 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1773 oelm1->op, oelast->op);
1775 if (folded && is_gimple_min_invariant (folded))
1777 if (dump_file && (dump_flags & TDF_DETAILS))
1778 fprintf (dump_file, "Merging constants\n");
1780 ops->pop ();
1781 ops->pop ();
1783 add_to_ops_vec (ops, folded);
1784 reassociate_stats.constants_eliminated++;
1786 optimize_ops_list (opcode, ops);
1787 return;
1792 eliminate_using_constants (opcode, ops);
1793 oelast = NULL;
1795 for (i = 0; ops->iterate (i, &oe);)
1797 bool done = false;
1799 if (eliminate_not_pairs (opcode, ops, i, oe))
1800 return;
1801 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1802 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1803 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1805 if (done)
1806 return;
1807 iterate = true;
1808 oelast = NULL;
1809 continue;
1811 oelast = oe;
1812 i++;
1815 length = ops->length ();
1816 oelast = ops->last ();
1818 if (iterate)
1819 optimize_ops_list (opcode, ops);
1822 /* The following functions are subroutines to optimize_range_tests and allow
1823 it to try to change a logical combination of comparisons into a range
1824 test.
1826 For example, both
1827 X == 2 || X == 5 || X == 3 || X == 4
1829 X >= 2 && X <= 5
1830 are converted to
1831 (unsigned) (X - 2) <= 3
1833 For more information see comments above fold_test_range in fold-const.c,
1834 this implementation is for GIMPLE. */
1836 struct range_entry
1838 tree exp;
1839 tree low;
1840 tree high;
1841 bool in_p;
1842 bool strict_overflow_p;
1843 unsigned int idx, next;
1846 /* This is similar to make_range in fold-const.c, but on top of
1847 GIMPLE instead of trees. If EXP is non-NULL, it should be
1848 an SSA_NAME and STMT argument is ignored, otherwise STMT
1849 argument should be a GIMPLE_COND. */
1851 static void
1852 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1854 int in_p;
1855 tree low, high;
1856 bool is_bool, strict_overflow_p;
1858 r->exp = NULL_TREE;
1859 r->in_p = false;
1860 r->strict_overflow_p = false;
1861 r->low = NULL_TREE;
1862 r->high = NULL_TREE;
1863 if (exp != NULL_TREE
1864 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1865 return;
1867 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1868 and see if we can refine the range. Some of the cases below may not
1869 happen, but it doesn't seem worth worrying about this. We "continue"
1870 the outer loop when we've changed something; otherwise we "break"
1871 the switch, which will "break" the while. */
1872 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1873 high = low;
1874 in_p = 0;
1875 strict_overflow_p = false;
1876 is_bool = false;
1877 if (exp == NULL_TREE)
1878 is_bool = true;
1879 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1881 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1882 is_bool = true;
1883 else
1884 return;
1886 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1887 is_bool = true;
1889 while (1)
1891 enum tree_code code;
1892 tree arg0, arg1, exp_type;
1893 tree nexp;
1894 location_t loc;
1896 if (exp != NULL_TREE)
1898 if (TREE_CODE (exp) != SSA_NAME
1899 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1900 break;
1902 stmt = SSA_NAME_DEF_STMT (exp);
1903 if (!is_gimple_assign (stmt))
1904 break;
1906 code = gimple_assign_rhs_code (stmt);
1907 arg0 = gimple_assign_rhs1 (stmt);
1908 arg1 = gimple_assign_rhs2 (stmt);
1909 exp_type = TREE_TYPE (exp);
1911 else
1913 code = gimple_cond_code (stmt);
1914 arg0 = gimple_cond_lhs (stmt);
1915 arg1 = gimple_cond_rhs (stmt);
1916 exp_type = boolean_type_node;
1919 if (TREE_CODE (arg0) != SSA_NAME)
1920 break;
1921 loc = gimple_location (stmt);
1922 switch (code)
1924 case BIT_NOT_EXPR:
1925 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1926 /* Ensure the range is either +[-,0], +[0,0],
1927 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1928 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1929 or similar expression of unconditional true or
1930 false, it should not be negated. */
1931 && ((high && integer_zerop (high))
1932 || (low && integer_onep (low))))
1934 in_p = !in_p;
1935 exp = arg0;
1936 continue;
1938 break;
1939 case SSA_NAME:
1940 exp = arg0;
1941 continue;
1942 CASE_CONVERT:
1943 if (is_bool)
1944 goto do_default;
1945 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1947 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1948 is_bool = true;
1949 else
1950 return;
1952 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1953 is_bool = true;
1954 goto do_default;
1955 case EQ_EXPR:
1956 case NE_EXPR:
1957 case LT_EXPR:
1958 case LE_EXPR:
1959 case GE_EXPR:
1960 case GT_EXPR:
1961 is_bool = true;
1962 /* FALLTHRU */
1963 default:
1964 if (!is_bool)
1965 return;
1966 do_default:
1967 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1968 &low, &high, &in_p,
1969 &strict_overflow_p);
1970 if (nexp != NULL_TREE)
1972 exp = nexp;
1973 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1974 continue;
1976 break;
1978 break;
1980 if (is_bool)
1982 r->exp = exp;
1983 r->in_p = in_p;
1984 r->low = low;
1985 r->high = high;
1986 r->strict_overflow_p = strict_overflow_p;
1990 /* Comparison function for qsort. Sort entries
1991 without SSA_NAME exp first, then with SSA_NAMEs sorted
1992 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1993 by increasing ->low and if ->low is the same, by increasing
1994 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1995 maximum. */
1997 static int
1998 range_entry_cmp (const void *a, const void *b)
2000 const struct range_entry *p = (const struct range_entry *) a;
2001 const struct range_entry *q = (const struct range_entry *) b;
2003 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2005 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2007 /* Group range_entries for the same SSA_NAME together. */
2008 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2009 return -1;
2010 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2011 return 1;
2012 /* If ->low is different, NULL low goes first, then by
2013 ascending low. */
2014 if (p->low != NULL_TREE)
2016 if (q->low != NULL_TREE)
2018 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2019 p->low, q->low);
2020 if (tem && integer_onep (tem))
2021 return -1;
2022 tem = fold_binary (GT_EXPR, boolean_type_node,
2023 p->low, q->low);
2024 if (tem && integer_onep (tem))
2025 return 1;
2027 else
2028 return 1;
2030 else if (q->low != NULL_TREE)
2031 return -1;
2032 /* If ->high is different, NULL high goes last, before that by
2033 ascending high. */
2034 if (p->high != NULL_TREE)
2036 if (q->high != NULL_TREE)
2038 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2039 p->high, q->high);
2040 if (tem && integer_onep (tem))
2041 return -1;
2042 tem = fold_binary (GT_EXPR, boolean_type_node,
2043 p->high, q->high);
2044 if (tem && integer_onep (tem))
2045 return 1;
2047 else
2048 return -1;
2050 else if (p->high != NULL_TREE)
2051 return 1;
2052 /* If both ranges are the same, sort below by ascending idx. */
2054 else
2055 return 1;
2057 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2058 return -1;
2060 if (p->idx < q->idx)
2061 return -1;
2062 else
2064 gcc_checking_assert (p->idx > q->idx);
2065 return 1;
2069 /* Helper routine of optimize_range_test.
2070 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2071 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2072 OPCODE and OPS are arguments of optimize_range_tests. Return
2073 true if the range merge has been successful.
2074 If OPCODE is ERROR_MARK, this is called from within
2075 maybe_optimize_range_tests and is performing inter-bb range optimization.
2076 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2077 oe->rank. */
2079 static bool
2080 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2081 unsigned int count, enum tree_code opcode,
2082 vec<operand_entry_t> *ops, tree exp, bool in_p,
2083 tree low, tree high, bool strict_overflow_p)
2085 operand_entry_t oe = (*ops)[range->idx];
2086 tree op = oe->op;
2087 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2088 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2089 location_t loc = gimple_location (stmt);
2090 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2091 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2092 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2093 gimple_stmt_iterator gsi;
2095 if (tem == NULL_TREE)
2096 return false;
2098 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2099 warning_at (loc, OPT_Wstrict_overflow,
2100 "assuming signed overflow does not occur "
2101 "when simplifying range test");
2103 if (dump_file && (dump_flags & TDF_DETAILS))
2105 struct range_entry *r;
2106 fprintf (dump_file, "Optimizing range tests ");
2107 print_generic_expr (dump_file, range->exp, 0);
2108 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2109 print_generic_expr (dump_file, range->low, 0);
2110 fprintf (dump_file, ", ");
2111 print_generic_expr (dump_file, range->high, 0);
2112 fprintf (dump_file, "]");
2113 for (r = otherrange; r < otherrange + count; r++)
2115 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2116 print_generic_expr (dump_file, r->low, 0);
2117 fprintf (dump_file, ", ");
2118 print_generic_expr (dump_file, r->high, 0);
2119 fprintf (dump_file, "]");
2121 fprintf (dump_file, "\n into ");
2122 print_generic_expr (dump_file, tem, 0);
2123 fprintf (dump_file, "\n");
2126 if (opcode == BIT_IOR_EXPR
2127 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2128 tem = invert_truthvalue_loc (loc, tem);
2130 tem = fold_convert_loc (loc, optype, tem);
2131 gsi = gsi_for_stmt (stmt);
2132 /* In rare cases range->exp can be equal to lhs of stmt.
2133 In that case we have to insert after the stmt rather then before
2134 it. */
2135 if (op == range->exp)
2136 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2137 GSI_CONTINUE_LINKING);
2138 else
2140 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2141 GSI_SAME_STMT);
2142 gsi_prev (&gsi);
2144 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2145 if (gimple_uid (gsi_stmt (gsi)))
2146 break;
2147 else
2148 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2150 oe->op = tem;
2151 range->exp = exp;
2152 range->low = low;
2153 range->high = high;
2154 range->in_p = in_p;
2155 range->strict_overflow_p = false;
2157 for (range = otherrange; range < otherrange + count; range++)
2159 oe = (*ops)[range->idx];
2160 /* Now change all the other range test immediate uses, so that
2161 those tests will be optimized away. */
2162 if (opcode == ERROR_MARK)
2164 if (oe->op)
2165 oe->op = build_int_cst (TREE_TYPE (oe->op),
2166 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2167 else
2168 oe->op = (oe->rank == BIT_IOR_EXPR
2169 ? boolean_false_node : boolean_true_node);
2171 else
2172 oe->op = error_mark_node;
2173 range->exp = NULL_TREE;
2175 return true;
2178 /* Optimize X == CST1 || X == CST2
2179 if popcount (CST1 ^ CST2) == 1 into
2180 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2181 Similarly for ranges. E.g.
2182 X != 2 && X != 3 && X != 10 && X != 11
2183 will be transformed by the previous optimization into
2184 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2185 and this loop can transform that into
2186 !(((X & ~8) - 2U) <= 1U). */
2188 static bool
2189 optimize_range_tests_xor (enum tree_code opcode, tree type,
2190 tree lowi, tree lowj, tree highi, tree highj,
2191 vec<operand_entry_t> *ops,
2192 struct range_entry *rangei,
2193 struct range_entry *rangej)
2195 tree lowxor, highxor, tem, exp;
2196 /* Check highi ^ lowi == highj ^ lowj and
2197 popcount (highi ^ lowi) == 1. */
2198 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2199 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2200 return false;
2201 if (tree_log2 (lowxor) < 0)
2202 return false;
2203 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2204 if (!tree_int_cst_equal (lowxor, highxor))
2205 return false;
2207 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2208 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2209 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2210 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2211 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2212 rangei->in_p, lowj, highj,
2213 rangei->strict_overflow_p
2214 || rangej->strict_overflow_p))
2215 return true;
2216 return false;
2219 /* Optimize X == CST1 || X == CST2
2220 if popcount (CST2 - CST1) == 1 into
2221 ((X - CST1) & ~(CST2 - CST1)) == 0.
2222 Similarly for ranges. E.g.
2223 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2224 || X == 75 || X == 45
2225 will be transformed by the previous optimization into
2226 (X - 43U) <= 3U || (X - 75U) <= 3U
2227 and this loop can transform that into
2228 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2229 static bool
2230 optimize_range_tests_diff (enum tree_code opcode, tree type,
2231 tree lowi, tree lowj, tree highi, tree highj,
2232 vec<operand_entry_t> *ops,
2233 struct range_entry *rangei,
2234 struct range_entry *rangej)
2236 tree tem1, tem2, mask;
2237 /* Check highi - lowi == highj - lowj. */
2238 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2239 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2240 return false;
2241 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2242 if (!tree_int_cst_equal (tem1, tem2))
2243 return false;
2244 /* Check popcount (lowj - lowi) == 1. */
2245 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2246 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2247 return false;
2248 if (tree_log2 (tem1) < 0)
2249 return false;
2251 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2252 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2253 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2254 lowj = build_int_cst (type, 0);
2255 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2256 rangei->in_p, lowj, tem2,
2257 rangei->strict_overflow_p
2258 || rangej->strict_overflow_p))
2259 return true;
2260 return false;
2263 /* It does some common checks for function optimize_range_tests_xor and
2264 optimize_range_tests_diff.
2265 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2266 Else it calls optimize_range_tests_diff. */
2268 static bool
2269 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2270 bool optimize_xor, vec<operand_entry_t> *ops,
2271 struct range_entry *ranges)
2273 int i, j;
2274 bool any_changes = false;
2275 for (i = first; i < length; i++)
2277 tree lowi, highi, lowj, highj, type, tem;
2279 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2280 continue;
2281 type = TREE_TYPE (ranges[i].exp);
2282 if (!INTEGRAL_TYPE_P (type))
2283 continue;
2284 lowi = ranges[i].low;
2285 if (lowi == NULL_TREE)
2286 lowi = TYPE_MIN_VALUE (type);
2287 highi = ranges[i].high;
2288 if (highi == NULL_TREE)
2289 continue;
2290 for (j = i + 1; j < length && j < i + 64; j++)
2292 bool changes;
2293 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2294 continue;
2295 lowj = ranges[j].low;
2296 if (lowj == NULL_TREE)
2297 continue;
2298 highj = ranges[j].high;
2299 if (highj == NULL_TREE)
2300 highj = TYPE_MAX_VALUE (type);
2301 /* Check lowj > highi. */
2302 tem = fold_binary (GT_EXPR, boolean_type_node,
2303 lowj, highi);
2304 if (tem == NULL_TREE || !integer_onep (tem))
2305 continue;
2306 if (optimize_xor)
2307 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2308 highi, highj, ops,
2309 ranges + i, ranges + j);
2310 else
2311 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2312 highi, highj, ops,
2313 ranges + i, ranges + j);
2314 if (changes)
2316 any_changes = true;
2317 break;
2321 return any_changes;
2324 /* Optimize range tests, similarly how fold_range_test optimizes
2325 it on trees. The tree code for the binary
2326 operation between all the operands is OPCODE.
2327 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2328 maybe_optimize_range_tests for inter-bb range optimization.
2329 In that case if oe->op is NULL, oe->id is bb->index whose
2330 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2331 the actual opcode. */
2333 static bool
2334 optimize_range_tests (enum tree_code opcode,
2335 vec<operand_entry_t> *ops)
2337 unsigned int length = ops->length (), i, j, first;
2338 operand_entry_t oe;
2339 struct range_entry *ranges;
2340 bool any_changes = false;
2342 if (length == 1)
2343 return false;
2345 ranges = XNEWVEC (struct range_entry, length);
2346 for (i = 0; i < length; i++)
2348 oe = (*ops)[i];
2349 ranges[i].idx = i;
2350 init_range_entry (ranges + i, oe->op,
2351 oe->op ? NULL :
2352 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2353 /* For | invert it now, we will invert it again before emitting
2354 the optimized expression. */
2355 if (opcode == BIT_IOR_EXPR
2356 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2357 ranges[i].in_p = !ranges[i].in_p;
2360 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2361 for (i = 0; i < length; i++)
2362 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2363 break;
2365 /* Try to merge ranges. */
2366 for (first = i; i < length; i++)
2368 tree low = ranges[i].low;
2369 tree high = ranges[i].high;
2370 int in_p = ranges[i].in_p;
2371 bool strict_overflow_p = ranges[i].strict_overflow_p;
2372 int update_fail_count = 0;
2374 for (j = i + 1; j < length; j++)
2376 if (ranges[i].exp != ranges[j].exp)
2377 break;
2378 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2379 ranges[j].in_p, ranges[j].low, ranges[j].high))
2380 break;
2381 strict_overflow_p |= ranges[j].strict_overflow_p;
2384 if (j == i + 1)
2385 continue;
2387 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2388 ops, ranges[i].exp, in_p, low, high,
2389 strict_overflow_p))
2391 i = j - 1;
2392 any_changes = true;
2394 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2395 while update_range_test would fail. */
2396 else if (update_fail_count == 64)
2397 i = j - 1;
2398 else
2399 ++update_fail_count;
2402 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2403 ops, ranges);
2405 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2406 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2407 ops, ranges);
2409 if (any_changes && opcode != ERROR_MARK)
2411 j = 0;
2412 FOR_EACH_VEC_ELT (*ops, i, oe)
2414 if (oe->op == error_mark_node)
2415 continue;
2416 else if (i != j)
2417 (*ops)[j] = oe;
2418 j++;
2420 ops->truncate (j);
2423 XDELETEVEC (ranges);
2424 return any_changes;
2427 /* Return true if STMT is a cast like:
2428 <bb N>:
2430 _123 = (int) _234;
2432 <bb M>:
2433 # _345 = PHI <_123(N), 1(...), 1(...)>
2434 where _234 has bool type, _123 has single use and
2435 bb N has a single successor M. This is commonly used in
2436 the last block of a range test. */
2438 static bool
2439 final_range_test_p (gimple stmt)
2441 basic_block bb, rhs_bb;
2442 edge e;
2443 tree lhs, rhs;
2444 use_operand_p use_p;
2445 gimple use_stmt;
2447 if (!gimple_assign_cast_p (stmt))
2448 return false;
2449 bb = gimple_bb (stmt);
2450 if (!single_succ_p (bb))
2451 return false;
2452 e = single_succ_edge (bb);
2453 if (e->flags & EDGE_COMPLEX)
2454 return false;
2456 lhs = gimple_assign_lhs (stmt);
2457 rhs = gimple_assign_rhs1 (stmt);
2458 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2459 || TREE_CODE (rhs) != SSA_NAME
2460 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2461 return false;
2463 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2464 if (!single_imm_use (lhs, &use_p, &use_stmt))
2465 return false;
2467 if (gimple_code (use_stmt) != GIMPLE_PHI
2468 || gimple_bb (use_stmt) != e->dest)
2469 return false;
2471 /* And that the rhs is defined in the same loop. */
2472 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2473 if (rhs_bb == NULL
2474 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2475 return false;
2477 return true;
2480 /* Return true if BB is suitable basic block for inter-bb range test
2481 optimization. If BACKWARD is true, BB should be the only predecessor
2482 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2483 or compared with to find a common basic block to which all conditions
2484 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2485 be the only predecessor of BB. */
2487 static bool
2488 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2489 bool backward)
2491 edge_iterator ei, ei2;
2492 edge e, e2;
2493 gimple stmt;
2494 gimple_stmt_iterator gsi;
2495 bool other_edge_seen = false;
2496 bool is_cond;
2498 if (test_bb == bb)
2499 return false;
2500 /* Check last stmt first. */
2501 stmt = last_stmt (bb);
2502 if (stmt == NULL
2503 || (gimple_code (stmt) != GIMPLE_COND
2504 && (backward || !final_range_test_p (stmt)))
2505 || gimple_visited_p (stmt)
2506 || stmt_could_throw_p (stmt)
2507 || *other_bb == bb)
2508 return false;
2509 is_cond = gimple_code (stmt) == GIMPLE_COND;
2510 if (is_cond)
2512 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2513 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2514 to *OTHER_BB (if not set yet, try to find it out). */
2515 if (EDGE_COUNT (bb->succs) != 2)
2516 return false;
2517 FOR_EACH_EDGE (e, ei, bb->succs)
2519 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2520 return false;
2521 if (e->dest == test_bb)
2523 if (backward)
2524 continue;
2525 else
2526 return false;
2528 if (e->dest == bb)
2529 return false;
2530 if (*other_bb == NULL)
2532 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2533 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2534 return false;
2535 else if (e->dest == e2->dest)
2536 *other_bb = e->dest;
2537 if (*other_bb == NULL)
2538 return false;
2540 if (e->dest == *other_bb)
2541 other_edge_seen = true;
2542 else if (backward)
2543 return false;
2545 if (*other_bb == NULL || !other_edge_seen)
2546 return false;
2548 else if (single_succ (bb) != *other_bb)
2549 return false;
2551 /* Now check all PHIs of *OTHER_BB. */
2552 e = find_edge (bb, *other_bb);
2553 e2 = find_edge (test_bb, *other_bb);
2554 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2556 gimple phi = gsi_stmt (gsi);
2557 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2558 corresponding to BB and TEST_BB predecessor must be the same. */
2559 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2560 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2562 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2563 one of the PHIs should have the lhs of the last stmt in
2564 that block as PHI arg and that PHI should have 0 or 1
2565 corresponding to it in all other range test basic blocks
2566 considered. */
2567 if (!is_cond)
2569 if (gimple_phi_arg_def (phi, e->dest_idx)
2570 == gimple_assign_lhs (stmt)
2571 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2572 || integer_onep (gimple_phi_arg_def (phi,
2573 e2->dest_idx))))
2574 continue;
2576 else
2578 gimple test_last = last_stmt (test_bb);
2579 if (gimple_code (test_last) != GIMPLE_COND
2580 && gimple_phi_arg_def (phi, e2->dest_idx)
2581 == gimple_assign_lhs (test_last)
2582 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2583 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2584 continue;
2587 return false;
2590 return true;
2593 /* Return true if BB doesn't have side-effects that would disallow
2594 range test optimization, all SSA_NAMEs set in the bb are consumed
2595 in the bb and there are no PHIs. */
2597 static bool
2598 no_side_effect_bb (basic_block bb)
2600 gimple_stmt_iterator gsi;
2601 gimple last;
2603 if (!gimple_seq_empty_p (phi_nodes (bb)))
2604 return false;
2605 last = last_stmt (bb);
2606 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2608 gimple stmt = gsi_stmt (gsi);
2609 tree lhs;
2610 imm_use_iterator imm_iter;
2611 use_operand_p use_p;
2613 if (is_gimple_debug (stmt))
2614 continue;
2615 if (gimple_has_side_effects (stmt))
2616 return false;
2617 if (stmt == last)
2618 return true;
2619 if (!is_gimple_assign (stmt))
2620 return false;
2621 lhs = gimple_assign_lhs (stmt);
2622 if (TREE_CODE (lhs) != SSA_NAME)
2623 return false;
2624 if (gimple_assign_rhs_could_trap_p (stmt))
2625 return false;
2626 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2628 gimple use_stmt = USE_STMT (use_p);
2629 if (is_gimple_debug (use_stmt))
2630 continue;
2631 if (gimple_bb (use_stmt) != bb)
2632 return false;
2635 return false;
2638 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2639 return true and fill in *OPS recursively. */
2641 static bool
2642 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2643 struct loop *loop)
2645 gimple stmt = SSA_NAME_DEF_STMT (var);
2646 tree rhs[2];
2647 int i;
2649 if (!is_reassociable_op (stmt, code, loop))
2650 return false;
2652 rhs[0] = gimple_assign_rhs1 (stmt);
2653 rhs[1] = gimple_assign_rhs2 (stmt);
2654 gimple_set_visited (stmt, true);
2655 for (i = 0; i < 2; i++)
2656 if (TREE_CODE (rhs[i]) == SSA_NAME
2657 && !get_ops (rhs[i], code, ops, loop)
2658 && has_single_use (rhs[i]))
2660 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2662 oe->op = rhs[i];
2663 oe->rank = code;
2664 oe->id = 0;
2665 oe->count = 1;
2666 ops->safe_push (oe);
2668 return true;
2671 /* Find the ops that were added by get_ops starting from VAR, see if
2672 they were changed during update_range_test and if yes, create new
2673 stmts. */
2675 static tree
2676 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2677 unsigned int *pidx, struct loop *loop)
2679 gimple stmt = SSA_NAME_DEF_STMT (var);
2680 tree rhs[4];
2681 int i;
2683 if (!is_reassociable_op (stmt, code, loop))
2684 return NULL;
2686 rhs[0] = gimple_assign_rhs1 (stmt);
2687 rhs[1] = gimple_assign_rhs2 (stmt);
2688 rhs[2] = rhs[0];
2689 rhs[3] = rhs[1];
2690 for (i = 0; i < 2; i++)
2691 if (TREE_CODE (rhs[i]) == SSA_NAME)
2693 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2694 if (rhs[2 + i] == NULL_TREE)
2696 if (has_single_use (rhs[i]))
2697 rhs[2 + i] = ops[(*pidx)++]->op;
2698 else
2699 rhs[2 + i] = rhs[i];
2702 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2703 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2705 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2706 var = make_ssa_name (TREE_TYPE (var), NULL);
2707 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2708 var, rhs[2], rhs[3]);
2709 gimple_set_uid (g, gimple_uid (stmt));
2710 gimple_set_visited (g, true);
2711 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2713 return var;
2716 /* Structure to track the initial value passed to get_ops and
2717 the range in the ops vector for each basic block. */
2719 struct inter_bb_range_test_entry
2721 tree op;
2722 unsigned int first_idx, last_idx;
2725 /* Inter-bb range test optimization. */
2727 static void
2728 maybe_optimize_range_tests (gimple stmt)
2730 basic_block first_bb = gimple_bb (stmt);
2731 basic_block last_bb = first_bb;
2732 basic_block other_bb = NULL;
2733 basic_block bb;
2734 edge_iterator ei;
2735 edge e;
2736 auto_vec<operand_entry_t> ops;
2737 auto_vec<inter_bb_range_test_entry> bbinfo;
2738 bool any_changes = false;
2740 /* Consider only basic blocks that end with GIMPLE_COND or
2741 a cast statement satisfying final_range_test_p. All
2742 but the last bb in the first_bb .. last_bb range
2743 should end with GIMPLE_COND. */
2744 if (gimple_code (stmt) == GIMPLE_COND)
2746 if (EDGE_COUNT (first_bb->succs) != 2)
2747 return;
2749 else if (final_range_test_p (stmt))
2750 other_bb = single_succ (first_bb);
2751 else
2752 return;
2754 if (stmt_could_throw_p (stmt))
2755 return;
2757 /* As relative ordering of post-dominator sons isn't fixed,
2758 maybe_optimize_range_tests can be called first on any
2759 bb in the range we want to optimize. So, start searching
2760 backwards, if first_bb can be set to a predecessor. */
2761 while (single_pred_p (first_bb))
2763 basic_block pred_bb = single_pred (first_bb);
2764 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2765 break;
2766 if (!no_side_effect_bb (first_bb))
2767 break;
2768 first_bb = pred_bb;
2770 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2771 Before starting forward search in last_bb successors, find
2772 out the other_bb. */
2773 if (first_bb == last_bb)
2775 other_bb = NULL;
2776 /* As non-GIMPLE_COND last stmt always terminates the range,
2777 if forward search didn't discover anything, just give up. */
2778 if (gimple_code (stmt) != GIMPLE_COND)
2779 return;
2780 /* Look at both successors. Either it ends with a GIMPLE_COND
2781 and satisfies suitable_cond_bb, or ends with a cast and
2782 other_bb is that cast's successor. */
2783 FOR_EACH_EDGE (e, ei, first_bb->succs)
2784 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2785 || e->dest == first_bb)
2786 return;
2787 else if (single_pred_p (e->dest))
2789 stmt = last_stmt (e->dest);
2790 if (stmt
2791 && gimple_code (stmt) == GIMPLE_COND
2792 && EDGE_COUNT (e->dest->succs) == 2)
2794 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2795 break;
2796 else
2797 other_bb = NULL;
2799 else if (stmt
2800 && final_range_test_p (stmt)
2801 && find_edge (first_bb, single_succ (e->dest)))
2803 other_bb = single_succ (e->dest);
2804 if (other_bb == first_bb)
2805 other_bb = NULL;
2808 if (other_bb == NULL)
2809 return;
2811 /* Now do the forward search, moving last_bb to successor bbs
2812 that aren't other_bb. */
2813 while (EDGE_COUNT (last_bb->succs) == 2)
2815 FOR_EACH_EDGE (e, ei, last_bb->succs)
2816 if (e->dest != other_bb)
2817 break;
2818 if (e == NULL)
2819 break;
2820 if (!single_pred_p (e->dest))
2821 break;
2822 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2823 break;
2824 if (!no_side_effect_bb (e->dest))
2825 break;
2826 last_bb = e->dest;
2828 if (first_bb == last_bb)
2829 return;
2830 /* Here basic blocks first_bb through last_bb's predecessor
2831 end with GIMPLE_COND, all of them have one of the edges to
2832 other_bb and another to another block in the range,
2833 all blocks except first_bb don't have side-effects and
2834 last_bb ends with either GIMPLE_COND, or cast satisfying
2835 final_range_test_p. */
2836 for (bb = last_bb; ; bb = single_pred (bb))
2838 enum tree_code code;
2839 tree lhs, rhs;
2840 inter_bb_range_test_entry bb_ent;
2842 bb_ent.op = NULL_TREE;
2843 bb_ent.first_idx = ops.length ();
2844 bb_ent.last_idx = bb_ent.first_idx;
2845 e = find_edge (bb, other_bb);
2846 stmt = last_stmt (bb);
2847 gimple_set_visited (stmt, true);
2848 if (gimple_code (stmt) != GIMPLE_COND)
2850 use_operand_p use_p;
2851 gimple phi;
2852 edge e2;
2853 unsigned int d;
2855 lhs = gimple_assign_lhs (stmt);
2856 rhs = gimple_assign_rhs1 (stmt);
2857 gcc_assert (bb == last_bb);
2859 /* stmt is
2860 _123 = (int) _234;
2862 followed by:
2863 <bb M>:
2864 # _345 = PHI <_123(N), 1(...), 1(...)>
2866 or 0 instead of 1. If it is 0, the _234
2867 range test is anded together with all the
2868 other range tests, if it is 1, it is ored with
2869 them. */
2870 single_imm_use (lhs, &use_p, &phi);
2871 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2872 e2 = find_edge (first_bb, other_bb);
2873 d = e2->dest_idx;
2874 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2875 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2876 code = BIT_AND_EXPR;
2877 else
2879 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2880 code = BIT_IOR_EXPR;
2883 /* If _234 SSA_NAME_DEF_STMT is
2884 _234 = _567 | _789;
2885 (or &, corresponding to 1/0 in the phi arguments,
2886 push into ops the individual range test arguments
2887 of the bitwise or resp. and, recursively. */
2888 if (!get_ops (rhs, code, &ops,
2889 loop_containing_stmt (stmt))
2890 && has_single_use (rhs))
2892 /* Otherwise, push the _234 range test itself. */
2893 operand_entry_t oe
2894 = (operand_entry_t) pool_alloc (operand_entry_pool);
2896 oe->op = rhs;
2897 oe->rank = code;
2898 oe->id = 0;
2899 oe->count = 1;
2900 ops.safe_push (oe);
2901 bb_ent.last_idx++;
2903 else
2904 bb_ent.last_idx = ops.length ();
2905 bb_ent.op = rhs;
2906 bbinfo.safe_push (bb_ent);
2907 continue;
2909 /* Otherwise stmt is GIMPLE_COND. */
2910 code = gimple_cond_code (stmt);
2911 lhs = gimple_cond_lhs (stmt);
2912 rhs = gimple_cond_rhs (stmt);
2913 if (TREE_CODE (lhs) == SSA_NAME
2914 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2915 && ((code != EQ_EXPR && code != NE_EXPR)
2916 || rhs != boolean_false_node
2917 /* Either push into ops the individual bitwise
2918 or resp. and operands, depending on which
2919 edge is other_bb. */
2920 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2921 ^ (code == EQ_EXPR))
2922 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2923 loop_containing_stmt (stmt))))
2925 /* Or push the GIMPLE_COND stmt itself. */
2926 operand_entry_t oe
2927 = (operand_entry_t) pool_alloc (operand_entry_pool);
2929 oe->op = NULL;
2930 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2931 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2932 /* oe->op = NULL signs that there is no SSA_NAME
2933 for the range test, and oe->id instead is the
2934 basic block number, at which's end the GIMPLE_COND
2935 is. */
2936 oe->id = bb->index;
2937 oe->count = 1;
2938 ops.safe_push (oe);
2939 bb_ent.op = NULL;
2940 bb_ent.last_idx++;
2942 else if (ops.length () > bb_ent.first_idx)
2944 bb_ent.op = lhs;
2945 bb_ent.last_idx = ops.length ();
2947 bbinfo.safe_push (bb_ent);
2948 if (bb == first_bb)
2949 break;
2951 if (ops.length () > 1)
2952 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2953 if (any_changes)
2955 unsigned int idx;
2956 /* update_ops relies on has_single_use predicates returning the
2957 same values as it did during get_ops earlier. Additionally it
2958 never removes statements, only adds new ones and it should walk
2959 from the single imm use and check the predicate already before
2960 making those changes.
2961 On the other side, the handling of GIMPLE_COND directly can turn
2962 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2963 it needs to be done in a separate loop afterwards. */
2964 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2966 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2967 && bbinfo[idx].op != NULL_TREE)
2969 tree new_op;
2971 stmt = last_stmt (bb);
2972 new_op = update_ops (bbinfo[idx].op,
2973 (enum tree_code)
2974 ops[bbinfo[idx].first_idx]->rank,
2975 ops, &bbinfo[idx].first_idx,
2976 loop_containing_stmt (stmt));
2977 if (new_op == NULL_TREE)
2979 gcc_assert (bb == last_bb);
2980 new_op = ops[bbinfo[idx].first_idx++]->op;
2982 if (bbinfo[idx].op != new_op)
2984 imm_use_iterator iter;
2985 use_operand_p use_p;
2986 gimple use_stmt, cast_stmt = NULL;
2988 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2989 if (is_gimple_debug (use_stmt))
2990 continue;
2991 else if (gimple_code (use_stmt) == GIMPLE_COND
2992 || gimple_code (use_stmt) == GIMPLE_PHI)
2993 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2994 SET_USE (use_p, new_op);
2995 else if (gimple_assign_cast_p (use_stmt))
2996 cast_stmt = use_stmt;
2997 else
2998 gcc_unreachable ();
2999 if (cast_stmt)
3001 gcc_assert (bb == last_bb);
3002 tree lhs = gimple_assign_lhs (cast_stmt);
3003 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3004 enum tree_code rhs_code
3005 = gimple_assign_rhs_code (cast_stmt);
3006 gimple g;
3007 if (is_gimple_min_invariant (new_op))
3009 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3010 g = gimple_build_assign (new_lhs, new_op);
3012 else
3013 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3014 new_op, NULL_TREE);
3015 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3016 gimple_set_uid (g, gimple_uid (cast_stmt));
3017 gimple_set_visited (g, true);
3018 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3019 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3020 if (is_gimple_debug (use_stmt))
3021 continue;
3022 else if (gimple_code (use_stmt) == GIMPLE_COND
3023 || gimple_code (use_stmt) == GIMPLE_PHI)
3024 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3025 SET_USE (use_p, new_lhs);
3026 else
3027 gcc_unreachable ();
3031 if (bb == first_bb)
3032 break;
3034 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3036 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3037 && bbinfo[idx].op == NULL_TREE
3038 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3040 stmt = last_stmt (bb);
3041 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3042 gimple_cond_make_false (stmt);
3043 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3044 gimple_cond_make_true (stmt);
3045 else
3047 gimple_cond_set_code (stmt, NE_EXPR);
3048 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3049 gimple_cond_set_rhs (stmt, boolean_false_node);
3051 update_stmt (stmt);
3053 if (bb == first_bb)
3054 break;
3059 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3060 of STMT in it's operands. This is also known as a "destructive
3061 update" operation. */
3063 static bool
3064 is_phi_for_stmt (gimple stmt, tree operand)
3066 gimple def_stmt;
3067 tree lhs;
3068 use_operand_p arg_p;
3069 ssa_op_iter i;
3071 if (TREE_CODE (operand) != SSA_NAME)
3072 return false;
3074 lhs = gimple_assign_lhs (stmt);
3076 def_stmt = SSA_NAME_DEF_STMT (operand);
3077 if (gimple_code (def_stmt) != GIMPLE_PHI)
3078 return false;
3080 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3081 if (lhs == USE_FROM_PTR (arg_p))
3082 return true;
3083 return false;
3086 /* Remove def stmt of VAR if VAR has zero uses and recurse
3087 on rhs1 operand if so. */
3089 static void
3090 remove_visited_stmt_chain (tree var)
3092 gimple stmt;
3093 gimple_stmt_iterator gsi;
3095 while (1)
3097 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3098 return;
3099 stmt = SSA_NAME_DEF_STMT (var);
3100 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3102 var = gimple_assign_rhs1 (stmt);
3103 gsi = gsi_for_stmt (stmt);
3104 reassoc_remove_stmt (&gsi);
3105 release_defs (stmt);
3107 else
3108 return;
3112 /* This function checks three consequtive operands in
3113 passed operands vector OPS starting from OPINDEX and
3114 swaps two operands if it is profitable for binary operation
3115 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3117 We pair ops with the same rank if possible.
3119 The alternative we try is to see if STMT is a destructive
3120 update style statement, which is like:
3121 b = phi (a, ...)
3122 a = c + b;
3123 In that case, we want to use the destructive update form to
3124 expose the possible vectorizer sum reduction opportunity.
3125 In that case, the third operand will be the phi node. This
3126 check is not performed if STMT is null.
3128 We could, of course, try to be better as noted above, and do a
3129 lot of work to try to find these opportunities in >3 operand
3130 cases, but it is unlikely to be worth it. */
3132 static void
3133 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3134 unsigned int opindex, gimple stmt)
3136 operand_entry_t oe1, oe2, oe3;
3138 oe1 = ops[opindex];
3139 oe2 = ops[opindex + 1];
3140 oe3 = ops[opindex + 2];
3142 if ((oe1->rank == oe2->rank
3143 && oe2->rank != oe3->rank)
3144 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3145 && !is_phi_for_stmt (stmt, oe1->op)
3146 && !is_phi_for_stmt (stmt, oe2->op)))
3148 struct operand_entry temp = *oe3;
3149 oe3->op = oe1->op;
3150 oe3->rank = oe1->rank;
3151 oe1->op = temp.op;
3152 oe1->rank= temp.rank;
3154 else if ((oe1->rank == oe3->rank
3155 && oe2->rank != oe3->rank)
3156 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3157 && !is_phi_for_stmt (stmt, oe1->op)
3158 && !is_phi_for_stmt (stmt, oe3->op)))
3160 struct operand_entry temp = *oe2;
3161 oe2->op = oe1->op;
3162 oe2->rank = oe1->rank;
3163 oe1->op = temp.op;
3164 oe1->rank = temp.rank;
3168 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3169 two definitions, otherwise return STMT. */
3171 static inline gimple
3172 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3174 if (TREE_CODE (rhs1) == SSA_NAME
3175 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3176 stmt = SSA_NAME_DEF_STMT (rhs1);
3177 if (TREE_CODE (rhs2) == SSA_NAME
3178 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3179 stmt = SSA_NAME_DEF_STMT (rhs2);
3180 return stmt;
3183 /* Recursively rewrite our linearized statements so that the operators
3184 match those in OPS[OPINDEX], putting the computation in rank
3185 order. Return new lhs. */
3187 static tree
3188 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3189 vec<operand_entry_t> ops, bool changed)
3191 tree rhs1 = gimple_assign_rhs1 (stmt);
3192 tree rhs2 = gimple_assign_rhs2 (stmt);
3193 tree lhs = gimple_assign_lhs (stmt);
3194 operand_entry_t oe;
3196 /* The final recursion case for this function is that you have
3197 exactly two operations left.
3198 If we had one exactly one op in the entire list to start with, we
3199 would have never called this function, and the tail recursion
3200 rewrites them one at a time. */
3201 if (opindex + 2 == ops.length ())
3203 operand_entry_t oe1, oe2;
3205 oe1 = ops[opindex];
3206 oe2 = ops[opindex + 1];
3208 if (rhs1 != oe1->op || rhs2 != oe2->op)
3210 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3211 unsigned int uid = gimple_uid (stmt);
3213 if (dump_file && (dump_flags & TDF_DETAILS))
3215 fprintf (dump_file, "Transforming ");
3216 print_gimple_stmt (dump_file, stmt, 0, 0);
3219 if (changed)
3221 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3222 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3223 stmt
3224 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3225 lhs, oe1->op, oe2->op);
3226 gimple_set_uid (stmt, uid);
3227 gimple_set_visited (stmt, true);
3228 if (insert_point == gsi_stmt (gsi))
3229 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3230 else
3231 insert_stmt_after (stmt, insert_point);
3233 else
3235 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3236 == stmt);
3237 gimple_assign_set_rhs1 (stmt, oe1->op);
3238 gimple_assign_set_rhs2 (stmt, oe2->op);
3239 update_stmt (stmt);
3242 if (rhs1 != oe1->op && rhs1 != oe2->op)
3243 remove_visited_stmt_chain (rhs1);
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3247 fprintf (dump_file, " into ");
3248 print_gimple_stmt (dump_file, stmt, 0, 0);
3251 return lhs;
3254 /* If we hit here, we should have 3 or more ops left. */
3255 gcc_assert (opindex + 2 < ops.length ());
3257 /* Rewrite the next operator. */
3258 oe = ops[opindex];
3260 /* Recurse on the LHS of the binary operator, which is guaranteed to
3261 be the non-leaf side. */
3262 tree new_rhs1
3263 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3264 changed || oe->op != rhs2);
3266 if (oe->op != rhs2 || new_rhs1 != rhs1)
3268 if (dump_file && (dump_flags & TDF_DETAILS))
3270 fprintf (dump_file, "Transforming ");
3271 print_gimple_stmt (dump_file, stmt, 0, 0);
3274 /* If changed is false, this is either opindex == 0
3275 or all outer rhs2's were equal to corresponding oe->op,
3276 and powi_result is NULL.
3277 That means lhs is equivalent before and after reassociation.
3278 Otherwise ensure the old lhs SSA_NAME is not reused and
3279 create a new stmt as well, so that any debug stmts will be
3280 properly adjusted. */
3281 if (changed)
3283 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3284 unsigned int uid = gimple_uid (stmt);
3285 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3287 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3288 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3289 lhs, new_rhs1, oe->op);
3290 gimple_set_uid (stmt, uid);
3291 gimple_set_visited (stmt, true);
3292 if (insert_point == gsi_stmt (gsi))
3293 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3294 else
3295 insert_stmt_after (stmt, insert_point);
3297 else
3299 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3300 == stmt);
3301 gimple_assign_set_rhs1 (stmt, new_rhs1);
3302 gimple_assign_set_rhs2 (stmt, oe->op);
3303 update_stmt (stmt);
3306 if (dump_file && (dump_flags & TDF_DETAILS))
3308 fprintf (dump_file, " into ");
3309 print_gimple_stmt (dump_file, stmt, 0, 0);
3312 return lhs;
3315 /* Find out how many cycles we need to compute statements chain.
3316 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3317 maximum number of independent statements we may execute per cycle. */
3319 static int
3320 get_required_cycles (int ops_num, int cpu_width)
3322 int res;
3323 int elog;
3324 unsigned int rest;
3326 /* While we have more than 2 * cpu_width operands
3327 we may reduce number of operands by cpu_width
3328 per cycle. */
3329 res = ops_num / (2 * cpu_width);
3331 /* Remained operands count may be reduced twice per cycle
3332 until we have only one operand. */
3333 rest = (unsigned)(ops_num - res * cpu_width);
3334 elog = exact_log2 (rest);
3335 if (elog >= 0)
3336 res += elog;
3337 else
3338 res += floor_log2 (rest) + 1;
3340 return res;
3343 /* Returns an optimal number of registers to use for computation of
3344 given statements. */
3346 static int
3347 get_reassociation_width (int ops_num, enum tree_code opc,
3348 enum machine_mode mode)
3350 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3351 int width;
3352 int width_min;
3353 int cycles_best;
3355 if (param_width > 0)
3356 width = param_width;
3357 else
3358 width = targetm.sched.reassociation_width (opc, mode);
3360 if (width == 1)
3361 return width;
3363 /* Get the minimal time required for sequence computation. */
3364 cycles_best = get_required_cycles (ops_num, width);
3366 /* Check if we may use less width and still compute sequence for
3367 the same time. It will allow us to reduce registers usage.
3368 get_required_cycles is monotonically increasing with lower width
3369 so we can perform a binary search for the minimal width that still
3370 results in the optimal cycle count. */
3371 width_min = 1;
3372 while (width > width_min)
3374 int width_mid = (width + width_min) / 2;
3376 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3377 width = width_mid;
3378 else if (width_min < width_mid)
3379 width_min = width_mid;
3380 else
3381 break;
3384 return width;
3387 /* Recursively rewrite our linearized statements so that the operators
3388 match those in OPS[OPINDEX], putting the computation in rank
3389 order and trying to allow operations to be executed in
3390 parallel. */
3392 static void
3393 rewrite_expr_tree_parallel (gimple stmt, int width,
3394 vec<operand_entry_t> ops)
3396 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3397 int op_num = ops.length ();
3398 int stmt_num = op_num - 1;
3399 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3400 int op_index = op_num - 1;
3401 int stmt_index = 0;
3402 int ready_stmts_end = 0;
3403 int i = 0;
3404 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3406 /* We start expression rewriting from the top statements.
3407 So, in this loop we create a full list of statements
3408 we will work with. */
3409 stmts[stmt_num - 1] = stmt;
3410 for (i = stmt_num - 2; i >= 0; i--)
3411 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3413 for (i = 0; i < stmt_num; i++)
3415 tree op1, op2;
3417 /* Determine whether we should use results of
3418 already handled statements or not. */
3419 if (ready_stmts_end == 0
3420 && (i - stmt_index >= width || op_index < 1))
3421 ready_stmts_end = i;
3423 /* Now we choose operands for the next statement. Non zero
3424 value in ready_stmts_end means here that we should use
3425 the result of already generated statements as new operand. */
3426 if (ready_stmts_end > 0)
3428 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3429 if (ready_stmts_end > stmt_index)
3430 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3431 else if (op_index >= 0)
3432 op2 = ops[op_index--]->op;
3433 else
3435 gcc_assert (stmt_index < i);
3436 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3439 if (stmt_index >= ready_stmts_end)
3440 ready_stmts_end = 0;
3442 else
3444 if (op_index > 1)
3445 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3446 op2 = ops[op_index--]->op;
3447 op1 = ops[op_index--]->op;
3450 /* If we emit the last statement then we should put
3451 operands into the last statement. It will also
3452 break the loop. */
3453 if (op_index < 0 && stmt_index == i)
3454 i = stmt_num - 1;
3456 if (dump_file && (dump_flags & TDF_DETAILS))
3458 fprintf (dump_file, "Transforming ");
3459 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3462 /* We keep original statement only for the last one. All
3463 others are recreated. */
3464 if (i == stmt_num - 1)
3466 gimple_assign_set_rhs1 (stmts[i], op1);
3467 gimple_assign_set_rhs2 (stmts[i], op2);
3468 update_stmt (stmts[i]);
3470 else
3471 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3473 if (dump_file && (dump_flags & TDF_DETAILS))
3475 fprintf (dump_file, " into ");
3476 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3480 remove_visited_stmt_chain (last_rhs1);
3483 /* Transform STMT, which is really (A +B) + (C + D) into the left
3484 linear form, ((A+B)+C)+D.
3485 Recurse on D if necessary. */
3487 static void
3488 linearize_expr (gimple stmt)
3490 gimple_stmt_iterator gsi;
3491 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3492 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3493 gimple oldbinrhs = binrhs;
3494 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3495 gimple newbinrhs = NULL;
3496 struct loop *loop = loop_containing_stmt (stmt);
3497 tree lhs = gimple_assign_lhs (stmt);
3499 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3500 && is_reassociable_op (binrhs, rhscode, loop));
3502 gsi = gsi_for_stmt (stmt);
3504 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3505 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3506 make_ssa_name (TREE_TYPE (lhs), NULL),
3507 gimple_assign_lhs (binlhs),
3508 gimple_assign_rhs2 (binrhs));
3509 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3510 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3511 gimple_set_uid (binrhs, gimple_uid (stmt));
3513 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3514 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3516 if (dump_file && (dump_flags & TDF_DETAILS))
3518 fprintf (dump_file, "Linearized: ");
3519 print_gimple_stmt (dump_file, stmt, 0, 0);
3522 reassociate_stats.linearized++;
3523 update_stmt (stmt);
3525 gsi = gsi_for_stmt (oldbinrhs);
3526 reassoc_remove_stmt (&gsi);
3527 release_defs (oldbinrhs);
3529 gimple_set_visited (stmt, true);
3530 gimple_set_visited (binlhs, true);
3531 gimple_set_visited (binrhs, true);
3533 /* Tail recurse on the new rhs if it still needs reassociation. */
3534 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3535 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3536 want to change the algorithm while converting to tuples. */
3537 linearize_expr (stmt);
3540 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3541 it. Otherwise, return NULL. */
3543 static gimple
3544 get_single_immediate_use (tree lhs)
3546 use_operand_p immuse;
3547 gimple immusestmt;
3549 if (TREE_CODE (lhs) == SSA_NAME
3550 && single_imm_use (lhs, &immuse, &immusestmt)
3551 && is_gimple_assign (immusestmt))
3552 return immusestmt;
3554 return NULL;
3557 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3558 representing the negated value. Insertions of any necessary
3559 instructions go before GSI.
3560 This function is recursive in that, if you hand it "a_5" as the
3561 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3562 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3564 static tree
3565 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3567 gimple negatedefstmt = NULL;
3568 tree resultofnegate;
3569 gimple_stmt_iterator gsi;
3570 unsigned int uid;
3572 /* If we are trying to negate a name, defined by an add, negate the
3573 add operands instead. */
3574 if (TREE_CODE (tonegate) == SSA_NAME)
3575 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3576 if (TREE_CODE (tonegate) == SSA_NAME
3577 && is_gimple_assign (negatedefstmt)
3578 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3579 && has_single_use (gimple_assign_lhs (negatedefstmt))
3580 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3582 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3583 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3584 tree lhs = gimple_assign_lhs (negatedefstmt);
3585 gimple g;
3587 gsi = gsi_for_stmt (negatedefstmt);
3588 rhs1 = negate_value (rhs1, &gsi);
3590 gsi = gsi_for_stmt (negatedefstmt);
3591 rhs2 = negate_value (rhs2, &gsi);
3593 gsi = gsi_for_stmt (negatedefstmt);
3594 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3595 gimple_set_visited (negatedefstmt, true);
3596 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3597 gimple_set_uid (g, gimple_uid (negatedefstmt));
3598 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3599 return lhs;
3602 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3603 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3604 NULL_TREE, true, GSI_SAME_STMT);
3605 gsi = *gsip;
3606 uid = gimple_uid (gsi_stmt (gsi));
3607 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3609 gimple stmt = gsi_stmt (gsi);
3610 if (gimple_uid (stmt) != 0)
3611 break;
3612 gimple_set_uid (stmt, uid);
3614 return resultofnegate;
3617 /* Return true if we should break up the subtract in STMT into an add
3618 with negate. This is true when we the subtract operands are really
3619 adds, or the subtract itself is used in an add expression. In
3620 either case, breaking up the subtract into an add with negate
3621 exposes the adds to reassociation. */
3623 static bool
3624 should_break_up_subtract (gimple stmt)
3626 tree lhs = gimple_assign_lhs (stmt);
3627 tree binlhs = gimple_assign_rhs1 (stmt);
3628 tree binrhs = gimple_assign_rhs2 (stmt);
3629 gimple immusestmt;
3630 struct loop *loop = loop_containing_stmt (stmt);
3632 if (TREE_CODE (binlhs) == SSA_NAME
3633 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3634 return true;
3636 if (TREE_CODE (binrhs) == SSA_NAME
3637 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3638 return true;
3640 if (TREE_CODE (lhs) == SSA_NAME
3641 && (immusestmt = get_single_immediate_use (lhs))
3642 && is_gimple_assign (immusestmt)
3643 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3644 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3645 return true;
3646 return false;
3649 /* Transform STMT from A - B into A + -B. */
3651 static void
3652 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3654 tree rhs1 = gimple_assign_rhs1 (stmt);
3655 tree rhs2 = gimple_assign_rhs2 (stmt);
3657 if (dump_file && (dump_flags & TDF_DETAILS))
3659 fprintf (dump_file, "Breaking up subtract ");
3660 print_gimple_stmt (dump_file, stmt, 0, 0);
3663 rhs2 = negate_value (rhs2, gsip);
3664 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3665 update_stmt (stmt);
3668 /* Determine whether STMT is a builtin call that raises an SSA name
3669 to an integer power and has only one use. If so, and this is early
3670 reassociation and unsafe math optimizations are permitted, place
3671 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3672 If any of these conditions does not hold, return FALSE. */
3674 static bool
3675 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3677 tree fndecl, arg1;
3678 REAL_VALUE_TYPE c, cint;
3680 if (!first_pass_instance
3681 || !flag_unsafe_math_optimizations
3682 || !is_gimple_call (stmt)
3683 || !has_single_use (gimple_call_lhs (stmt)))
3684 return false;
3686 fndecl = gimple_call_fndecl (stmt);
3688 if (!fndecl
3689 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3690 return false;
3692 switch (DECL_FUNCTION_CODE (fndecl))
3694 CASE_FLT_FN (BUILT_IN_POW):
3695 *base = gimple_call_arg (stmt, 0);
3696 arg1 = gimple_call_arg (stmt, 1);
3698 if (TREE_CODE (arg1) != REAL_CST)
3699 return false;
3701 c = TREE_REAL_CST (arg1);
3703 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3704 return false;
3706 *exponent = real_to_integer (&c);
3707 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3708 if (!real_identical (&c, &cint))
3709 return false;
3711 break;
3713 CASE_FLT_FN (BUILT_IN_POWI):
3714 *base = gimple_call_arg (stmt, 0);
3715 arg1 = gimple_call_arg (stmt, 1);
3717 if (!tree_fits_shwi_p (arg1))
3718 return false;
3720 *exponent = tree_to_shwi (arg1);
3721 break;
3723 default:
3724 return false;
3727 /* Expanding negative exponents is generally unproductive, so we don't
3728 complicate matters with those. Exponents of zero and one should
3729 have been handled by expression folding. */
3730 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3731 return false;
3733 return true;
3736 /* Recursively linearize a binary expression that is the RHS of STMT.
3737 Place the operands of the expression tree in the vector named OPS. */
3739 static void
3740 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3741 bool is_associative, bool set_visited)
3743 tree binlhs = gimple_assign_rhs1 (stmt);
3744 tree binrhs = gimple_assign_rhs2 (stmt);
3745 gimple binlhsdef = NULL, binrhsdef = NULL;
3746 bool binlhsisreassoc = false;
3747 bool binrhsisreassoc = false;
3748 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3749 struct loop *loop = loop_containing_stmt (stmt);
3750 tree base = NULL_TREE;
3751 HOST_WIDE_INT exponent = 0;
3753 if (set_visited)
3754 gimple_set_visited (stmt, true);
3756 if (TREE_CODE (binlhs) == SSA_NAME)
3758 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3759 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3760 && !stmt_could_throw_p (binlhsdef));
3763 if (TREE_CODE (binrhs) == SSA_NAME)
3765 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3766 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3767 && !stmt_could_throw_p (binrhsdef));
3770 /* If the LHS is not reassociable, but the RHS is, we need to swap
3771 them. If neither is reassociable, there is nothing we can do, so
3772 just put them in the ops vector. If the LHS is reassociable,
3773 linearize it. If both are reassociable, then linearize the RHS
3774 and the LHS. */
3776 if (!binlhsisreassoc)
3778 tree temp;
3780 /* If this is not a associative operation like division, give up. */
3781 if (!is_associative)
3783 add_to_ops_vec (ops, binrhs);
3784 return;
3787 if (!binrhsisreassoc)
3789 if (rhscode == MULT_EXPR
3790 && TREE_CODE (binrhs) == SSA_NAME
3791 && acceptable_pow_call (binrhsdef, &base, &exponent))
3793 add_repeat_to_ops_vec (ops, base, exponent);
3794 gimple_set_visited (binrhsdef, true);
3796 else
3797 add_to_ops_vec (ops, binrhs);
3799 if (rhscode == MULT_EXPR
3800 && TREE_CODE (binlhs) == SSA_NAME
3801 && acceptable_pow_call (binlhsdef, &base, &exponent))
3803 add_repeat_to_ops_vec (ops, base, exponent);
3804 gimple_set_visited (binlhsdef, true);
3806 else
3807 add_to_ops_vec (ops, binlhs);
3809 return;
3812 if (dump_file && (dump_flags & TDF_DETAILS))
3814 fprintf (dump_file, "swapping operands of ");
3815 print_gimple_stmt (dump_file, stmt, 0, 0);
3818 swap_ssa_operands (stmt,
3819 gimple_assign_rhs1_ptr (stmt),
3820 gimple_assign_rhs2_ptr (stmt));
3821 update_stmt (stmt);
3823 if (dump_file && (dump_flags & TDF_DETAILS))
3825 fprintf (dump_file, " is now ");
3826 print_gimple_stmt (dump_file, stmt, 0, 0);
3829 /* We want to make it so the lhs is always the reassociative op,
3830 so swap. */
3831 temp = binlhs;
3832 binlhs = binrhs;
3833 binrhs = temp;
3835 else if (binrhsisreassoc)
3837 linearize_expr (stmt);
3838 binlhs = gimple_assign_rhs1 (stmt);
3839 binrhs = gimple_assign_rhs2 (stmt);
3842 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3843 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3844 rhscode, loop));
3845 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3846 is_associative, set_visited);
3848 if (rhscode == MULT_EXPR
3849 && TREE_CODE (binrhs) == SSA_NAME
3850 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3852 add_repeat_to_ops_vec (ops, base, exponent);
3853 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3855 else
3856 add_to_ops_vec (ops, binrhs);
3859 /* Repropagate the negates back into subtracts, since no other pass
3860 currently does it. */
3862 static void
3863 repropagate_negates (void)
3865 unsigned int i = 0;
3866 tree negate;
3868 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3870 gimple user = get_single_immediate_use (negate);
3872 if (!user || !is_gimple_assign (user))
3873 continue;
3875 /* The negate operand can be either operand of a PLUS_EXPR
3876 (it can be the LHS if the RHS is a constant for example).
3878 Force the negate operand to the RHS of the PLUS_EXPR, then
3879 transform the PLUS_EXPR into a MINUS_EXPR. */
3880 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3882 /* If the negated operand appears on the LHS of the
3883 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3884 to force the negated operand to the RHS of the PLUS_EXPR. */
3885 if (gimple_assign_rhs1 (user) == negate)
3887 swap_ssa_operands (user,
3888 gimple_assign_rhs1_ptr (user),
3889 gimple_assign_rhs2_ptr (user));
3892 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3893 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3894 if (gimple_assign_rhs2 (user) == negate)
3896 tree rhs1 = gimple_assign_rhs1 (user);
3897 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3898 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3899 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3900 update_stmt (user);
3903 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3905 if (gimple_assign_rhs1 (user) == negate)
3907 /* We have
3908 x = -a
3909 y = x - b
3910 which we transform into
3911 x = a + b
3912 y = -x .
3913 This pushes down the negate which we possibly can merge
3914 into some other operation, hence insert it into the
3915 plus_negates vector. */
3916 gimple feed = SSA_NAME_DEF_STMT (negate);
3917 tree a = gimple_assign_rhs1 (feed);
3918 tree b = gimple_assign_rhs2 (user);
3919 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3920 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3921 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3922 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3923 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3924 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3925 user = gsi_stmt (gsi2);
3926 update_stmt (user);
3927 reassoc_remove_stmt (&gsi);
3928 release_defs (feed);
3929 plus_negates.safe_push (gimple_assign_lhs (user));
3931 else
3933 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3934 rid of one operation. */
3935 gimple feed = SSA_NAME_DEF_STMT (negate);
3936 tree a = gimple_assign_rhs1 (feed);
3937 tree rhs1 = gimple_assign_rhs1 (user);
3938 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3939 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3940 update_stmt (gsi_stmt (gsi));
3946 /* Returns true if OP is of a type for which we can do reassociation.
3947 That is for integral or non-saturating fixed-point types, and for
3948 floating point type when associative-math is enabled. */
3950 static bool
3951 can_reassociate_p (tree op)
3953 tree type = TREE_TYPE (op);
3954 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3955 || NON_SAT_FIXED_POINT_TYPE_P (type)
3956 || (flag_associative_math && FLOAT_TYPE_P (type)))
3957 return true;
3958 return false;
3961 /* Break up subtract operations in block BB.
3963 We do this top down because we don't know whether the subtract is
3964 part of a possible chain of reassociation except at the top.
3966 IE given
3967 d = f + g
3968 c = a + e
3969 b = c - d
3970 q = b - r
3971 k = t - q
3973 we want to break up k = t - q, but we won't until we've transformed q
3974 = b - r, which won't be broken up until we transform b = c - d.
3976 En passant, clear the GIMPLE visited flag on every statement
3977 and set UIDs within each basic block. */
3979 static void
3980 break_up_subtract_bb (basic_block bb)
3982 gimple_stmt_iterator gsi;
3983 basic_block son;
3984 unsigned int uid = 1;
3986 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3988 gimple stmt = gsi_stmt (gsi);
3989 gimple_set_visited (stmt, false);
3990 gimple_set_uid (stmt, uid++);
3992 if (!is_gimple_assign (stmt)
3993 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3994 continue;
3996 /* Look for simple gimple subtract operations. */
3997 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3999 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4000 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4001 continue;
4003 /* Check for a subtract used only in an addition. If this
4004 is the case, transform it into add of a negate for better
4005 reassociation. IE transform C = A-B into C = A + -B if C
4006 is only used in an addition. */
4007 if (should_break_up_subtract (stmt))
4008 break_up_subtract (stmt, &gsi);
4010 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4011 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4012 plus_negates.safe_push (gimple_assign_lhs (stmt));
4014 for (son = first_dom_son (CDI_DOMINATORS, bb);
4015 son;
4016 son = next_dom_son (CDI_DOMINATORS, son))
4017 break_up_subtract_bb (son);
4020 /* Used for repeated factor analysis. */
4021 struct repeat_factor_d
4023 /* An SSA name that occurs in a multiply chain. */
4024 tree factor;
4026 /* Cached rank of the factor. */
4027 unsigned rank;
4029 /* Number of occurrences of the factor in the chain. */
4030 HOST_WIDE_INT count;
4032 /* An SSA name representing the product of this factor and
4033 all factors appearing later in the repeated factor vector. */
4034 tree repr;
4037 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4038 typedef const struct repeat_factor_d *const_repeat_factor_t;
4041 static vec<repeat_factor> repeat_factor_vec;
4043 /* Used for sorting the repeat factor vector. Sort primarily by
4044 ascending occurrence count, secondarily by descending rank. */
4046 static int
4047 compare_repeat_factors (const void *x1, const void *x2)
4049 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4050 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4052 if (rf1->count != rf2->count)
4053 return rf1->count - rf2->count;
4055 return rf2->rank - rf1->rank;
4058 /* Look for repeated operands in OPS in the multiply tree rooted at
4059 STMT. Replace them with an optimal sequence of multiplies and powi
4060 builtin calls, and remove the used operands from OPS. Return an
4061 SSA name representing the value of the replacement sequence. */
4063 static tree
4064 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4066 unsigned i, j, vec_len;
4067 int ii;
4068 operand_entry_t oe;
4069 repeat_factor_t rf1, rf2;
4070 repeat_factor rfnew;
4071 tree result = NULL_TREE;
4072 tree target_ssa, iter_result;
4073 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4074 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4075 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4076 gimple mul_stmt, pow_stmt;
4078 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4079 target. */
4080 if (!powi_fndecl)
4081 return NULL_TREE;
4083 /* Allocate the repeated factor vector. */
4084 repeat_factor_vec.create (10);
4086 /* Scan the OPS vector for all SSA names in the product and build
4087 up a vector of occurrence counts for each factor. */
4088 FOR_EACH_VEC_ELT (*ops, i, oe)
4090 if (TREE_CODE (oe->op) == SSA_NAME)
4092 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4094 if (rf1->factor == oe->op)
4096 rf1->count += oe->count;
4097 break;
4101 if (j >= repeat_factor_vec.length ())
4103 rfnew.factor = oe->op;
4104 rfnew.rank = oe->rank;
4105 rfnew.count = oe->count;
4106 rfnew.repr = NULL_TREE;
4107 repeat_factor_vec.safe_push (rfnew);
4112 /* Sort the repeated factor vector by (a) increasing occurrence count,
4113 and (b) decreasing rank. */
4114 repeat_factor_vec.qsort (compare_repeat_factors);
4116 /* It is generally best to combine as many base factors as possible
4117 into a product before applying __builtin_powi to the result.
4118 However, the sort order chosen for the repeated factor vector
4119 allows us to cache partial results for the product of the base
4120 factors for subsequent use. When we already have a cached partial
4121 result from a previous iteration, it is best to make use of it
4122 before looking for another __builtin_pow opportunity.
4124 As an example, consider x * x * y * y * y * z * z * z * z.
4125 We want to first compose the product x * y * z, raise it to the
4126 second power, then multiply this by y * z, and finally multiply
4127 by z. This can be done in 5 multiplies provided we cache y * z
4128 for use in both expressions:
4130 t1 = y * z
4131 t2 = t1 * x
4132 t3 = t2 * t2
4133 t4 = t1 * t3
4134 result = t4 * z
4136 If we instead ignored the cached y * z and first multiplied by
4137 the __builtin_pow opportunity z * z, we would get the inferior:
4139 t1 = y * z
4140 t2 = t1 * x
4141 t3 = t2 * t2
4142 t4 = z * z
4143 t5 = t3 * t4
4144 result = t5 * y */
4146 vec_len = repeat_factor_vec.length ();
4148 /* Repeatedly look for opportunities to create a builtin_powi call. */
4149 while (true)
4151 HOST_WIDE_INT power;
4153 /* First look for the largest cached product of factors from
4154 preceding iterations. If found, create a builtin_powi for
4155 it if the minimum occurrence count for its factors is at
4156 least 2, or just use this cached product as our next
4157 multiplicand if the minimum occurrence count is 1. */
4158 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4160 if (rf1->repr && rf1->count > 0)
4161 break;
4164 if (j < vec_len)
4166 power = rf1->count;
4168 if (power == 1)
4170 iter_result = rf1->repr;
4172 if (dump_file && (dump_flags & TDF_DETAILS))
4174 unsigned elt;
4175 repeat_factor_t rf;
4176 fputs ("Multiplying by cached product ", dump_file);
4177 for (elt = j; elt < vec_len; elt++)
4179 rf = &repeat_factor_vec[elt];
4180 print_generic_expr (dump_file, rf->factor, 0);
4181 if (elt < vec_len - 1)
4182 fputs (" * ", dump_file);
4184 fputs ("\n", dump_file);
4187 else
4189 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4190 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4191 build_int_cst (integer_type_node,
4192 power));
4193 gimple_call_set_lhs (pow_stmt, iter_result);
4194 gimple_set_location (pow_stmt, gimple_location (stmt));
4195 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4197 if (dump_file && (dump_flags & TDF_DETAILS))
4199 unsigned elt;
4200 repeat_factor_t rf;
4201 fputs ("Building __builtin_pow call for cached product (",
4202 dump_file);
4203 for (elt = j; elt < vec_len; elt++)
4205 rf = &repeat_factor_vec[elt];
4206 print_generic_expr (dump_file, rf->factor, 0);
4207 if (elt < vec_len - 1)
4208 fputs (" * ", dump_file);
4210 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4211 power);
4215 else
4217 /* Otherwise, find the first factor in the repeated factor
4218 vector whose occurrence count is at least 2. If no such
4219 factor exists, there are no builtin_powi opportunities
4220 remaining. */
4221 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4223 if (rf1->count >= 2)
4224 break;
4227 if (j >= vec_len)
4228 break;
4230 power = rf1->count;
4232 if (dump_file && (dump_flags & TDF_DETAILS))
4234 unsigned elt;
4235 repeat_factor_t rf;
4236 fputs ("Building __builtin_pow call for (", dump_file);
4237 for (elt = j; elt < vec_len; elt++)
4239 rf = &repeat_factor_vec[elt];
4240 print_generic_expr (dump_file, rf->factor, 0);
4241 if (elt < vec_len - 1)
4242 fputs (" * ", dump_file);
4244 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4247 reassociate_stats.pows_created++;
4249 /* Visit each element of the vector in reverse order (so that
4250 high-occurrence elements are visited first, and within the
4251 same occurrence count, lower-ranked elements are visited
4252 first). Form a linear product of all elements in this order
4253 whose occurrencce count is at least that of element J.
4254 Record the SSA name representing the product of each element
4255 with all subsequent elements in the vector. */
4256 if (j == vec_len - 1)
4257 rf1->repr = rf1->factor;
4258 else
4260 for (ii = vec_len - 2; ii >= (int)j; ii--)
4262 tree op1, op2;
4264 rf1 = &repeat_factor_vec[ii];
4265 rf2 = &repeat_factor_vec[ii + 1];
4267 /* Init the last factor's representative to be itself. */
4268 if (!rf2->repr)
4269 rf2->repr = rf2->factor;
4271 op1 = rf1->factor;
4272 op2 = rf2->repr;
4274 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4275 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4276 target_ssa,
4277 op1, op2);
4278 gimple_set_location (mul_stmt, gimple_location (stmt));
4279 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4280 rf1->repr = target_ssa;
4282 /* Don't reprocess the multiply we just introduced. */
4283 gimple_set_visited (mul_stmt, true);
4287 /* Form a call to __builtin_powi for the maximum product
4288 just formed, raised to the power obtained earlier. */
4289 rf1 = &repeat_factor_vec[j];
4290 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4291 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4292 build_int_cst (integer_type_node,
4293 power));
4294 gimple_call_set_lhs (pow_stmt, iter_result);
4295 gimple_set_location (pow_stmt, gimple_location (stmt));
4296 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4299 /* If we previously formed at least one other builtin_powi call,
4300 form the product of this one and those others. */
4301 if (result)
4303 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4304 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4305 result, iter_result);
4306 gimple_set_location (mul_stmt, gimple_location (stmt));
4307 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4308 gimple_set_visited (mul_stmt, true);
4309 result = new_result;
4311 else
4312 result = iter_result;
4314 /* Decrement the occurrence count of each element in the product
4315 by the count found above, and remove this many copies of each
4316 factor from OPS. */
4317 for (i = j; i < vec_len; i++)
4319 unsigned k = power;
4320 unsigned n;
4322 rf1 = &repeat_factor_vec[i];
4323 rf1->count -= power;
4325 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4327 if (oe->op == rf1->factor)
4329 if (oe->count <= k)
4331 ops->ordered_remove (n);
4332 k -= oe->count;
4334 if (k == 0)
4335 break;
4337 else
4339 oe->count -= k;
4340 break;
4347 /* At this point all elements in the repeated factor vector have a
4348 remaining occurrence count of 0 or 1, and those with a count of 1
4349 don't have cached representatives. Re-sort the ops vector and
4350 clean up. */
4351 ops->qsort (sort_by_operand_rank);
4352 repeat_factor_vec.release ();
4354 /* Return the final product computed herein. Note that there may
4355 still be some elements with single occurrence count left in OPS;
4356 those will be handled by the normal reassociation logic. */
4357 return result;
4360 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4362 static void
4363 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4365 tree rhs1;
4367 if (dump_file && (dump_flags & TDF_DETAILS))
4369 fprintf (dump_file, "Transforming ");
4370 print_gimple_stmt (dump_file, stmt, 0, 0);
4373 rhs1 = gimple_assign_rhs1 (stmt);
4374 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4375 update_stmt (stmt);
4376 remove_visited_stmt_chain (rhs1);
4378 if (dump_file && (dump_flags & TDF_DETAILS))
4380 fprintf (dump_file, " into ");
4381 print_gimple_stmt (dump_file, stmt, 0, 0);
4385 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4387 static void
4388 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4389 tree rhs1, tree rhs2)
4391 if (dump_file && (dump_flags & TDF_DETAILS))
4393 fprintf (dump_file, "Transforming ");
4394 print_gimple_stmt (dump_file, stmt, 0, 0);
4397 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4398 update_stmt (gsi_stmt (*gsi));
4399 remove_visited_stmt_chain (rhs1);
4401 if (dump_file && (dump_flags & TDF_DETAILS))
4403 fprintf (dump_file, " into ");
4404 print_gimple_stmt (dump_file, stmt, 0, 0);
4408 /* Reassociate expressions in basic block BB and its post-dominator as
4409 children. */
4411 static void
4412 reassociate_bb (basic_block bb)
4414 gimple_stmt_iterator gsi;
4415 basic_block son;
4416 gimple stmt = last_stmt (bb);
4418 if (stmt && !gimple_visited_p (stmt))
4419 maybe_optimize_range_tests (stmt);
4421 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4423 stmt = gsi_stmt (gsi);
4425 if (is_gimple_assign (stmt)
4426 && !stmt_could_throw_p (stmt))
4428 tree lhs, rhs1, rhs2;
4429 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4431 /* If this is not a gimple binary expression, there is
4432 nothing for us to do with it. */
4433 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4434 continue;
4436 /* If this was part of an already processed statement,
4437 we don't need to touch it again. */
4438 if (gimple_visited_p (stmt))
4440 /* This statement might have become dead because of previous
4441 reassociations. */
4442 if (has_zero_uses (gimple_get_lhs (stmt)))
4444 reassoc_remove_stmt (&gsi);
4445 release_defs (stmt);
4446 /* We might end up removing the last stmt above which
4447 places the iterator to the end of the sequence.
4448 Reset it to the last stmt in this case which might
4449 be the end of the sequence as well if we removed
4450 the last statement of the sequence. In which case
4451 we need to bail out. */
4452 if (gsi_end_p (gsi))
4454 gsi = gsi_last_bb (bb);
4455 if (gsi_end_p (gsi))
4456 break;
4459 continue;
4462 lhs = gimple_assign_lhs (stmt);
4463 rhs1 = gimple_assign_rhs1 (stmt);
4464 rhs2 = gimple_assign_rhs2 (stmt);
4466 /* For non-bit or min/max operations we can't associate
4467 all types. Verify that here. */
4468 if (rhs_code != BIT_IOR_EXPR
4469 && rhs_code != BIT_AND_EXPR
4470 && rhs_code != BIT_XOR_EXPR
4471 && rhs_code != MIN_EXPR
4472 && rhs_code != MAX_EXPR
4473 && (!can_reassociate_p (lhs)
4474 || !can_reassociate_p (rhs1)
4475 || !can_reassociate_p (rhs2)))
4476 continue;
4478 if (associative_tree_code (rhs_code))
4480 auto_vec<operand_entry_t> ops;
4481 tree powi_result = NULL_TREE;
4483 /* There may be no immediate uses left by the time we
4484 get here because we may have eliminated them all. */
4485 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4486 continue;
4488 gimple_set_visited (stmt, true);
4489 linearize_expr_tree (&ops, stmt, true, true);
4490 ops.qsort (sort_by_operand_rank);
4491 optimize_ops_list (rhs_code, &ops);
4492 if (undistribute_ops_list (rhs_code, &ops,
4493 loop_containing_stmt (stmt)))
4495 ops.qsort (sort_by_operand_rank);
4496 optimize_ops_list (rhs_code, &ops);
4499 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4500 optimize_range_tests (rhs_code, &ops);
4502 if (first_pass_instance
4503 && rhs_code == MULT_EXPR
4504 && flag_unsafe_math_optimizations)
4505 powi_result = attempt_builtin_powi (stmt, &ops);
4507 /* If the operand vector is now empty, all operands were
4508 consumed by the __builtin_powi optimization. */
4509 if (ops.length () == 0)
4510 transform_stmt_to_copy (&gsi, stmt, powi_result);
4511 else if (ops.length () == 1)
4513 tree last_op = ops.last ()->op;
4515 if (powi_result)
4516 transform_stmt_to_multiply (&gsi, stmt, last_op,
4517 powi_result);
4518 else
4519 transform_stmt_to_copy (&gsi, stmt, last_op);
4521 else
4523 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4524 int ops_num = ops.length ();
4525 int width = get_reassociation_width (ops_num, rhs_code, mode);
4526 tree new_lhs = lhs;
4528 if (dump_file && (dump_flags & TDF_DETAILS))
4529 fprintf (dump_file,
4530 "Width = %d was chosen for reassociation\n", width);
4532 if (width > 1
4533 && ops.length () > 3)
4534 rewrite_expr_tree_parallel (stmt, width, ops);
4535 else
4537 /* When there are three operands left, we want
4538 to make sure the ones that get the double
4539 binary op are chosen wisely. */
4540 int len = ops.length ();
4541 if (len >= 3)
4542 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4544 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4545 powi_result != NULL);
4548 /* If we combined some repeated factors into a
4549 __builtin_powi call, multiply that result by the
4550 reassociated operands. */
4551 if (powi_result)
4553 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4554 tree type = TREE_TYPE (lhs);
4555 tree target_ssa = make_temp_ssa_name (type, NULL,
4556 "reassocpow");
4557 gimple_set_lhs (lhs_stmt, target_ssa);
4558 update_stmt (lhs_stmt);
4559 if (lhs != new_lhs)
4560 target_ssa = new_lhs;
4561 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4562 powi_result,
4563 target_ssa);
4564 gimple_set_location (mul_stmt, gimple_location (stmt));
4565 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4571 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4572 son;
4573 son = next_dom_son (CDI_POST_DOMINATORS, son))
4574 reassociate_bb (son);
4577 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4578 void debug_ops_vector (vec<operand_entry_t> ops);
4580 /* Dump the operand entry vector OPS to FILE. */
4582 void
4583 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4585 operand_entry_t oe;
4586 unsigned int i;
4588 FOR_EACH_VEC_ELT (ops, i, oe)
4590 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4591 print_generic_expr (file, oe->op, 0);
4595 /* Dump the operand entry vector OPS to STDERR. */
4597 DEBUG_FUNCTION void
4598 debug_ops_vector (vec<operand_entry_t> ops)
4600 dump_ops_vector (stderr, ops);
4603 static void
4604 do_reassoc (void)
4606 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4607 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4610 /* Initialize the reassociation pass. */
4612 static void
4613 init_reassoc (void)
4615 int i;
4616 long rank = 2;
4617 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4619 /* Find the loops, so that we can prevent moving calculations in
4620 them. */
4621 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4623 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4625 operand_entry_pool = create_alloc_pool ("operand entry pool",
4626 sizeof (struct operand_entry), 30);
4627 next_operand_entry_id = 0;
4629 /* Reverse RPO (Reverse Post Order) will give us something where
4630 deeper loops come later. */
4631 pre_and_rev_post_order_compute (NULL, bbs, false);
4632 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4633 operand_rank = pointer_map_create ();
4635 /* Give each default definition a distinct rank. This includes
4636 parameters and the static chain. Walk backwards over all
4637 SSA names so that we get proper rank ordering according
4638 to tree_swap_operands_p. */
4639 for (i = num_ssa_names - 1; i > 0; --i)
4641 tree name = ssa_name (i);
4642 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4643 insert_operand_rank (name, ++rank);
4646 /* Set up rank for each BB */
4647 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4648 bb_rank[bbs[i]] = ++rank << 16;
4650 free (bbs);
4651 calculate_dominance_info (CDI_POST_DOMINATORS);
4652 plus_negates = vNULL;
4655 /* Cleanup after the reassociation pass, and print stats if
4656 requested. */
4658 static void
4659 fini_reassoc (void)
4661 statistics_counter_event (cfun, "Linearized",
4662 reassociate_stats.linearized);
4663 statistics_counter_event (cfun, "Constants eliminated",
4664 reassociate_stats.constants_eliminated);
4665 statistics_counter_event (cfun, "Ops eliminated",
4666 reassociate_stats.ops_eliminated);
4667 statistics_counter_event (cfun, "Statements rewritten",
4668 reassociate_stats.rewritten);
4669 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4670 reassociate_stats.pows_encountered);
4671 statistics_counter_event (cfun, "Built-in powi calls created",
4672 reassociate_stats.pows_created);
4674 pointer_map_destroy (operand_rank);
4675 free_alloc_pool (operand_entry_pool);
4676 free (bb_rank);
4677 plus_negates.release ();
4678 free_dominance_info (CDI_POST_DOMINATORS);
4679 loop_optimizer_finalize ();
4682 /* Gate and execute functions for Reassociation. */
4684 static unsigned int
4685 execute_reassoc (void)
4687 init_reassoc ();
4689 do_reassoc ();
4690 repropagate_negates ();
4692 fini_reassoc ();
4693 return 0;
4696 namespace {
4698 const pass_data pass_data_reassoc =
4700 GIMPLE_PASS, /* type */
4701 "reassoc", /* name */
4702 OPTGROUP_NONE, /* optinfo_flags */
4703 true, /* has_execute */
4704 TV_TREE_REASSOC, /* tv_id */
4705 ( PROP_cfg | PROP_ssa ), /* properties_required */
4706 0, /* properties_provided */
4707 0, /* properties_destroyed */
4708 0, /* todo_flags_start */
4709 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4712 class pass_reassoc : public gimple_opt_pass
4714 public:
4715 pass_reassoc (gcc::context *ctxt)
4716 : gimple_opt_pass (pass_data_reassoc, ctxt)
4719 /* opt_pass methods: */
4720 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4721 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4722 virtual unsigned int execute (function *) { return execute_reassoc (); }
4724 }; // class pass_reassoc
4726 } // anon namespace
4728 gimple_opt_pass *
4729 make_pass_reassoc (gcc::context *ctxt)
4731 return new pass_reassoc (ctxt);