missing Changelog
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob7318ecff187fc3b44298432adcb25c2c05f66481
1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
31 #include "gimple.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
35 #include "vec.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
38 #include "cfgloop.h"
39 #include "flags.h"
40 #include "target.h"
41 #include "params.h"
42 #include "diagnostic-core.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
68 5. Repropagate negates, as nothing else will clean it up ATM.
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
86 IE if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
89 a (1), b (1), c (1), d (2), e (2)
92 We start with our merge worklist empty, and the ops list with all of
93 those on it.
95 You want to first merge all leaves of the same rank, as much as
96 possible.
98 So first build a binary op of
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
110 Then build a binary op of d + e
111 mergetmp2 = d + e
113 and put mergetmp2 on the merge worklist.
115 so merge worklist = {mergetmp, c, mergetmp2}
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
120 So we have
122 build binary op
123 mergetmp3 = mergetmp + c
125 worklist = {mergetmp2, mergetmp3}
127 mergetmp4 = mergetmp2 + mergetmp3
129 worklist = {mergetmp4}
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
140 So why don't we do this?
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or less ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
148 mergetmp = op1 + op2
149 newstmt = mergetmp + op3
151 instead of
152 mergetmp = op2 + op3
153 newstmt = mergetmp + op1
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
157 can do.
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
166 /* Statistics */
167 static struct
169 int linearized;
170 int constants_eliminated;
171 int ops_eliminated;
172 int rewritten;
173 int pows_encountered;
174 int pows_created;
175 } reassociate_stats;
177 /* Operator, rank pair. */
178 typedef struct operand_entry
180 unsigned int rank;
181 int id;
182 tree op;
183 unsigned int count;
184 } *operand_entry_t;
186 static alloc_pool operand_entry_pool;
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id;
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
194 depth. */
195 static long *bb_rank;
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t *operand_rank;
200 /* Forward decls. */
201 static long get_rank (tree);
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
216 static long
217 phi_rank (gimple stmt)
219 basic_block bb = gimple_bb (stmt);
220 struct loop *father = bb->loop_father;
221 tree res;
222 unsigned i;
223 use_operand_p use;
224 gimple use_stmt;
226 /* We only care about real loops (those with a latch). */
227 if (!father->latch)
228 return bb_rank[bb->index];
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb != father->header
232 || father->inner)
233 return bb_rank[bb->index];
235 /* Ignore virtual SSA_NAMEs. */
236 res = gimple_phi_result (stmt);
237 if (virtual_operand_p (res))
238 return bb_rank[bb->index];
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res, &use, &use_stmt)
243 || gimple_bb (use_stmt)->loop_father != father)
244 return bb_rank[bb->index];
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i = 0; i < gimple_phi_num_args (stmt); i++)
249 tree arg = gimple_phi_arg_def (stmt, i);
250 if (TREE_CODE (arg) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg))
253 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
254 if (gimple_bb (def_stmt)->loop_father == father)
255 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
259 /* Must be an uninteresting phi. */
260 return bb_rank[bb->index];
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
265 return FALSE. */
266 static bool
267 loop_carried_phi (tree exp)
269 gimple phi_stmt;
270 long block_rank;
272 if (TREE_CODE (exp) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp))
274 return false;
276 phi_stmt = SSA_NAME_DEF_STMT (exp);
278 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
279 return false;
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
286 if (phi_rank (phi_stmt) != block_rank)
287 return true;
289 return false;
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
296 static long
297 propagate_rank (long rank, tree op)
299 long op_rank;
301 if (loop_carried_phi (op))
302 return rank;
304 op_rank = get_rank (op);
306 return MAX (rank, op_rank);
309 /* Look up the operand rank structure for expression E. */
311 static inline long
312 find_operand_rank (tree e)
314 void **slot = pointer_map_contains (operand_rank, e);
315 return slot ? (long) (intptr_t) *slot : -1;
318 /* Insert {E,RANK} into the operand rank hashtable. */
320 static inline void
321 insert_operand_rank (tree e, long rank)
323 void **slot;
324 gcc_assert (rank > 0);
325 slot = pointer_map_insert (operand_rank, e);
326 gcc_assert (!*slot);
327 *slot = (void *) (intptr_t) rank;
330 /* Given an expression E, return the rank of the expression. */
332 static long
333 get_rank (tree e)
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e))
337 return 0;
339 /* SSA_NAME's have the rank of the expression they are the result
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
344 the BB.
345 For simple operations, the rank is the maximum rank of any of
346 its operands, or the bb_rank, whichever is less.
347 I make no claims that this is optimal, however, it gives good
348 results. */
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
354 x_1 = phi(x_0, x_2)
355 b = a + x_1
356 c = b + d
357 x_2 = c + e
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
363 x_1 = phi(x_0, x_2)
364 b = a + d
365 c = b + e
366 x_2 = c + x_1
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
376 if (TREE_CODE (e) == SSA_NAME)
378 gimple stmt;
379 long rank;
380 int i, n;
381 tree op;
383 if (SSA_NAME_IS_DEFAULT_DEF (e))
384 return find_operand_rank (e);
386 stmt = SSA_NAME_DEF_STMT (e);
387 if (gimple_code (stmt) == GIMPLE_PHI)
388 return phi_rank (stmt);
390 if (!is_gimple_assign (stmt)
391 || gimple_vdef (stmt))
392 return bb_rank[gimple_bb (stmt)->index];
394 /* If we already have a rank for this expression, use that. */
395 rank = find_operand_rank (e);
396 if (rank != -1)
397 return rank;
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
402 rank = 0;
403 if (gimple_assign_single_p (stmt))
405 tree rhs = gimple_assign_rhs1 (stmt);
406 n = TREE_OPERAND_LENGTH (rhs);
407 if (n == 0)
408 rank = propagate_rank (rank, rhs);
409 else
411 for (i = 0; i < n; i++)
413 op = TREE_OPERAND (rhs, i);
415 if (op != NULL_TREE)
416 rank = propagate_rank (rank, op);
420 else
422 n = gimple_num_ops (stmt);
423 for (i = 1; i < n; i++)
425 op = gimple_op (stmt, i);
426 gcc_assert (op);
427 rank = propagate_rank (rank, op);
431 if (dump_file && (dump_flags & TDF_DETAILS))
433 fprintf (dump_file, "Rank for ");
434 print_generic_expr (dump_file, e, 0);
435 fprintf (dump_file, " is %ld\n", (rank + 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e, (rank + 1));
440 return (rank + 1);
443 /* Globals, etc, are rank 0 */
444 return 0;
448 /* We want integer ones to end up last no matter what, since they are
449 the ones we can do the most with. */
450 #define INTEGER_CONST_TYPE 1 << 3
451 #define FLOAT_CONST_TYPE 1 << 2
452 #define OTHER_CONST_TYPE 1 << 1
454 /* Classify an invariant tree into integer, float, or other, so that
455 we can sort them to be near other constants of the same type. */
456 static inline int
457 constant_type (tree t)
459 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
460 return INTEGER_CONST_TYPE;
461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
462 return FLOAT_CONST_TYPE;
463 else
464 return OTHER_CONST_TYPE;
467 /* qsort comparison function to sort operand entries PA and PB by rank
468 so that the sorted array is ordered by rank in decreasing order. */
469 static int
470 sort_by_operand_rank (const void *pa, const void *pb)
472 const operand_entry_t oea = *(const operand_entry_t *)pa;
473 const operand_entry_t oeb = *(const operand_entry_t *)pb;
475 /* It's nicer for optimize_expression if constants that are likely
476 to fold when added/multiplied//whatever are put next to each
477 other. Since all constants have rank 0, order them by type. */
478 if (oeb->rank == 0 && oea->rank == 0)
480 if (constant_type (oeb->op) != constant_type (oea->op))
481 return constant_type (oeb->op) - constant_type (oea->op);
482 else
483 /* To make sorting result stable, we use unique IDs to determine
484 order. */
485 return oeb->id - oea->id;
488 /* Lastly, make sure the versions that are the same go next to each
489 other. We use SSA_NAME_VERSION because it's stable. */
490 if ((oeb->rank - oea->rank == 0)
491 && TREE_CODE (oea->op) == SSA_NAME
492 && TREE_CODE (oeb->op) == SSA_NAME)
494 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
495 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
496 else
497 return oeb->id - oea->id;
500 if (oeb->rank != oea->rank)
501 return oeb->rank - oea->rank;
502 else
503 return oeb->id - oea->id;
506 /* Add an operand entry to *OPS for the tree operand OP. */
508 static void
509 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
511 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
513 oe->op = op;
514 oe->rank = get_rank (op);
515 oe->id = next_operand_entry_id++;
516 oe->count = 1;
517 ops->safe_push (oe);
520 /* Add an operand entry to *OPS for the tree operand OP with repeat
521 count REPEAT. */
523 static void
524 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
525 HOST_WIDE_INT repeat)
527 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
529 oe->op = op;
530 oe->rank = get_rank (op);
531 oe->id = next_operand_entry_id++;
532 oe->count = repeat;
533 ops->safe_push (oe);
535 reassociate_stats.pows_encountered++;
538 /* Return true if STMT is reassociable operation containing a binary
539 operation with tree code CODE, and is inside LOOP. */
541 static bool
542 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
544 basic_block bb = gimple_bb (stmt);
546 if (gimple_bb (stmt) == NULL)
547 return false;
549 if (!flow_bb_inside_loop_p (loop, bb))
550 return false;
552 if (is_gimple_assign (stmt)
553 && gimple_assign_rhs_code (stmt) == code
554 && has_single_use (gimple_assign_lhs (stmt)))
555 return true;
557 return false;
561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
562 operand of the negate operation. Otherwise, return NULL. */
564 static tree
565 get_unary_op (tree name, enum tree_code opcode)
567 gimple stmt = SSA_NAME_DEF_STMT (name);
569 if (!is_gimple_assign (stmt))
570 return NULL_TREE;
572 if (gimple_assign_rhs_code (stmt) == opcode)
573 return gimple_assign_rhs1 (stmt);
574 return NULL_TREE;
577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
578 eliminate through equivalences, do so, remove them from OPS, and
579 return true. Otherwise, return false. */
581 static bool
582 eliminate_duplicate_pair (enum tree_code opcode,
583 vec<operand_entry_t> *ops,
584 bool *all_done,
585 unsigned int i,
586 operand_entry_t curr,
587 operand_entry_t last)
590 /* If we have two of the same op, and the opcode is & |, min, or max,
591 we can eliminate one of them.
592 If we have two of the same op, and the opcode is ^, we can
593 eliminate both of them. */
595 if (last && last->op == curr->op)
597 switch (opcode)
599 case MAX_EXPR:
600 case MIN_EXPR:
601 case BIT_IOR_EXPR:
602 case BIT_AND_EXPR:
603 if (dump_file && (dump_flags & TDF_DETAILS))
605 fprintf (dump_file, "Equivalence: ");
606 print_generic_expr (dump_file, curr->op, 0);
607 fprintf (dump_file, " [&|minmax] ");
608 print_generic_expr (dump_file, last->op, 0);
609 fprintf (dump_file, " -> ");
610 print_generic_stmt (dump_file, last->op, 0);
613 ops->ordered_remove (i);
614 reassociate_stats.ops_eliminated ++;
616 return true;
618 case BIT_XOR_EXPR:
619 if (dump_file && (dump_flags & TDF_DETAILS))
621 fprintf (dump_file, "Equivalence: ");
622 print_generic_expr (dump_file, curr->op, 0);
623 fprintf (dump_file, " ^ ");
624 print_generic_expr (dump_file, last->op, 0);
625 fprintf (dump_file, " -> nothing\n");
628 reassociate_stats.ops_eliminated += 2;
630 if (ops->length () == 2)
632 ops->create (0);
633 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
634 *all_done = true;
636 else
638 ops->ordered_remove (i-1);
639 ops->ordered_remove (i-1);
642 return true;
644 default:
645 break;
648 return false;
651 static vec<tree> plus_negates;
653 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
654 expression, look in OPS for a corresponding positive operation to cancel
655 it out. If we find one, remove the other from OPS, replace
656 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
657 return false. */
659 static bool
660 eliminate_plus_minus_pair (enum tree_code opcode,
661 vec<operand_entry_t> *ops,
662 unsigned int currindex,
663 operand_entry_t curr)
665 tree negateop;
666 tree notop;
667 unsigned int i;
668 operand_entry_t oe;
670 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
671 return false;
673 negateop = get_unary_op (curr->op, NEGATE_EXPR);
674 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
675 if (negateop == NULL_TREE && notop == NULL_TREE)
676 return false;
678 /* Any non-negated version will have a rank that is one less than
679 the current rank. So once we hit those ranks, if we don't find
680 one, we can stop. */
682 for (i = currindex + 1;
683 ops->iterate (i, &oe)
684 && oe->rank >= curr->rank - 1 ;
685 i++)
687 if (oe->op == negateop)
690 if (dump_file && (dump_flags & TDF_DETAILS))
692 fprintf (dump_file, "Equivalence: ");
693 print_generic_expr (dump_file, negateop, 0);
694 fprintf (dump_file, " + -");
695 print_generic_expr (dump_file, oe->op, 0);
696 fprintf (dump_file, " -> 0\n");
699 ops->ordered_remove (i);
700 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
701 ops->ordered_remove (currindex);
702 reassociate_stats.ops_eliminated ++;
704 return true;
706 else if (oe->op == notop)
708 tree op_type = TREE_TYPE (oe->op);
710 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, notop, 0);
714 fprintf (dump_file, " + ~");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> -1\n");
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
724 return true;
728 /* CURR->OP is a negate expr in a plus expr: save it for later
729 inspection in repropagate_negates(). */
730 if (negateop != NULL_TREE)
731 plus_negates.safe_push (curr->op);
733 return false;
736 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
737 bitwise not expression, look in OPS for a corresponding operand to
738 cancel it out. If we find one, remove the other from OPS, replace
739 OPS[CURRINDEX] with 0, and return true. Otherwise, return
740 false. */
742 static bool
743 eliminate_not_pairs (enum tree_code opcode,
744 vec<operand_entry_t> *ops,
745 unsigned int currindex,
746 operand_entry_t curr)
748 tree notop;
749 unsigned int i;
750 operand_entry_t oe;
752 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
753 || TREE_CODE (curr->op) != SSA_NAME)
754 return false;
756 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
757 if (notop == NULL_TREE)
758 return false;
760 /* Any non-not version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
762 one, we can stop. */
764 for (i = currindex + 1;
765 ops->iterate (i, &oe)
766 && oe->rank >= curr->rank - 1;
767 i++)
769 if (oe->op == notop)
771 if (dump_file && (dump_flags & TDF_DETAILS))
773 fprintf (dump_file, "Equivalence: ");
774 print_generic_expr (dump_file, notop, 0);
775 if (opcode == BIT_AND_EXPR)
776 fprintf (dump_file, " & ~");
777 else if (opcode == BIT_IOR_EXPR)
778 fprintf (dump_file, " | ~");
779 print_generic_expr (dump_file, oe->op, 0);
780 if (opcode == BIT_AND_EXPR)
781 fprintf (dump_file, " -> 0\n");
782 else if (opcode == BIT_IOR_EXPR)
783 fprintf (dump_file, " -> -1\n");
786 if (opcode == BIT_AND_EXPR)
787 oe->op = build_zero_cst (TREE_TYPE (oe->op));
788 else if (opcode == BIT_IOR_EXPR)
789 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
790 TYPE_PRECISION (TREE_TYPE (oe->op)));
792 reassociate_stats.ops_eliminated += ops->length () - 1;
793 ops->truncate (0);
794 ops->quick_push (oe);
795 return true;
799 return false;
802 /* Use constant value that may be present in OPS to try to eliminate
803 operands. Note that this function is only really used when we've
804 eliminated ops for other reasons, or merged constants. Across
805 single statements, fold already does all of this, plus more. There
806 is little point in duplicating logic, so I've only included the
807 identities that I could ever construct testcases to trigger. */
809 static void
810 eliminate_using_constants (enum tree_code opcode,
811 vec<operand_entry_t> *ops)
813 operand_entry_t oelast = ops->last ();
814 tree type = TREE_TYPE (oelast->op);
816 if (oelast->rank == 0
817 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
819 switch (opcode)
821 case BIT_AND_EXPR:
822 if (integer_zerop (oelast->op))
824 if (ops->length () != 1)
826 if (dump_file && (dump_flags & TDF_DETAILS))
827 fprintf (dump_file, "Found & 0, removing all other ops\n");
829 reassociate_stats.ops_eliminated += ops->length () - 1;
831 ops->truncate (0);
832 ops->quick_push (oelast);
833 return;
836 else if (integer_all_onesp (oelast->op))
838 if (ops->length () != 1)
840 if (dump_file && (dump_flags & TDF_DETAILS))
841 fprintf (dump_file, "Found & -1, removing\n");
842 ops->pop ();
843 reassociate_stats.ops_eliminated++;
846 break;
847 case BIT_IOR_EXPR:
848 if (integer_all_onesp (oelast->op))
850 if (ops->length () != 1)
852 if (dump_file && (dump_flags & TDF_DETAILS))
853 fprintf (dump_file, "Found | -1, removing all other ops\n");
855 reassociate_stats.ops_eliminated += ops->length () - 1;
857 ops->truncate (0);
858 ops->quick_push (oelast);
859 return;
862 else if (integer_zerop (oelast->op))
864 if (ops->length () != 1)
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 fprintf (dump_file, "Found | 0, removing\n");
868 ops->pop ();
869 reassociate_stats.ops_eliminated++;
872 break;
873 case MULT_EXPR:
874 if (integer_zerop (oelast->op)
875 || (FLOAT_TYPE_P (type)
876 && !HONOR_NANS (TYPE_MODE (type))
877 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
878 && real_zerop (oelast->op)))
880 if (ops->length () != 1)
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found * 0, removing all other ops\n");
885 reassociate_stats.ops_eliminated += ops->length () - 1;
886 ops->truncate (1);
887 ops->quick_push (oelast);
888 return;
891 else if (integer_onep (oelast->op)
892 || (FLOAT_TYPE_P (type)
893 && !HONOR_SNANS (TYPE_MODE (type))
894 && real_onep (oelast->op)))
896 if (ops->length () != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found * 1, removing\n");
900 ops->pop ();
901 reassociate_stats.ops_eliminated++;
902 return;
905 break;
906 case BIT_XOR_EXPR:
907 case PLUS_EXPR:
908 case MINUS_EXPR:
909 if (integer_zerop (oelast->op)
910 || (FLOAT_TYPE_P (type)
911 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
912 && fold_real_zero_addition_p (type, oelast->op,
913 opcode == MINUS_EXPR)))
915 if (ops->length () != 1)
917 if (dump_file && (dump_flags & TDF_DETAILS))
918 fprintf (dump_file, "Found [|^+] 0, removing\n");
919 ops->pop ();
920 reassociate_stats.ops_eliminated++;
921 return;
924 break;
925 default:
926 break;
932 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
933 bool, bool);
935 /* Structure for tracking and counting operands. */
936 typedef struct oecount_s {
937 int cnt;
938 int id;
939 enum tree_code oecode;
940 tree op;
941 } oecount;
944 /* The heap for the oecount hashtable and the sorted list of operands. */
945 static vec<oecount> cvec;
947 /* Hash function for oecount. */
949 static hashval_t
950 oecount_hash (const void *p)
952 const oecount *c = &cvec[(size_t)p - 42];
953 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
956 /* Comparison function for oecount. */
958 static int
959 oecount_eq (const void *p1, const void *p2)
961 const oecount *c1 = &cvec[(size_t)p1 - 42];
962 const oecount *c2 = &cvec[(size_t)p2 - 42];
963 return (c1->oecode == c2->oecode
964 && c1->op == c2->op);
967 /* Comparison function for qsort sorting oecount elements by count. */
969 static int
970 oecount_cmp (const void *p1, const void *p2)
972 const oecount *c1 = (const oecount *)p1;
973 const oecount *c2 = (const oecount *)p2;
974 if (c1->cnt != c2->cnt)
975 return c1->cnt - c2->cnt;
976 else
977 /* If counts are identical, use unique IDs to stabilize qsort. */
978 return c1->id - c2->id;
981 /* Return TRUE iff STMT represents a builtin call that raises OP
982 to some exponent. */
984 static bool
985 stmt_is_power_of_op (gimple stmt, tree op)
987 tree fndecl;
989 if (!is_gimple_call (stmt))
990 return false;
992 fndecl = gimple_call_fndecl (stmt);
994 if (!fndecl
995 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
996 return false;
998 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1000 CASE_FLT_FN (BUILT_IN_POW):
1001 CASE_FLT_FN (BUILT_IN_POWI):
1002 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1004 default:
1005 return false;
1009 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1010 in place and return the result. Assumes that stmt_is_power_of_op
1011 was previously called for STMT and returned TRUE. */
1013 static HOST_WIDE_INT
1014 decrement_power (gimple stmt)
1016 REAL_VALUE_TYPE c, cint;
1017 HOST_WIDE_INT power;
1018 tree arg1;
1020 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1022 CASE_FLT_FN (BUILT_IN_POW):
1023 arg1 = gimple_call_arg (stmt, 1);
1024 c = TREE_REAL_CST (arg1);
1025 power = real_to_integer (&c) - 1;
1026 real_from_integer (&cint, VOIDmode, power, 0, 0);
1027 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1028 return power;
1030 CASE_FLT_FN (BUILT_IN_POWI):
1031 arg1 = gimple_call_arg (stmt, 1);
1032 power = TREE_INT_CST_LOW (arg1) - 1;
1033 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1034 return power;
1036 default:
1037 gcc_unreachable ();
1041 /* Find the single immediate use of STMT's LHS, and replace it
1042 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1043 replace *DEF with OP as well. */
1045 static void
1046 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1048 tree lhs;
1049 gimple use_stmt;
1050 use_operand_p use;
1051 gimple_stmt_iterator gsi;
1053 if (is_gimple_call (stmt))
1054 lhs = gimple_call_lhs (stmt);
1055 else
1056 lhs = gimple_assign_lhs (stmt);
1058 gcc_assert (has_single_use (lhs));
1059 single_imm_use (lhs, &use, &use_stmt);
1060 if (lhs == *def)
1061 *def = op;
1062 SET_USE (use, op);
1063 if (TREE_CODE (op) != SSA_NAME)
1064 update_stmt (use_stmt);
1065 gsi = gsi_for_stmt (stmt);
1066 gsi_remove (&gsi, true);
1067 release_defs (stmt);
1069 if (is_gimple_call (stmt))
1070 unlink_stmt_vdef (stmt);
1073 /* Walks the linear chain with result *DEF searching for an operation
1074 with operand OP and code OPCODE removing that from the chain. *DEF
1075 is updated if there is only one operand but no operation left. */
1077 static void
1078 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1080 gimple stmt = SSA_NAME_DEF_STMT (*def);
1084 tree name;
1086 if (opcode == MULT_EXPR
1087 && stmt_is_power_of_op (stmt, op))
1089 if (decrement_power (stmt) == 1)
1090 propagate_op_to_single_use (op, stmt, def);
1091 return;
1094 name = gimple_assign_rhs1 (stmt);
1096 /* If this is the operation we look for and one of the operands
1097 is ours simply propagate the other operand into the stmts
1098 single use. */
1099 if (gimple_assign_rhs_code (stmt) == opcode
1100 && (name == op
1101 || gimple_assign_rhs2 (stmt) == op))
1103 if (name == op)
1104 name = gimple_assign_rhs2 (stmt);
1105 propagate_op_to_single_use (name, stmt, def);
1106 return;
1109 /* We might have a multiply of two __builtin_pow* calls, and
1110 the operand might be hiding in the rightmost one. */
1111 if (opcode == MULT_EXPR
1112 && gimple_assign_rhs_code (stmt) == opcode
1113 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1115 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1116 if (stmt_is_power_of_op (stmt2, op))
1118 if (decrement_power (stmt2) == 1)
1119 propagate_op_to_single_use (op, stmt2, def);
1120 return;
1124 /* Continue walking the chain. */
1125 gcc_assert (name != op
1126 && TREE_CODE (name) == SSA_NAME);
1127 stmt = SSA_NAME_DEF_STMT (name);
1129 while (1);
1132 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1133 the result. Places the statement after the definition of either
1134 OP1 or OP2. Returns the new statement. */
1136 static gimple
1137 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1139 gimple op1def = NULL, op2def = NULL;
1140 gimple_stmt_iterator gsi;
1141 tree op;
1142 gimple sum;
1144 /* Create the addition statement. */
1145 op = make_ssa_name (type, NULL);
1146 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1148 /* Find an insertion place and insert. */
1149 if (TREE_CODE (op1) == SSA_NAME)
1150 op1def = SSA_NAME_DEF_STMT (op1);
1151 if (TREE_CODE (op2) == SSA_NAME)
1152 op2def = SSA_NAME_DEF_STMT (op2);
1153 if ((!op1def || gimple_nop_p (op1def))
1154 && (!op2def || gimple_nop_p (op2def)))
1156 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1157 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1159 else if ((!op1def || gimple_nop_p (op1def))
1160 || (op2def && !gimple_nop_p (op2def)
1161 && stmt_dominates_stmt_p (op1def, op2def)))
1163 if (gimple_code (op2def) == GIMPLE_PHI)
1165 gsi = gsi_after_labels (gimple_bb (op2def));
1166 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1168 else
1170 if (!stmt_ends_bb_p (op2def))
1172 gsi = gsi_for_stmt (op2def);
1173 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1175 else
1177 edge e;
1178 edge_iterator ei;
1180 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1181 if (e->flags & EDGE_FALLTHRU)
1182 gsi_insert_on_edge_immediate (e, sum);
1186 else
1188 if (gimple_code (op1def) == GIMPLE_PHI)
1190 gsi = gsi_after_labels (gimple_bb (op1def));
1191 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1193 else
1195 if (!stmt_ends_bb_p (op1def))
1197 gsi = gsi_for_stmt (op1def);
1198 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1200 else
1202 edge e;
1203 edge_iterator ei;
1205 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1206 if (e->flags & EDGE_FALLTHRU)
1207 gsi_insert_on_edge_immediate (e, sum);
1211 update_stmt (sum);
1213 return sum;
1216 /* Perform un-distribution of divisions and multiplications.
1217 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1218 to (A + B) / X for real X.
1220 The algorithm is organized as follows.
1222 - First we walk the addition chain *OPS looking for summands that
1223 are defined by a multiplication or a real division. This results
1224 in the candidates bitmap with relevant indices into *OPS.
1226 - Second we build the chains of multiplications or divisions for
1227 these candidates, counting the number of occurrences of (operand, code)
1228 pairs in all of the candidates chains.
1230 - Third we sort the (operand, code) pairs by number of occurrence and
1231 process them starting with the pair with the most uses.
1233 * For each such pair we walk the candidates again to build a
1234 second candidate bitmap noting all multiplication/division chains
1235 that have at least one occurrence of (operand, code).
1237 * We build an alternate addition chain only covering these
1238 candidates with one (operand, code) operation removed from their
1239 multiplication/division chain.
1241 * The first candidate gets replaced by the alternate addition chain
1242 multiplied/divided by the operand.
1244 * All candidate chains get disabled for further processing and
1245 processing of (operand, code) pairs continues.
1247 The alternate addition chains built are re-processed by the main
1248 reassociation algorithm which allows optimizing a * x * y + b * y * x
1249 to (a + b ) * x * y in one invocation of the reassociation pass. */
1251 static bool
1252 undistribute_ops_list (enum tree_code opcode,
1253 vec<operand_entry_t> *ops, struct loop *loop)
1255 unsigned int length = ops->length ();
1256 operand_entry_t oe1;
1257 unsigned i, j;
1258 sbitmap candidates, candidates2;
1259 unsigned nr_candidates, nr_candidates2;
1260 sbitmap_iterator sbi0;
1261 vec<operand_entry_t> *subops;
1262 htab_t ctable;
1263 bool changed = false;
1264 int next_oecount_id = 0;
1266 if (length <= 1
1267 || opcode != PLUS_EXPR)
1268 return false;
1270 /* Build a list of candidates to process. */
1271 candidates = sbitmap_alloc (length);
1272 bitmap_clear (candidates);
1273 nr_candidates = 0;
1274 FOR_EACH_VEC_ELT (*ops, i, oe1)
1276 enum tree_code dcode;
1277 gimple oe1def;
1279 if (TREE_CODE (oe1->op) != SSA_NAME)
1280 continue;
1281 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1282 if (!is_gimple_assign (oe1def))
1283 continue;
1284 dcode = gimple_assign_rhs_code (oe1def);
1285 if ((dcode != MULT_EXPR
1286 && dcode != RDIV_EXPR)
1287 || !is_reassociable_op (oe1def, dcode, loop))
1288 continue;
1290 bitmap_set_bit (candidates, i);
1291 nr_candidates++;
1294 if (nr_candidates < 2)
1296 sbitmap_free (candidates);
1297 return false;
1300 if (dump_file && (dump_flags & TDF_DETAILS))
1302 fprintf (dump_file, "searching for un-distribute opportunities ");
1303 print_generic_expr (dump_file,
1304 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1305 fprintf (dump_file, " %d\n", nr_candidates);
1308 /* Build linearized sub-operand lists and the counting table. */
1309 cvec.create (0);
1310 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1311 /* ??? Macro arguments cannot have multi-argument template types in
1312 them. This typedef is needed to workaround that limitation. */
1313 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1314 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1315 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1317 gimple oedef;
1318 enum tree_code oecode;
1319 unsigned j;
1321 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1322 oecode = gimple_assign_rhs_code (oedef);
1323 linearize_expr_tree (&subops[i], oedef,
1324 associative_tree_code (oecode), false);
1326 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1328 oecount c;
1329 void **slot;
1330 size_t idx;
1331 c.oecode = oecode;
1332 c.cnt = 1;
1333 c.id = next_oecount_id++;
1334 c.op = oe1->op;
1335 cvec.safe_push (c);
1336 idx = cvec.length () + 41;
1337 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1338 if (!*slot)
1340 *slot = (void *)idx;
1342 else
1344 cvec.pop ();
1345 cvec[(size_t)*slot - 42].cnt++;
1349 htab_delete (ctable);
1351 /* Sort the counting table. */
1352 cvec.qsort (oecount_cmp);
1354 if (dump_file && (dump_flags & TDF_DETAILS))
1356 oecount *c;
1357 fprintf (dump_file, "Candidates:\n");
1358 FOR_EACH_VEC_ELT (cvec, j, c)
1360 fprintf (dump_file, " %u %s: ", c->cnt,
1361 c->oecode == MULT_EXPR
1362 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1363 print_generic_expr (dump_file, c->op, 0);
1364 fprintf (dump_file, "\n");
1368 /* Process the (operand, code) pairs in order of most occurence. */
1369 candidates2 = sbitmap_alloc (length);
1370 while (!cvec.is_empty ())
1372 oecount *c = &cvec.last ();
1373 if (c->cnt < 2)
1374 break;
1376 /* Now collect the operands in the outer chain that contain
1377 the common operand in their inner chain. */
1378 bitmap_clear (candidates2);
1379 nr_candidates2 = 0;
1380 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1382 gimple oedef;
1383 enum tree_code oecode;
1384 unsigned j;
1385 tree op = (*ops)[i]->op;
1387 /* If we undistributed in this chain already this may be
1388 a constant. */
1389 if (TREE_CODE (op) != SSA_NAME)
1390 continue;
1392 oedef = SSA_NAME_DEF_STMT (op);
1393 oecode = gimple_assign_rhs_code (oedef);
1394 if (oecode != c->oecode)
1395 continue;
1397 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1399 if (oe1->op == c->op)
1401 bitmap_set_bit (candidates2, i);
1402 ++nr_candidates2;
1403 break;
1408 if (nr_candidates2 >= 2)
1410 operand_entry_t oe1, oe2;
1411 gimple prod;
1412 int first = bitmap_first_set_bit (candidates2);
1414 /* Build the new addition chain. */
1415 oe1 = (*ops)[first];
1416 if (dump_file && (dump_flags & TDF_DETAILS))
1418 fprintf (dump_file, "Building (");
1419 print_generic_expr (dump_file, oe1->op, 0);
1421 zero_one_operation (&oe1->op, c->oecode, c->op);
1422 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1424 gimple sum;
1425 oe2 = (*ops)[i];
1426 if (dump_file && (dump_flags & TDF_DETAILS))
1428 fprintf (dump_file, " + ");
1429 print_generic_expr (dump_file, oe2->op, 0);
1431 zero_one_operation (&oe2->op, c->oecode, c->op);
1432 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1433 oe1->op, oe2->op, opcode);
1434 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1435 oe2->rank = 0;
1436 oe1->op = gimple_get_lhs (sum);
1439 /* Apply the multiplication/division. */
1440 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1441 oe1->op, c->op, c->oecode);
1442 if (dump_file && (dump_flags & TDF_DETAILS))
1444 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1445 print_generic_expr (dump_file, c->op, 0);
1446 fprintf (dump_file, "\n");
1449 /* Record it in the addition chain and disable further
1450 undistribution with this op. */
1451 oe1->op = gimple_assign_lhs (prod);
1452 oe1->rank = get_rank (oe1->op);
1453 subops[first].release ();
1455 changed = true;
1458 cvec.pop ();
1461 for (i = 0; i < ops->length (); ++i)
1462 subops[i].release ();
1463 free (subops);
1464 cvec.release ();
1465 sbitmap_free (candidates);
1466 sbitmap_free (candidates2);
1468 return changed;
1471 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1472 expression, examine the other OPS to see if any of them are comparisons
1473 of the same values, which we may be able to combine or eliminate.
1474 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1476 static bool
1477 eliminate_redundant_comparison (enum tree_code opcode,
1478 vec<operand_entry_t> *ops,
1479 unsigned int currindex,
1480 operand_entry_t curr)
1482 tree op1, op2;
1483 enum tree_code lcode, rcode;
1484 gimple def1, def2;
1485 int i;
1486 operand_entry_t oe;
1488 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1489 return false;
1491 /* Check that CURR is a comparison. */
1492 if (TREE_CODE (curr->op) != SSA_NAME)
1493 return false;
1494 def1 = SSA_NAME_DEF_STMT (curr->op);
1495 if (!is_gimple_assign (def1))
1496 return false;
1497 lcode = gimple_assign_rhs_code (def1);
1498 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1499 return false;
1500 op1 = gimple_assign_rhs1 (def1);
1501 op2 = gimple_assign_rhs2 (def1);
1503 /* Now look for a similar comparison in the remaining OPS. */
1504 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1506 tree t;
1508 if (TREE_CODE (oe->op) != SSA_NAME)
1509 continue;
1510 def2 = SSA_NAME_DEF_STMT (oe->op);
1511 if (!is_gimple_assign (def2))
1512 continue;
1513 rcode = gimple_assign_rhs_code (def2);
1514 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1515 continue;
1517 /* If we got here, we have a match. See if we can combine the
1518 two comparisons. */
1519 if (opcode == BIT_IOR_EXPR)
1520 t = maybe_fold_or_comparisons (lcode, op1, op2,
1521 rcode, gimple_assign_rhs1 (def2),
1522 gimple_assign_rhs2 (def2));
1523 else
1524 t = maybe_fold_and_comparisons (lcode, op1, op2,
1525 rcode, gimple_assign_rhs1 (def2),
1526 gimple_assign_rhs2 (def2));
1527 if (!t)
1528 continue;
1530 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1531 always give us a boolean_type_node value back. If the original
1532 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1533 we need to convert. */
1534 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1535 t = fold_convert (TREE_TYPE (curr->op), t);
1537 if (TREE_CODE (t) != INTEGER_CST
1538 && !operand_equal_p (t, curr->op, 0))
1540 enum tree_code subcode;
1541 tree newop1, newop2;
1542 if (!COMPARISON_CLASS_P (t))
1543 continue;
1544 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1545 STRIP_USELESS_TYPE_CONVERSION (newop1);
1546 STRIP_USELESS_TYPE_CONVERSION (newop2);
1547 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1548 continue;
1551 if (dump_file && (dump_flags & TDF_DETAILS))
1553 fprintf (dump_file, "Equivalence: ");
1554 print_generic_expr (dump_file, curr->op, 0);
1555 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1556 print_generic_expr (dump_file, oe->op, 0);
1557 fprintf (dump_file, " -> ");
1558 print_generic_expr (dump_file, t, 0);
1559 fprintf (dump_file, "\n");
1562 /* Now we can delete oe, as it has been subsumed by the new combined
1563 expression t. */
1564 ops->ordered_remove (i);
1565 reassociate_stats.ops_eliminated ++;
1567 /* If t is the same as curr->op, we're done. Otherwise we must
1568 replace curr->op with t. Special case is if we got a constant
1569 back, in which case we add it to the end instead of in place of
1570 the current entry. */
1571 if (TREE_CODE (t) == INTEGER_CST)
1573 ops->ordered_remove (currindex);
1574 add_to_ops_vec (ops, t);
1576 else if (!operand_equal_p (t, curr->op, 0))
1578 gimple sum;
1579 enum tree_code subcode;
1580 tree newop1;
1581 tree newop2;
1582 gcc_assert (COMPARISON_CLASS_P (t));
1583 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1584 STRIP_USELESS_TYPE_CONVERSION (newop1);
1585 STRIP_USELESS_TYPE_CONVERSION (newop2);
1586 gcc_checking_assert (is_gimple_val (newop1)
1587 && is_gimple_val (newop2));
1588 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1589 curr->op = gimple_get_lhs (sum);
1591 return true;
1594 return false;
1597 /* Perform various identities and other optimizations on the list of
1598 operand entries, stored in OPS. The tree code for the binary
1599 operation between all the operands is OPCODE. */
1601 static void
1602 optimize_ops_list (enum tree_code opcode,
1603 vec<operand_entry_t> *ops)
1605 unsigned int length = ops->length ();
1606 unsigned int i;
1607 operand_entry_t oe;
1608 operand_entry_t oelast = NULL;
1609 bool iterate = false;
1611 if (length == 1)
1612 return;
1614 oelast = ops->last ();
1616 /* If the last two are constants, pop the constants off, merge them
1617 and try the next two. */
1618 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1620 operand_entry_t oelm1 = (*ops)[length - 2];
1622 if (oelm1->rank == 0
1623 && is_gimple_min_invariant (oelm1->op)
1624 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1625 TREE_TYPE (oelast->op)))
1627 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1628 oelm1->op, oelast->op);
1630 if (folded && is_gimple_min_invariant (folded))
1632 if (dump_file && (dump_flags & TDF_DETAILS))
1633 fprintf (dump_file, "Merging constants\n");
1635 ops->pop ();
1636 ops->pop ();
1638 add_to_ops_vec (ops, folded);
1639 reassociate_stats.constants_eliminated++;
1641 optimize_ops_list (opcode, ops);
1642 return;
1647 eliminate_using_constants (opcode, ops);
1648 oelast = NULL;
1650 for (i = 0; ops->iterate (i, &oe);)
1652 bool done = false;
1654 if (eliminate_not_pairs (opcode, ops, i, oe))
1655 return;
1656 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1657 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1658 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1660 if (done)
1661 return;
1662 iterate = true;
1663 oelast = NULL;
1664 continue;
1666 oelast = oe;
1667 i++;
1670 length = ops->length ();
1671 oelast = ops->last ();
1673 if (iterate)
1674 optimize_ops_list (opcode, ops);
1677 /* The following functions are subroutines to optimize_range_tests and allow
1678 it to try to change a logical combination of comparisons into a range
1679 test.
1681 For example, both
1682 X == 2 || X == 5 || X == 3 || X == 4
1684 X >= 2 && X <= 5
1685 are converted to
1686 (unsigned) (X - 2) <= 3
1688 For more information see comments above fold_test_range in fold-const.c,
1689 this implementation is for GIMPLE. */
1691 struct range_entry
1693 tree exp;
1694 tree low;
1695 tree high;
1696 bool in_p;
1697 bool strict_overflow_p;
1698 unsigned int idx, next;
1701 /* This is similar to make_range in fold-const.c, but on top of
1702 GIMPLE instead of trees. If EXP is non-NULL, it should be
1703 an SSA_NAME and STMT argument is ignored, otherwise STMT
1704 argument should be a GIMPLE_COND. */
1706 static void
1707 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1709 int in_p;
1710 tree low, high;
1711 bool is_bool, strict_overflow_p;
1713 r->exp = NULL_TREE;
1714 r->in_p = false;
1715 r->strict_overflow_p = false;
1716 r->low = NULL_TREE;
1717 r->high = NULL_TREE;
1718 if (exp != NULL_TREE
1719 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1720 return;
1722 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1723 and see if we can refine the range. Some of the cases below may not
1724 happen, but it doesn't seem worth worrying about this. We "continue"
1725 the outer loop when we've changed something; otherwise we "break"
1726 the switch, which will "break" the while. */
1727 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1728 high = low;
1729 in_p = 0;
1730 strict_overflow_p = false;
1731 is_bool = false;
1732 if (exp == NULL_TREE)
1733 is_bool = true;
1734 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1736 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1737 is_bool = true;
1738 else
1739 return;
1741 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1742 is_bool = true;
1744 while (1)
1746 enum tree_code code;
1747 tree arg0, arg1, exp_type;
1748 tree nexp;
1749 location_t loc;
1751 if (exp != NULL_TREE)
1753 if (TREE_CODE (exp) != SSA_NAME)
1754 break;
1756 stmt = SSA_NAME_DEF_STMT (exp);
1757 if (!is_gimple_assign (stmt))
1758 break;
1760 code = gimple_assign_rhs_code (stmt);
1761 arg0 = gimple_assign_rhs1 (stmt);
1762 arg1 = gimple_assign_rhs2 (stmt);
1763 exp_type = TREE_TYPE (exp);
1765 else
1767 code = gimple_cond_code (stmt);
1768 arg0 = gimple_cond_lhs (stmt);
1769 arg1 = gimple_cond_rhs (stmt);
1770 exp_type = boolean_type_node;
1773 if (TREE_CODE (arg0) != SSA_NAME)
1774 break;
1775 loc = gimple_location (stmt);
1776 switch (code)
1778 case BIT_NOT_EXPR:
1779 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1781 in_p = !in_p;
1782 exp = arg0;
1783 continue;
1785 break;
1786 case SSA_NAME:
1787 exp = arg0;
1788 continue;
1789 CASE_CONVERT:
1790 if (is_bool)
1791 goto do_default;
1792 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1794 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1795 is_bool = true;
1796 else
1797 return;
1799 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1800 is_bool = true;
1801 goto do_default;
1802 case EQ_EXPR:
1803 case NE_EXPR:
1804 case LT_EXPR:
1805 case LE_EXPR:
1806 case GE_EXPR:
1807 case GT_EXPR:
1808 is_bool = true;
1809 /* FALLTHRU */
1810 default:
1811 if (!is_bool)
1812 return;
1813 do_default:
1814 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1815 &low, &high, &in_p,
1816 &strict_overflow_p);
1817 if (nexp != NULL_TREE)
1819 exp = nexp;
1820 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1821 continue;
1823 break;
1825 break;
1827 if (is_bool)
1829 r->exp = exp;
1830 r->in_p = in_p;
1831 r->low = low;
1832 r->high = high;
1833 r->strict_overflow_p = strict_overflow_p;
1837 /* Comparison function for qsort. Sort entries
1838 without SSA_NAME exp first, then with SSA_NAMEs sorted
1839 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1840 by increasing ->low and if ->low is the same, by increasing
1841 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1842 maximum. */
1844 static int
1845 range_entry_cmp (const void *a, const void *b)
1847 const struct range_entry *p = (const struct range_entry *) a;
1848 const struct range_entry *q = (const struct range_entry *) b;
1850 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1852 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1854 /* Group range_entries for the same SSA_NAME together. */
1855 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1856 return -1;
1857 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1858 return 1;
1859 /* If ->low is different, NULL low goes first, then by
1860 ascending low. */
1861 if (p->low != NULL_TREE)
1863 if (q->low != NULL_TREE)
1865 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1866 p->low, q->low);
1867 if (tem && integer_onep (tem))
1868 return -1;
1869 tem = fold_binary (GT_EXPR, boolean_type_node,
1870 p->low, q->low);
1871 if (tem && integer_onep (tem))
1872 return 1;
1874 else
1875 return 1;
1877 else if (q->low != NULL_TREE)
1878 return -1;
1879 /* If ->high is different, NULL high goes last, before that by
1880 ascending high. */
1881 if (p->high != NULL_TREE)
1883 if (q->high != NULL_TREE)
1885 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1886 p->high, q->high);
1887 if (tem && integer_onep (tem))
1888 return -1;
1889 tem = fold_binary (GT_EXPR, boolean_type_node,
1890 p->high, q->high);
1891 if (tem && integer_onep (tem))
1892 return 1;
1894 else
1895 return -1;
1897 else if (p->high != NULL_TREE)
1898 return 1;
1899 /* If both ranges are the same, sort below by ascending idx. */
1901 else
1902 return 1;
1904 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1905 return -1;
1907 if (p->idx < q->idx)
1908 return -1;
1909 else
1911 gcc_checking_assert (p->idx > q->idx);
1912 return 1;
1916 /* Helper routine of optimize_range_test.
1917 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1918 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1919 OPCODE and OPS are arguments of optimize_range_tests. Return
1920 true if the range merge has been successful.
1921 If OPCODE is ERROR_MARK, this is called from within
1922 maybe_optimize_range_tests and is performing inter-bb range optimization.
1923 Changes should be then performed right away, and whether an op is
1924 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1926 static bool
1927 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1928 unsigned int count, enum tree_code opcode,
1929 vec<operand_entry_t> *ops, tree exp, bool in_p,
1930 tree low, tree high, bool strict_overflow_p)
1932 operand_entry_t oe = (*ops)[range->idx];
1933 tree op = oe->op;
1934 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1935 location_t loc = gimple_location (stmt);
1936 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1937 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1938 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1939 gimple_stmt_iterator gsi;
1941 if (tem == NULL_TREE)
1942 return false;
1944 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1945 warning_at (loc, OPT_Wstrict_overflow,
1946 "assuming signed overflow does not occur "
1947 "when simplifying range test");
1949 if (dump_file && (dump_flags & TDF_DETAILS))
1951 struct range_entry *r;
1952 fprintf (dump_file, "Optimizing range tests ");
1953 print_generic_expr (dump_file, range->exp, 0);
1954 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1955 print_generic_expr (dump_file, range->low, 0);
1956 fprintf (dump_file, ", ");
1957 print_generic_expr (dump_file, range->high, 0);
1958 fprintf (dump_file, "]");
1959 for (r = otherrange; r < otherrange + count; r++)
1961 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1962 print_generic_expr (dump_file, r->low, 0);
1963 fprintf (dump_file, ", ");
1964 print_generic_expr (dump_file, r->high, 0);
1965 fprintf (dump_file, "]");
1967 fprintf (dump_file, "\n into ");
1968 print_generic_expr (dump_file, tem, 0);
1969 fprintf (dump_file, "\n");
1972 if (opcode == BIT_IOR_EXPR
1973 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1974 tem = invert_truthvalue_loc (loc, tem);
1976 tem = fold_convert_loc (loc, optype, tem);
1977 gsi = gsi_for_stmt (stmt);
1978 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1979 GSI_SAME_STMT);
1981 /* If doing inter-bb range test optimization, update the
1982 stmts immediately. Start with changing the first range test
1983 immediate use to the new value (TEM), or, if the first range
1984 test is a GIMPLE_COND stmt, change that condition. */
1985 if (opcode == ERROR_MARK)
1987 if (op)
1989 imm_use_iterator iter;
1990 use_operand_p use_p;
1991 gimple use_stmt;
1993 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1995 if (is_gimple_debug (use_stmt))
1996 continue;
1997 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1998 SET_USE (use_p, tem);
1999 update_stmt (use_stmt);
2002 else
2004 gimple_cond_set_code (stmt, NE_EXPR);
2005 gimple_cond_set_lhs (stmt, tem);
2006 gimple_cond_set_rhs (stmt, boolean_false_node);
2007 update_stmt (stmt);
2010 oe->op = tem;
2011 range->exp = exp;
2012 range->low = low;
2013 range->high = high;
2014 range->in_p = in_p;
2015 range->strict_overflow_p = false;
2017 for (range = otherrange; range < otherrange + count; range++)
2019 oe = (*ops)[range->idx];
2020 /* Now change all the other range test immediate uses, so that
2021 those tests will be optimized away. */
2022 if (opcode == ERROR_MARK)
2024 if (oe->op)
2026 imm_use_iterator iter;
2027 use_operand_p use_p;
2028 gimple use_stmt;
2030 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2032 if (is_gimple_debug (use_stmt))
2033 continue;
2034 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2035 adjust it into _7 = _9;. */
2036 if (is_gimple_assign (use_stmt)
2037 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2039 tree expr = NULL_TREE;
2040 if (oe->op == gimple_assign_rhs1 (use_stmt))
2041 expr = gimple_assign_rhs2 (use_stmt);
2042 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2043 expr = gimple_assign_rhs1 (use_stmt);
2044 if (expr
2045 && expr != oe->op
2046 && TREE_CODE (expr) == SSA_NAME)
2048 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2049 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2050 expr, NULL_TREE);
2051 update_stmt (use_stmt);
2052 continue;
2055 /* If imm use of _8 is a statement like _7 = (int) _8;,
2056 adjust it into _7 = 0; or _7 = 1;. */
2057 if (gimple_assign_cast_p (use_stmt)
2058 && oe->op == gimple_assign_rhs1 (use_stmt))
2060 tree lhs = gimple_assign_lhs (use_stmt);
2061 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2063 gimple_stmt_iterator gsi2
2064 = gsi_for_stmt (use_stmt);
2065 tree expr = build_int_cst (TREE_TYPE (lhs),
2066 oe->rank == BIT_IOR_EXPR
2067 ? 0 : 1);
2068 gimple_assign_set_rhs_with_ops (&gsi2,
2069 INTEGER_CST,
2070 expr, NULL_TREE);
2071 update_stmt (use_stmt);
2072 continue;
2075 /* Otherwise replace the use with 0 or 1. */
2076 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2077 SET_USE (use_p,
2078 build_int_cst (TREE_TYPE (oe->op),
2079 oe->rank == BIT_IOR_EXPR
2080 ? 0 : 1));
2081 update_stmt (use_stmt);
2084 else
2086 /* If range test was a GIMPLE_COND, simply change it
2087 into an always false or always true condition. */
2088 stmt = last_stmt (BASIC_BLOCK (oe->id));
2089 if (oe->rank == BIT_IOR_EXPR)
2090 gimple_cond_make_false (stmt);
2091 else
2092 gimple_cond_make_true (stmt);
2093 update_stmt (stmt);
2096 oe->op = error_mark_node;
2097 range->exp = NULL_TREE;
2099 return true;
2102 /* Optimize range tests, similarly how fold_range_test optimizes
2103 it on trees. The tree code for the binary
2104 operation between all the operands is OPCODE.
2105 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2106 maybe_optimize_range_tests for inter-bb range optimization.
2107 In that case if oe->op is NULL, oe->id is bb->index whose
2108 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2109 the actual opcode. */
2111 static void
2112 optimize_range_tests (enum tree_code opcode,
2113 vec<operand_entry_t> *ops)
2115 unsigned int length = ops->length (), i, j, first;
2116 operand_entry_t oe;
2117 struct range_entry *ranges;
2118 bool any_changes = false;
2120 if (length == 1)
2121 return;
2123 ranges = XNEWVEC (struct range_entry, length);
2124 for (i = 0; i < length; i++)
2126 oe = (*ops)[i];
2127 ranges[i].idx = i;
2128 init_range_entry (ranges + i, oe->op,
2129 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2130 /* For | invert it now, we will invert it again before emitting
2131 the optimized expression. */
2132 if (opcode == BIT_IOR_EXPR
2133 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2134 ranges[i].in_p = !ranges[i].in_p;
2137 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2138 for (i = 0; i < length; i++)
2139 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2140 break;
2142 /* Try to merge ranges. */
2143 for (first = i; i < length; i++)
2145 tree low = ranges[i].low;
2146 tree high = ranges[i].high;
2147 int in_p = ranges[i].in_p;
2148 bool strict_overflow_p = ranges[i].strict_overflow_p;
2149 int update_fail_count = 0;
2151 for (j = i + 1; j < length; j++)
2153 if (ranges[i].exp != ranges[j].exp)
2154 break;
2155 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2156 ranges[j].in_p, ranges[j].low, ranges[j].high))
2157 break;
2158 strict_overflow_p |= ranges[j].strict_overflow_p;
2161 if (j == i + 1)
2162 continue;
2164 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2165 ops, ranges[i].exp, in_p, low, high,
2166 strict_overflow_p))
2168 i = j - 1;
2169 any_changes = true;
2171 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2172 while update_range_test would fail. */
2173 else if (update_fail_count == 64)
2174 i = j - 1;
2175 else
2176 ++update_fail_count;
2179 /* Optimize X == CST1 || X == CST2
2180 if popcount (CST1 ^ CST2) == 1 into
2181 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2182 Similarly for ranges. E.g.
2183 X != 2 && X != 3 && X != 10 && X != 11
2184 will be transformed by the above loop into
2185 (X - 2U) <= 1U && (X - 10U) <= 1U
2186 and this loop can transform that into
2187 ((X & ~8) - 2U) <= 1U. */
2188 for (i = first; i < length; i++)
2190 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2192 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2193 continue;
2194 type = TREE_TYPE (ranges[i].exp);
2195 if (!INTEGRAL_TYPE_P (type))
2196 continue;
2197 lowi = ranges[i].low;
2198 if (lowi == NULL_TREE)
2199 lowi = TYPE_MIN_VALUE (type);
2200 highi = ranges[i].high;
2201 if (highi == NULL_TREE)
2202 continue;
2203 for (j = i + 1; j < length && j < i + 64; j++)
2205 if (ranges[j].exp == NULL_TREE)
2206 continue;
2207 if (ranges[i].exp != ranges[j].exp)
2208 break;
2209 if (ranges[j].in_p)
2210 continue;
2211 lowj = ranges[j].low;
2212 if (lowj == NULL_TREE)
2213 continue;
2214 highj = ranges[j].high;
2215 if (highj == NULL_TREE)
2216 highj = TYPE_MAX_VALUE (type);
2217 tem = fold_binary (GT_EXPR, boolean_type_node,
2218 lowj, highi);
2219 if (tem == NULL_TREE || !integer_onep (tem))
2220 continue;
2221 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2222 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2223 continue;
2224 gcc_checking_assert (!integer_zerop (lowxor));
2225 tem = fold_binary (MINUS_EXPR, type, lowxor,
2226 build_int_cst (type, 1));
2227 if (tem == NULL_TREE)
2228 continue;
2229 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2230 if (tem == NULL_TREE || !integer_zerop (tem))
2231 continue;
2232 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2233 if (!tree_int_cst_equal (lowxor, highxor))
2234 continue;
2235 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2236 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2237 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2238 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2239 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2240 ranges[i].in_p, lowj, highj,
2241 ranges[i].strict_overflow_p
2242 || ranges[j].strict_overflow_p))
2244 any_changes = true;
2245 break;
2250 if (any_changes && opcode != ERROR_MARK)
2252 j = 0;
2253 FOR_EACH_VEC_ELT (*ops, i, oe)
2255 if (oe->op == error_mark_node)
2256 continue;
2257 else if (i != j)
2258 (*ops)[j] = oe;
2259 j++;
2261 ops->truncate (j);
2264 XDELETEVEC (ranges);
2267 /* Return true if STMT is a cast like:
2268 <bb N>:
2270 _123 = (int) _234;
2272 <bb M>:
2273 # _345 = PHI <_123(N), 1(...), 1(...)>
2274 where _234 has bool type, _123 has single use and
2275 bb N has a single successor M. This is commonly used in
2276 the last block of a range test. */
2278 static bool
2279 final_range_test_p (gimple stmt)
2281 basic_block bb, rhs_bb;
2282 edge e;
2283 tree lhs, rhs;
2284 use_operand_p use_p;
2285 gimple use_stmt;
2287 if (!gimple_assign_cast_p (stmt))
2288 return false;
2289 bb = gimple_bb (stmt);
2290 if (!single_succ_p (bb))
2291 return false;
2292 e = single_succ_edge (bb);
2293 if (e->flags & EDGE_COMPLEX)
2294 return false;
2296 lhs = gimple_assign_lhs (stmt);
2297 rhs = gimple_assign_rhs1 (stmt);
2298 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2299 || TREE_CODE (rhs) != SSA_NAME
2300 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2301 return false;
2303 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2304 if (!single_imm_use (lhs, &use_p, &use_stmt))
2305 return false;
2307 if (gimple_code (use_stmt) != GIMPLE_PHI
2308 || gimple_bb (use_stmt) != e->dest)
2309 return false;
2311 /* And that the rhs is defined in the same loop. */
2312 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2313 if (rhs_bb == NULL
2314 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2315 return false;
2317 return true;
2320 /* Return true if BB is suitable basic block for inter-bb range test
2321 optimization. If BACKWARD is true, BB should be the only predecessor
2322 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2323 or compared with to find a common basic block to which all conditions
2324 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2325 be the only predecessor of BB. */
2327 static bool
2328 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2329 bool backward)
2331 edge_iterator ei, ei2;
2332 edge e, e2;
2333 gimple stmt;
2334 gimple_stmt_iterator gsi;
2335 bool other_edge_seen = false;
2336 bool is_cond;
2338 if (test_bb == bb)
2339 return false;
2340 /* Check last stmt first. */
2341 stmt = last_stmt (bb);
2342 if (stmt == NULL
2343 || (gimple_code (stmt) != GIMPLE_COND
2344 && (backward || !final_range_test_p (stmt)))
2345 || gimple_visited_p (stmt)
2346 || stmt_could_throw_p (stmt)
2347 || *other_bb == bb)
2348 return false;
2349 is_cond = gimple_code (stmt) == GIMPLE_COND;
2350 if (is_cond)
2352 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2353 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2354 to *OTHER_BB (if not set yet, try to find it out). */
2355 if (EDGE_COUNT (bb->succs) != 2)
2356 return false;
2357 FOR_EACH_EDGE (e, ei, bb->succs)
2359 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2360 return false;
2361 if (e->dest == test_bb)
2363 if (backward)
2364 continue;
2365 else
2366 return false;
2368 if (e->dest == bb)
2369 return false;
2370 if (*other_bb == NULL)
2372 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2373 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2374 return false;
2375 else if (e->dest == e2->dest)
2376 *other_bb = e->dest;
2377 if (*other_bb == NULL)
2378 return false;
2380 if (e->dest == *other_bb)
2381 other_edge_seen = true;
2382 else if (backward)
2383 return false;
2385 if (*other_bb == NULL || !other_edge_seen)
2386 return false;
2388 else if (single_succ (bb) != *other_bb)
2389 return false;
2391 /* Now check all PHIs of *OTHER_BB. */
2392 e = find_edge (bb, *other_bb);
2393 e2 = find_edge (test_bb, *other_bb);
2394 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2396 gimple phi = gsi_stmt (gsi);
2397 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2398 corresponding to BB and TEST_BB predecessor must be the same. */
2399 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2400 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2402 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2403 one of the PHIs should have the lhs of the last stmt in
2404 that block as PHI arg and that PHI should have 0 or 1
2405 corresponding to it in all other range test basic blocks
2406 considered. */
2407 if (!is_cond)
2409 if (gimple_phi_arg_def (phi, e->dest_idx)
2410 == gimple_assign_lhs (stmt)
2411 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2412 || integer_onep (gimple_phi_arg_def (phi,
2413 e2->dest_idx))))
2414 continue;
2416 else
2418 gimple test_last = last_stmt (test_bb);
2419 if (gimple_code (test_last) != GIMPLE_COND
2420 && gimple_phi_arg_def (phi, e2->dest_idx)
2421 == gimple_assign_lhs (test_last)
2422 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2423 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2424 continue;
2427 return false;
2430 return true;
2433 /* Return true if BB doesn't have side-effects that would disallow
2434 range test optimization, all SSA_NAMEs set in the bb are consumed
2435 in the bb and there are no PHIs. */
2437 static bool
2438 no_side_effect_bb (basic_block bb)
2440 gimple_stmt_iterator gsi;
2441 gimple last;
2443 if (!gimple_seq_empty_p (phi_nodes (bb)))
2444 return false;
2445 last = last_stmt (bb);
2446 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2448 gimple stmt = gsi_stmt (gsi);
2449 tree lhs;
2450 imm_use_iterator imm_iter;
2451 use_operand_p use_p;
2453 if (is_gimple_debug (stmt))
2454 continue;
2455 if (gimple_has_side_effects (stmt))
2456 return false;
2457 if (stmt == last)
2458 return true;
2459 if (!is_gimple_assign (stmt))
2460 return false;
2461 lhs = gimple_assign_lhs (stmt);
2462 if (TREE_CODE (lhs) != SSA_NAME)
2463 return false;
2464 if (gimple_assign_rhs_could_trap_p (stmt))
2465 return false;
2466 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2468 gimple use_stmt = USE_STMT (use_p);
2469 if (is_gimple_debug (use_stmt))
2470 continue;
2471 if (gimple_bb (use_stmt) != bb)
2472 return false;
2475 return false;
2478 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2479 return true and fill in *OPS recursively. */
2481 static bool
2482 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2483 struct loop *loop)
2485 gimple stmt = SSA_NAME_DEF_STMT (var);
2486 tree rhs[2];
2487 int i;
2489 if (!is_reassociable_op (stmt, code, loop))
2490 return false;
2492 rhs[0] = gimple_assign_rhs1 (stmt);
2493 rhs[1] = gimple_assign_rhs2 (stmt);
2494 gimple_set_visited (stmt, true);
2495 for (i = 0; i < 2; i++)
2496 if (TREE_CODE (rhs[i]) == SSA_NAME
2497 && !get_ops (rhs[i], code, ops, loop)
2498 && has_single_use (rhs[i]))
2500 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2502 oe->op = rhs[i];
2503 oe->rank = code;
2504 oe->id = 0;
2505 oe->count = 1;
2506 ops->safe_push (oe);
2508 return true;
2511 /* Inter-bb range test optimization. */
2513 static void
2514 maybe_optimize_range_tests (gimple stmt)
2516 basic_block first_bb = gimple_bb (stmt);
2517 basic_block last_bb = first_bb;
2518 basic_block other_bb = NULL;
2519 basic_block bb;
2520 edge_iterator ei;
2521 edge e;
2522 vec<operand_entry_t> ops = vNULL;
2524 /* Consider only basic blocks that end with GIMPLE_COND or
2525 a cast statement satisfying final_range_test_p. All
2526 but the last bb in the first_bb .. last_bb range
2527 should end with GIMPLE_COND. */
2528 if (gimple_code (stmt) == GIMPLE_COND)
2530 if (EDGE_COUNT (first_bb->succs) != 2)
2531 return;
2533 else if (final_range_test_p (stmt))
2534 other_bb = single_succ (first_bb);
2535 else
2536 return;
2538 if (stmt_could_throw_p (stmt))
2539 return;
2541 /* As relative ordering of post-dominator sons isn't fixed,
2542 maybe_optimize_range_tests can be called first on any
2543 bb in the range we want to optimize. So, start searching
2544 backwards, if first_bb can be set to a predecessor. */
2545 while (single_pred_p (first_bb))
2547 basic_block pred_bb = single_pred (first_bb);
2548 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2549 break;
2550 if (!no_side_effect_bb (first_bb))
2551 break;
2552 first_bb = pred_bb;
2554 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2555 Before starting forward search in last_bb successors, find
2556 out the other_bb. */
2557 if (first_bb == last_bb)
2559 other_bb = NULL;
2560 /* As non-GIMPLE_COND last stmt always terminates the range,
2561 if forward search didn't discover anything, just give up. */
2562 if (gimple_code (stmt) != GIMPLE_COND)
2563 return;
2564 /* Look at both successors. Either it ends with a GIMPLE_COND
2565 and satisfies suitable_cond_bb, or ends with a cast and
2566 other_bb is that cast's successor. */
2567 FOR_EACH_EDGE (e, ei, first_bb->succs)
2568 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2569 || e->dest == first_bb)
2570 return;
2571 else if (single_pred_p (e->dest))
2573 stmt = last_stmt (e->dest);
2574 if (stmt
2575 && gimple_code (stmt) == GIMPLE_COND
2576 && EDGE_COUNT (e->dest->succs) == 2)
2578 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2579 break;
2580 else
2581 other_bb = NULL;
2583 else if (stmt
2584 && final_range_test_p (stmt)
2585 && find_edge (first_bb, single_succ (e->dest)))
2587 other_bb = single_succ (e->dest);
2588 if (other_bb == first_bb)
2589 other_bb = NULL;
2592 if (other_bb == NULL)
2593 return;
2595 /* Now do the forward search, moving last_bb to successor bbs
2596 that aren't other_bb. */
2597 while (EDGE_COUNT (last_bb->succs) == 2)
2599 FOR_EACH_EDGE (e, ei, last_bb->succs)
2600 if (e->dest != other_bb)
2601 break;
2602 if (e == NULL)
2603 break;
2604 if (!single_pred_p (e->dest))
2605 break;
2606 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2607 break;
2608 if (!no_side_effect_bb (e->dest))
2609 break;
2610 last_bb = e->dest;
2612 if (first_bb == last_bb)
2613 return;
2614 /* Here basic blocks first_bb through last_bb's predecessor
2615 end with GIMPLE_COND, all of them have one of the edges to
2616 other_bb and another to another block in the range,
2617 all blocks except first_bb don't have side-effects and
2618 last_bb ends with either GIMPLE_COND, or cast satisfying
2619 final_range_test_p. */
2620 for (bb = last_bb; ; bb = single_pred (bb))
2622 enum tree_code code;
2623 tree lhs, rhs;
2625 e = find_edge (bb, other_bb);
2626 stmt = last_stmt (bb);
2627 gimple_set_visited (stmt, true);
2628 if (gimple_code (stmt) != GIMPLE_COND)
2630 use_operand_p use_p;
2631 gimple phi;
2632 edge e2;
2633 unsigned int d;
2635 lhs = gimple_assign_lhs (stmt);
2636 rhs = gimple_assign_rhs1 (stmt);
2637 gcc_assert (bb == last_bb);
2639 /* stmt is
2640 _123 = (int) _234;
2642 followed by:
2643 <bb M>:
2644 # _345 = PHI <_123(N), 1(...), 1(...)>
2646 or 0 instead of 1. If it is 0, the _234
2647 range test is anded together with all the
2648 other range tests, if it is 1, it is ored with
2649 them. */
2650 single_imm_use (lhs, &use_p, &phi);
2651 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2652 e2 = find_edge (first_bb, other_bb);
2653 d = e2->dest_idx;
2654 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2655 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2656 code = BIT_AND_EXPR;
2657 else
2659 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2660 code = BIT_IOR_EXPR;
2663 /* If _234 SSA_NAME_DEF_STMT is
2664 _234 = _567 | _789;
2665 (or &, corresponding to 1/0 in the phi arguments,
2666 push into ops the individual range test arguments
2667 of the bitwise or resp. and, recursively. */
2668 if (!get_ops (rhs, code, &ops,
2669 loop_containing_stmt (stmt))
2670 && has_single_use (rhs))
2672 /* Otherwise, push the _234 range test itself. */
2673 operand_entry_t oe
2674 = (operand_entry_t) pool_alloc (operand_entry_pool);
2676 oe->op = rhs;
2677 oe->rank = code;
2678 oe->id = 0;
2679 oe->count = 1;
2680 ops.safe_push (oe);
2682 continue;
2684 /* Otherwise stmt is GIMPLE_COND. */
2685 code = gimple_cond_code (stmt);
2686 lhs = gimple_cond_lhs (stmt);
2687 rhs = gimple_cond_rhs (stmt);
2688 if (TREE_CODE (lhs) == SSA_NAME
2689 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2690 && ((code != EQ_EXPR && code != NE_EXPR)
2691 || rhs != boolean_false_node
2692 /* Either push into ops the individual bitwise
2693 or resp. and operands, depending on which
2694 edge is other_bb. */
2695 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2696 ^ (code == EQ_EXPR))
2697 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2698 loop_containing_stmt (stmt))))
2700 /* Or push the GIMPLE_COND stmt itself. */
2701 operand_entry_t oe
2702 = (operand_entry_t) pool_alloc (operand_entry_pool);
2704 oe->op = NULL;
2705 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2706 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2707 /* oe->op = NULL signs that there is no SSA_NAME
2708 for the range test, and oe->id instead is the
2709 basic block number, at which's end the GIMPLE_COND
2710 is. */
2711 oe->id = bb->index;
2712 oe->count = 1;
2713 ops.safe_push (oe);
2715 if (bb == first_bb)
2716 break;
2718 if (ops.length () > 1)
2719 optimize_range_tests (ERROR_MARK, &ops);
2720 ops.release ();
2723 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2724 of STMT in it's operands. This is also known as a "destructive
2725 update" operation. */
2727 static bool
2728 is_phi_for_stmt (gimple stmt, tree operand)
2730 gimple def_stmt;
2731 tree lhs;
2732 use_operand_p arg_p;
2733 ssa_op_iter i;
2735 if (TREE_CODE (operand) != SSA_NAME)
2736 return false;
2738 lhs = gimple_assign_lhs (stmt);
2740 def_stmt = SSA_NAME_DEF_STMT (operand);
2741 if (gimple_code (def_stmt) != GIMPLE_PHI)
2742 return false;
2744 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2745 if (lhs == USE_FROM_PTR (arg_p))
2746 return true;
2747 return false;
2750 /* Remove def stmt of VAR if VAR has zero uses and recurse
2751 on rhs1 operand if so. */
2753 static void
2754 remove_visited_stmt_chain (tree var)
2756 gimple stmt;
2757 gimple_stmt_iterator gsi;
2759 while (1)
2761 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2762 return;
2763 stmt = SSA_NAME_DEF_STMT (var);
2764 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2766 var = gimple_assign_rhs1 (stmt);
2767 gsi = gsi_for_stmt (stmt);
2768 gsi_remove (&gsi, true);
2769 release_defs (stmt);
2771 else
2772 return;
2776 /* This function checks three consequtive operands in
2777 passed operands vector OPS starting from OPINDEX and
2778 swaps two operands if it is profitable for binary operation
2779 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2781 We pair ops with the same rank if possible.
2783 The alternative we try is to see if STMT is a destructive
2784 update style statement, which is like:
2785 b = phi (a, ...)
2786 a = c + b;
2787 In that case, we want to use the destructive update form to
2788 expose the possible vectorizer sum reduction opportunity.
2789 In that case, the third operand will be the phi node. This
2790 check is not performed if STMT is null.
2792 We could, of course, try to be better as noted above, and do a
2793 lot of work to try to find these opportunities in >3 operand
2794 cases, but it is unlikely to be worth it. */
2796 static void
2797 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2798 unsigned int opindex, gimple stmt)
2800 operand_entry_t oe1, oe2, oe3;
2802 oe1 = ops[opindex];
2803 oe2 = ops[opindex + 1];
2804 oe3 = ops[opindex + 2];
2806 if ((oe1->rank == oe2->rank
2807 && oe2->rank != oe3->rank)
2808 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2809 && !is_phi_for_stmt (stmt, oe1->op)
2810 && !is_phi_for_stmt (stmt, oe2->op)))
2812 struct operand_entry temp = *oe3;
2813 oe3->op = oe1->op;
2814 oe3->rank = oe1->rank;
2815 oe1->op = temp.op;
2816 oe1->rank= temp.rank;
2818 else if ((oe1->rank == oe3->rank
2819 && oe2->rank != oe3->rank)
2820 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2821 && !is_phi_for_stmt (stmt, oe1->op)
2822 && !is_phi_for_stmt (stmt, oe3->op)))
2824 struct operand_entry temp = *oe2;
2825 oe2->op = oe1->op;
2826 oe2->rank = oe1->rank;
2827 oe1->op = temp.op;
2828 oe1->rank= temp.rank;
2832 /* Recursively rewrite our linearized statements so that the operators
2833 match those in OPS[OPINDEX], putting the computation in rank
2834 order. */
2836 static void
2837 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2838 vec<operand_entry_t> ops, bool moved)
2840 tree rhs1 = gimple_assign_rhs1 (stmt);
2841 tree rhs2 = gimple_assign_rhs2 (stmt);
2842 operand_entry_t oe;
2844 /* If we have three operands left, then we want to make sure the ones
2845 that get the double binary op are chosen wisely. */
2846 if (opindex + 3 == ops.length ())
2847 swap_ops_for_binary_stmt (ops, opindex, stmt);
2849 /* The final recursion case for this function is that you have
2850 exactly two operations left.
2851 If we had one exactly one op in the entire list to start with, we
2852 would have never called this function, and the tail recursion
2853 rewrites them one at a time. */
2854 if (opindex + 2 == ops.length ())
2856 operand_entry_t oe1, oe2;
2858 oe1 = ops[opindex];
2859 oe2 = ops[opindex + 1];
2861 if (rhs1 != oe1->op || rhs2 != oe2->op)
2863 if (dump_file && (dump_flags & TDF_DETAILS))
2865 fprintf (dump_file, "Transforming ");
2866 print_gimple_stmt (dump_file, stmt, 0, 0);
2869 gimple_assign_set_rhs1 (stmt, oe1->op);
2870 gimple_assign_set_rhs2 (stmt, oe2->op);
2871 update_stmt (stmt);
2872 if (rhs1 != oe1->op && rhs1 != oe2->op)
2873 remove_visited_stmt_chain (rhs1);
2875 if (dump_file && (dump_flags & TDF_DETAILS))
2877 fprintf (dump_file, " into ");
2878 print_gimple_stmt (dump_file, stmt, 0, 0);
2881 return;
2884 /* If we hit here, we should have 3 or more ops left. */
2885 gcc_assert (opindex + 2 < ops.length ());
2887 /* Rewrite the next operator. */
2888 oe = ops[opindex];
2890 if (oe->op != rhs2)
2892 if (!moved)
2894 gimple_stmt_iterator gsinow, gsirhs1;
2895 gimple stmt1 = stmt, stmt2;
2896 unsigned int count;
2898 gsinow = gsi_for_stmt (stmt);
2899 count = ops.length () - opindex - 2;
2900 while (count-- != 0)
2902 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2903 gsirhs1 = gsi_for_stmt (stmt2);
2904 gsi_move_before (&gsirhs1, &gsinow);
2905 gsi_prev (&gsinow);
2906 stmt1 = stmt2;
2908 moved = true;
2911 if (dump_file && (dump_flags & TDF_DETAILS))
2913 fprintf (dump_file, "Transforming ");
2914 print_gimple_stmt (dump_file, stmt, 0, 0);
2917 gimple_assign_set_rhs2 (stmt, oe->op);
2918 update_stmt (stmt);
2920 if (dump_file && (dump_flags & TDF_DETAILS))
2922 fprintf (dump_file, " into ");
2923 print_gimple_stmt (dump_file, stmt, 0, 0);
2926 /* Recurse on the LHS of the binary operator, which is guaranteed to
2927 be the non-leaf side. */
2928 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2931 /* Find out how many cycles we need to compute statements chain.
2932 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
2933 maximum number of independent statements we may execute per cycle. */
2935 static int
2936 get_required_cycles (int ops_num, int cpu_width)
2938 int res;
2939 int elog;
2940 unsigned int rest;
2942 /* While we have more than 2 * cpu_width operands
2943 we may reduce number of operands by cpu_width
2944 per cycle. */
2945 res = ops_num / (2 * cpu_width);
2947 /* Remained operands count may be reduced twice per cycle
2948 until we have only one operand. */
2949 rest = (unsigned)(ops_num - res * cpu_width);
2950 elog = exact_log2 (rest);
2951 if (elog >= 0)
2952 res += elog;
2953 else
2954 res += floor_log2 (rest) + 1;
2956 return res;
2959 /* Returns an optimal number of registers to use for computation of
2960 given statements. */
2962 static int
2963 get_reassociation_width (int ops_num, enum tree_code opc,
2964 enum machine_mode mode)
2966 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2967 int width;
2968 int width_min;
2969 int cycles_best;
2971 if (param_width > 0)
2972 width = param_width;
2973 else
2974 width = targetm.sched.reassociation_width (opc, mode);
2976 if (width == 1)
2977 return width;
2979 /* Get the minimal time required for sequence computation. */
2980 cycles_best = get_required_cycles (ops_num, width);
2982 /* Check if we may use less width and still compute sequence for
2983 the same time. It will allow us to reduce registers usage.
2984 get_required_cycles is monotonically increasing with lower width
2985 so we can perform a binary search for the minimal width that still
2986 results in the optimal cycle count. */
2987 width_min = 1;
2988 while (width > width_min)
2990 int width_mid = (width + width_min) / 2;
2992 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2993 width = width_mid;
2994 else if (width_min < width_mid)
2995 width_min = width_mid;
2996 else
2997 break;
3000 return width;
3003 /* Recursively rewrite our linearized statements so that the operators
3004 match those in OPS[OPINDEX], putting the computation in rank
3005 order and trying to allow operations to be executed in
3006 parallel. */
3008 static void
3009 rewrite_expr_tree_parallel (gimple stmt, int width,
3010 vec<operand_entry_t> ops)
3012 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3013 int op_num = ops.length ();
3014 int stmt_num = op_num - 1;
3015 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3016 int op_index = op_num - 1;
3017 int stmt_index = 0;
3018 int ready_stmts_end = 0;
3019 int i = 0;
3020 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3022 /* We start expression rewriting from the top statements.
3023 So, in this loop we create a full list of statements
3024 we will work with. */
3025 stmts[stmt_num - 1] = stmt;
3026 for (i = stmt_num - 2; i >= 0; i--)
3027 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3029 for (i = 0; i < stmt_num; i++)
3031 tree op1, op2;
3033 /* Determine whether we should use results of
3034 already handled statements or not. */
3035 if (ready_stmts_end == 0
3036 && (i - stmt_index >= width || op_index < 1))
3037 ready_stmts_end = i;
3039 /* Now we choose operands for the next statement. Non zero
3040 value in ready_stmts_end means here that we should use
3041 the result of already generated statements as new operand. */
3042 if (ready_stmts_end > 0)
3044 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3045 if (ready_stmts_end > stmt_index)
3046 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3047 else if (op_index >= 0)
3048 op2 = ops[op_index--]->op;
3049 else
3051 gcc_assert (stmt_index < i);
3052 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3055 if (stmt_index >= ready_stmts_end)
3056 ready_stmts_end = 0;
3058 else
3060 if (op_index > 1)
3061 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3062 op2 = ops[op_index--]->op;
3063 op1 = ops[op_index--]->op;
3066 /* If we emit the last statement then we should put
3067 operands into the last statement. It will also
3068 break the loop. */
3069 if (op_index < 0 && stmt_index == i)
3070 i = stmt_num - 1;
3072 if (dump_file && (dump_flags & TDF_DETAILS))
3074 fprintf (dump_file, "Transforming ");
3075 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3078 /* We keep original statement only for the last one. All
3079 others are recreated. */
3080 if (i == stmt_num - 1)
3082 gimple_assign_set_rhs1 (stmts[i], op1);
3083 gimple_assign_set_rhs2 (stmts[i], op2);
3084 update_stmt (stmts[i]);
3086 else
3087 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3089 if (dump_file && (dump_flags & TDF_DETAILS))
3091 fprintf (dump_file, " into ");
3092 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3096 remove_visited_stmt_chain (last_rhs1);
3099 /* Transform STMT, which is really (A +B) + (C + D) into the left
3100 linear form, ((A+B)+C)+D.
3101 Recurse on D if necessary. */
3103 static void
3104 linearize_expr (gimple stmt)
3106 gimple_stmt_iterator gsinow, gsirhs;
3107 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3108 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3109 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3110 gimple newbinrhs = NULL;
3111 struct loop *loop = loop_containing_stmt (stmt);
3113 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3114 && is_reassociable_op (binrhs, rhscode, loop));
3116 gsinow = gsi_for_stmt (stmt);
3117 gsirhs = gsi_for_stmt (binrhs);
3118 gsi_move_before (&gsirhs, &gsinow);
3120 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3121 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3122 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3124 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3125 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3127 if (dump_file && (dump_flags & TDF_DETAILS))
3129 fprintf (dump_file, "Linearized: ");
3130 print_gimple_stmt (dump_file, stmt, 0, 0);
3133 reassociate_stats.linearized++;
3134 update_stmt (binrhs);
3135 update_stmt (binlhs);
3136 update_stmt (stmt);
3138 gimple_set_visited (stmt, true);
3139 gimple_set_visited (binlhs, true);
3140 gimple_set_visited (binrhs, true);
3142 /* Tail recurse on the new rhs if it still needs reassociation. */
3143 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3144 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3145 want to change the algorithm while converting to tuples. */
3146 linearize_expr (stmt);
3149 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3150 it. Otherwise, return NULL. */
3152 static gimple
3153 get_single_immediate_use (tree lhs)
3155 use_operand_p immuse;
3156 gimple immusestmt;
3158 if (TREE_CODE (lhs) == SSA_NAME
3159 && single_imm_use (lhs, &immuse, &immusestmt)
3160 && is_gimple_assign (immusestmt))
3161 return immusestmt;
3163 return NULL;
3166 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3167 representing the negated value. Insertions of any necessary
3168 instructions go before GSI.
3169 This function is recursive in that, if you hand it "a_5" as the
3170 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3171 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3173 static tree
3174 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3176 gimple negatedefstmt= NULL;
3177 tree resultofnegate;
3179 /* If we are trying to negate a name, defined by an add, negate the
3180 add operands instead. */
3181 if (TREE_CODE (tonegate) == SSA_NAME)
3182 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3183 if (TREE_CODE (tonegate) == SSA_NAME
3184 && is_gimple_assign (negatedefstmt)
3185 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3186 && has_single_use (gimple_assign_lhs (negatedefstmt))
3187 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3189 gimple_stmt_iterator gsi;
3190 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3191 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3193 gsi = gsi_for_stmt (negatedefstmt);
3194 rhs1 = negate_value (rhs1, &gsi);
3195 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3197 gsi = gsi_for_stmt (negatedefstmt);
3198 rhs2 = negate_value (rhs2, &gsi);
3199 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3201 update_stmt (negatedefstmt);
3202 return gimple_assign_lhs (negatedefstmt);
3205 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3206 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3207 NULL_TREE, true, GSI_SAME_STMT);
3208 return resultofnegate;
3211 /* Return true if we should break up the subtract in STMT into an add
3212 with negate. This is true when we the subtract operands are really
3213 adds, or the subtract itself is used in an add expression. In
3214 either case, breaking up the subtract into an add with negate
3215 exposes the adds to reassociation. */
3217 static bool
3218 should_break_up_subtract (gimple stmt)
3220 tree lhs = gimple_assign_lhs (stmt);
3221 tree binlhs = gimple_assign_rhs1 (stmt);
3222 tree binrhs = gimple_assign_rhs2 (stmt);
3223 gimple immusestmt;
3224 struct loop *loop = loop_containing_stmt (stmt);
3226 if (TREE_CODE (binlhs) == SSA_NAME
3227 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3228 return true;
3230 if (TREE_CODE (binrhs) == SSA_NAME
3231 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3232 return true;
3234 if (TREE_CODE (lhs) == SSA_NAME
3235 && (immusestmt = get_single_immediate_use (lhs))
3236 && is_gimple_assign (immusestmt)
3237 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3238 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3239 return true;
3240 return false;
3243 /* Transform STMT from A - B into A + -B. */
3245 static void
3246 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3248 tree rhs1 = gimple_assign_rhs1 (stmt);
3249 tree rhs2 = gimple_assign_rhs2 (stmt);
3251 if (dump_file && (dump_flags & TDF_DETAILS))
3253 fprintf (dump_file, "Breaking up subtract ");
3254 print_gimple_stmt (dump_file, stmt, 0, 0);
3257 rhs2 = negate_value (rhs2, gsip);
3258 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3259 update_stmt (stmt);
3262 /* Determine whether STMT is a builtin call that raises an SSA name
3263 to an integer power and has only one use. If so, and this is early
3264 reassociation and unsafe math optimizations are permitted, place
3265 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3266 If any of these conditions does not hold, return FALSE. */
3268 static bool
3269 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3271 tree fndecl, arg1;
3272 REAL_VALUE_TYPE c, cint;
3274 if (!first_pass_instance
3275 || !flag_unsafe_math_optimizations
3276 || !is_gimple_call (stmt)
3277 || !has_single_use (gimple_call_lhs (stmt)))
3278 return false;
3280 fndecl = gimple_call_fndecl (stmt);
3282 if (!fndecl
3283 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3284 return false;
3286 switch (DECL_FUNCTION_CODE (fndecl))
3288 CASE_FLT_FN (BUILT_IN_POW):
3289 *base = gimple_call_arg (stmt, 0);
3290 arg1 = gimple_call_arg (stmt, 1);
3292 if (TREE_CODE (arg1) != REAL_CST)
3293 return false;
3295 c = TREE_REAL_CST (arg1);
3297 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3298 return false;
3300 *exponent = real_to_integer (&c);
3301 real_from_integer (&cint, VOIDmode, *exponent,
3302 *exponent < 0 ? -1 : 0, 0);
3303 if (!real_identical (&c, &cint))
3304 return false;
3306 break;
3308 CASE_FLT_FN (BUILT_IN_POWI):
3309 *base = gimple_call_arg (stmt, 0);
3310 arg1 = gimple_call_arg (stmt, 1);
3312 if (!host_integerp (arg1, 0))
3313 return false;
3315 *exponent = TREE_INT_CST_LOW (arg1);
3316 break;
3318 default:
3319 return false;
3322 /* Expanding negative exponents is generally unproductive, so we don't
3323 complicate matters with those. Exponents of zero and one should
3324 have been handled by expression folding. */
3325 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3326 return false;
3328 return true;
3331 /* Recursively linearize a binary expression that is the RHS of STMT.
3332 Place the operands of the expression tree in the vector named OPS. */
3334 static void
3335 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3336 bool is_associative, bool set_visited)
3338 tree binlhs = gimple_assign_rhs1 (stmt);
3339 tree binrhs = gimple_assign_rhs2 (stmt);
3340 gimple binlhsdef = NULL, binrhsdef = NULL;
3341 bool binlhsisreassoc = false;
3342 bool binrhsisreassoc = false;
3343 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3344 struct loop *loop = loop_containing_stmt (stmt);
3345 tree base = NULL_TREE;
3346 HOST_WIDE_INT exponent = 0;
3348 if (set_visited)
3349 gimple_set_visited (stmt, true);
3351 if (TREE_CODE (binlhs) == SSA_NAME)
3353 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3354 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3355 && !stmt_could_throw_p (binlhsdef));
3358 if (TREE_CODE (binrhs) == SSA_NAME)
3360 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3361 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3362 && !stmt_could_throw_p (binrhsdef));
3365 /* If the LHS is not reassociable, but the RHS is, we need to swap
3366 them. If neither is reassociable, there is nothing we can do, so
3367 just put them in the ops vector. If the LHS is reassociable,
3368 linearize it. If both are reassociable, then linearize the RHS
3369 and the LHS. */
3371 if (!binlhsisreassoc)
3373 tree temp;
3375 /* If this is not a associative operation like division, give up. */
3376 if (!is_associative)
3378 add_to_ops_vec (ops, binrhs);
3379 return;
3382 if (!binrhsisreassoc)
3384 if (rhscode == MULT_EXPR
3385 && TREE_CODE (binrhs) == SSA_NAME
3386 && acceptable_pow_call (binrhsdef, &base, &exponent))
3388 add_repeat_to_ops_vec (ops, base, exponent);
3389 gimple_set_visited (binrhsdef, true);
3391 else
3392 add_to_ops_vec (ops, binrhs);
3394 if (rhscode == MULT_EXPR
3395 && TREE_CODE (binlhs) == SSA_NAME
3396 && acceptable_pow_call (binlhsdef, &base, &exponent))
3398 add_repeat_to_ops_vec (ops, base, exponent);
3399 gimple_set_visited (binlhsdef, true);
3401 else
3402 add_to_ops_vec (ops, binlhs);
3404 return;
3407 if (dump_file && (dump_flags & TDF_DETAILS))
3409 fprintf (dump_file, "swapping operands of ");
3410 print_gimple_stmt (dump_file, stmt, 0, 0);
3413 swap_tree_operands (stmt,
3414 gimple_assign_rhs1_ptr (stmt),
3415 gimple_assign_rhs2_ptr (stmt));
3416 update_stmt (stmt);
3418 if (dump_file && (dump_flags & TDF_DETAILS))
3420 fprintf (dump_file, " is now ");
3421 print_gimple_stmt (dump_file, stmt, 0, 0);
3424 /* We want to make it so the lhs is always the reassociative op,
3425 so swap. */
3426 temp = binlhs;
3427 binlhs = binrhs;
3428 binrhs = temp;
3430 else if (binrhsisreassoc)
3432 linearize_expr (stmt);
3433 binlhs = gimple_assign_rhs1 (stmt);
3434 binrhs = gimple_assign_rhs2 (stmt);
3437 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3438 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3439 rhscode, loop));
3440 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3441 is_associative, set_visited);
3443 if (rhscode == MULT_EXPR
3444 && TREE_CODE (binrhs) == SSA_NAME
3445 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3447 add_repeat_to_ops_vec (ops, base, exponent);
3448 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3450 else
3451 add_to_ops_vec (ops, binrhs);
3454 /* Repropagate the negates back into subtracts, since no other pass
3455 currently does it. */
3457 static void
3458 repropagate_negates (void)
3460 unsigned int i = 0;
3461 tree negate;
3463 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3465 gimple user = get_single_immediate_use (negate);
3467 if (!user || !is_gimple_assign (user))
3468 continue;
3470 /* The negate operand can be either operand of a PLUS_EXPR
3471 (it can be the LHS if the RHS is a constant for example).
3473 Force the negate operand to the RHS of the PLUS_EXPR, then
3474 transform the PLUS_EXPR into a MINUS_EXPR. */
3475 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3477 /* If the negated operand appears on the LHS of the
3478 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3479 to force the negated operand to the RHS of the PLUS_EXPR. */
3480 if (gimple_assign_rhs1 (user) == negate)
3482 swap_tree_operands (user,
3483 gimple_assign_rhs1_ptr (user),
3484 gimple_assign_rhs2_ptr (user));
3487 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3488 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3489 if (gimple_assign_rhs2 (user) == negate)
3491 tree rhs1 = gimple_assign_rhs1 (user);
3492 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3493 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3494 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3495 update_stmt (user);
3498 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3500 if (gimple_assign_rhs1 (user) == negate)
3502 /* We have
3503 x = -a
3504 y = x - b
3505 which we transform into
3506 x = a + b
3507 y = -x .
3508 This pushes down the negate which we possibly can merge
3509 into some other operation, hence insert it into the
3510 plus_negates vector. */
3511 gimple feed = SSA_NAME_DEF_STMT (negate);
3512 tree a = gimple_assign_rhs1 (feed);
3513 tree rhs2 = gimple_assign_rhs2 (user);
3514 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3515 gimple_replace_lhs (feed, negate);
3516 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3517 update_stmt (gsi_stmt (gsi));
3518 gsi2 = gsi_for_stmt (user);
3519 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3520 update_stmt (gsi_stmt (gsi2));
3521 gsi_move_before (&gsi, &gsi2);
3522 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3524 else
3526 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3527 rid of one operation. */
3528 gimple feed = SSA_NAME_DEF_STMT (negate);
3529 tree a = gimple_assign_rhs1 (feed);
3530 tree rhs1 = gimple_assign_rhs1 (user);
3531 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3532 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3533 update_stmt (gsi_stmt (gsi));
3539 /* Returns true if OP is of a type for which we can do reassociation.
3540 That is for integral or non-saturating fixed-point types, and for
3541 floating point type when associative-math is enabled. */
3543 static bool
3544 can_reassociate_p (tree op)
3546 tree type = TREE_TYPE (op);
3547 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3548 || NON_SAT_FIXED_POINT_TYPE_P (type)
3549 || (flag_associative_math && FLOAT_TYPE_P (type)))
3550 return true;
3551 return false;
3554 /* Break up subtract operations in block BB.
3556 We do this top down because we don't know whether the subtract is
3557 part of a possible chain of reassociation except at the top.
3559 IE given
3560 d = f + g
3561 c = a + e
3562 b = c - d
3563 q = b - r
3564 k = t - q
3566 we want to break up k = t - q, but we won't until we've transformed q
3567 = b - r, which won't be broken up until we transform b = c - d.
3569 En passant, clear the GIMPLE visited flag on every statement. */
3571 static void
3572 break_up_subtract_bb (basic_block bb)
3574 gimple_stmt_iterator gsi;
3575 basic_block son;
3577 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3579 gimple stmt = gsi_stmt (gsi);
3580 gimple_set_visited (stmt, false);
3582 if (!is_gimple_assign (stmt)
3583 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3584 continue;
3586 /* Look for simple gimple subtract operations. */
3587 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3589 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3590 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3591 continue;
3593 /* Check for a subtract used only in an addition. If this
3594 is the case, transform it into add of a negate for better
3595 reassociation. IE transform C = A-B into C = A + -B if C
3596 is only used in an addition. */
3597 if (should_break_up_subtract (stmt))
3598 break_up_subtract (stmt, &gsi);
3600 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3601 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3602 plus_negates.safe_push (gimple_assign_lhs (stmt));
3604 for (son = first_dom_son (CDI_DOMINATORS, bb);
3605 son;
3606 son = next_dom_son (CDI_DOMINATORS, son))
3607 break_up_subtract_bb (son);
3610 /* Used for repeated factor analysis. */
3611 struct repeat_factor_d
3613 /* An SSA name that occurs in a multiply chain. */
3614 tree factor;
3616 /* Cached rank of the factor. */
3617 unsigned rank;
3619 /* Number of occurrences of the factor in the chain. */
3620 HOST_WIDE_INT count;
3622 /* An SSA name representing the product of this factor and
3623 all factors appearing later in the repeated factor vector. */
3624 tree repr;
3627 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3628 typedef const struct repeat_factor_d *const_repeat_factor_t;
3631 static vec<repeat_factor> repeat_factor_vec;
3633 /* Used for sorting the repeat factor vector. Sort primarily by
3634 ascending occurrence count, secondarily by descending rank. */
3636 static int
3637 compare_repeat_factors (const void *x1, const void *x2)
3639 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3640 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3642 if (rf1->count != rf2->count)
3643 return rf1->count - rf2->count;
3645 return rf2->rank - rf1->rank;
3648 /* Look for repeated operands in OPS in the multiply tree rooted at
3649 STMT. Replace them with an optimal sequence of multiplies and powi
3650 builtin calls, and remove the used operands from OPS. Return an
3651 SSA name representing the value of the replacement sequence. */
3653 static tree
3654 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3656 unsigned i, j, vec_len;
3657 int ii;
3658 operand_entry_t oe;
3659 repeat_factor_t rf1, rf2;
3660 repeat_factor rfnew;
3661 tree result = NULL_TREE;
3662 tree target_ssa, iter_result;
3663 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3664 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3665 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3666 gimple mul_stmt, pow_stmt;
3668 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3669 target. */
3670 if (!powi_fndecl)
3671 return NULL_TREE;
3673 /* Allocate the repeated factor vector. */
3674 repeat_factor_vec.create (10);
3676 /* Scan the OPS vector for all SSA names in the product and build
3677 up a vector of occurrence counts for each factor. */
3678 FOR_EACH_VEC_ELT (*ops, i, oe)
3680 if (TREE_CODE (oe->op) == SSA_NAME)
3682 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3684 if (rf1->factor == oe->op)
3686 rf1->count += oe->count;
3687 break;
3691 if (j >= repeat_factor_vec.length ())
3693 rfnew.factor = oe->op;
3694 rfnew.rank = oe->rank;
3695 rfnew.count = oe->count;
3696 rfnew.repr = NULL_TREE;
3697 repeat_factor_vec.safe_push (rfnew);
3702 /* Sort the repeated factor vector by (a) increasing occurrence count,
3703 and (b) decreasing rank. */
3704 repeat_factor_vec.qsort (compare_repeat_factors);
3706 /* It is generally best to combine as many base factors as possible
3707 into a product before applying __builtin_powi to the result.
3708 However, the sort order chosen for the repeated factor vector
3709 allows us to cache partial results for the product of the base
3710 factors for subsequent use. When we already have a cached partial
3711 result from a previous iteration, it is best to make use of it
3712 before looking for another __builtin_pow opportunity.
3714 As an example, consider x * x * y * y * y * z * z * z * z.
3715 We want to first compose the product x * y * z, raise it to the
3716 second power, then multiply this by y * z, and finally multiply
3717 by z. This can be done in 5 multiplies provided we cache y * z
3718 for use in both expressions:
3720 t1 = y * z
3721 t2 = t1 * x
3722 t3 = t2 * t2
3723 t4 = t1 * t3
3724 result = t4 * z
3726 If we instead ignored the cached y * z and first multiplied by
3727 the __builtin_pow opportunity z * z, we would get the inferior:
3729 t1 = y * z
3730 t2 = t1 * x
3731 t3 = t2 * t2
3732 t4 = z * z
3733 t5 = t3 * t4
3734 result = t5 * y */
3736 vec_len = repeat_factor_vec.length ();
3738 /* Repeatedly look for opportunities to create a builtin_powi call. */
3739 while (true)
3741 HOST_WIDE_INT power;
3743 /* First look for the largest cached product of factors from
3744 preceding iterations. If found, create a builtin_powi for
3745 it if the minimum occurrence count for its factors is at
3746 least 2, or just use this cached product as our next
3747 multiplicand if the minimum occurrence count is 1. */
3748 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3750 if (rf1->repr && rf1->count > 0)
3751 break;
3754 if (j < vec_len)
3756 power = rf1->count;
3758 if (power == 1)
3760 iter_result = rf1->repr;
3762 if (dump_file && (dump_flags & TDF_DETAILS))
3764 unsigned elt;
3765 repeat_factor_t rf;
3766 fputs ("Multiplying by cached product ", dump_file);
3767 for (elt = j; elt < vec_len; elt++)
3769 rf = &repeat_factor_vec[elt];
3770 print_generic_expr (dump_file, rf->factor, 0);
3771 if (elt < vec_len - 1)
3772 fputs (" * ", dump_file);
3774 fputs ("\n", dump_file);
3777 else
3779 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3780 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3781 build_int_cst (integer_type_node,
3782 power));
3783 gimple_call_set_lhs (pow_stmt, iter_result);
3784 gimple_set_location (pow_stmt, gimple_location (stmt));
3785 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3787 if (dump_file && (dump_flags & TDF_DETAILS))
3789 unsigned elt;
3790 repeat_factor_t rf;
3791 fputs ("Building __builtin_pow call for cached product (",
3792 dump_file);
3793 for (elt = j; elt < vec_len; elt++)
3795 rf = &repeat_factor_vec[elt];
3796 print_generic_expr (dump_file, rf->factor, 0);
3797 if (elt < vec_len - 1)
3798 fputs (" * ", dump_file);
3800 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3801 power);
3805 else
3807 /* Otherwise, find the first factor in the repeated factor
3808 vector whose occurrence count is at least 2. If no such
3809 factor exists, there are no builtin_powi opportunities
3810 remaining. */
3811 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3813 if (rf1->count >= 2)
3814 break;
3817 if (j >= vec_len)
3818 break;
3820 power = rf1->count;
3822 if (dump_file && (dump_flags & TDF_DETAILS))
3824 unsigned elt;
3825 repeat_factor_t rf;
3826 fputs ("Building __builtin_pow call for (", dump_file);
3827 for (elt = j; elt < vec_len; elt++)
3829 rf = &repeat_factor_vec[elt];
3830 print_generic_expr (dump_file, rf->factor, 0);
3831 if (elt < vec_len - 1)
3832 fputs (" * ", dump_file);
3834 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3837 reassociate_stats.pows_created++;
3839 /* Visit each element of the vector in reverse order (so that
3840 high-occurrence elements are visited first, and within the
3841 same occurrence count, lower-ranked elements are visited
3842 first). Form a linear product of all elements in this order
3843 whose occurrencce count is at least that of element J.
3844 Record the SSA name representing the product of each element
3845 with all subsequent elements in the vector. */
3846 if (j == vec_len - 1)
3847 rf1->repr = rf1->factor;
3848 else
3850 for (ii = vec_len - 2; ii >= (int)j; ii--)
3852 tree op1, op2;
3854 rf1 = &repeat_factor_vec[ii];
3855 rf2 = &repeat_factor_vec[ii + 1];
3857 /* Init the last factor's representative to be itself. */
3858 if (!rf2->repr)
3859 rf2->repr = rf2->factor;
3861 op1 = rf1->factor;
3862 op2 = rf2->repr;
3864 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3865 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3866 target_ssa,
3867 op1, op2);
3868 gimple_set_location (mul_stmt, gimple_location (stmt));
3869 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3870 rf1->repr = target_ssa;
3872 /* Don't reprocess the multiply we just introduced. */
3873 gimple_set_visited (mul_stmt, true);
3877 /* Form a call to __builtin_powi for the maximum product
3878 just formed, raised to the power obtained earlier. */
3879 rf1 = &repeat_factor_vec[j];
3880 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3881 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3882 build_int_cst (integer_type_node,
3883 power));
3884 gimple_call_set_lhs (pow_stmt, iter_result);
3885 gimple_set_location (pow_stmt, gimple_location (stmt));
3886 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3889 /* If we previously formed at least one other builtin_powi call,
3890 form the product of this one and those others. */
3891 if (result)
3893 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3894 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3895 result, iter_result);
3896 gimple_set_location (mul_stmt, gimple_location (stmt));
3897 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3898 gimple_set_visited (mul_stmt, true);
3899 result = new_result;
3901 else
3902 result = iter_result;
3904 /* Decrement the occurrence count of each element in the product
3905 by the count found above, and remove this many copies of each
3906 factor from OPS. */
3907 for (i = j; i < vec_len; i++)
3909 unsigned k = power;
3910 unsigned n;
3912 rf1 = &repeat_factor_vec[i];
3913 rf1->count -= power;
3915 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3917 if (oe->op == rf1->factor)
3919 if (oe->count <= k)
3921 ops->ordered_remove (n);
3922 k -= oe->count;
3924 if (k == 0)
3925 break;
3927 else
3929 oe->count -= k;
3930 break;
3937 /* At this point all elements in the repeated factor vector have a
3938 remaining occurrence count of 0 or 1, and those with a count of 1
3939 don't have cached representatives. Re-sort the ops vector and
3940 clean up. */
3941 ops->qsort (sort_by_operand_rank);
3942 repeat_factor_vec.release ();
3944 /* Return the final product computed herein. Note that there may
3945 still be some elements with single occurrence count left in OPS;
3946 those will be handled by the normal reassociation logic. */
3947 return result;
3950 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3952 static void
3953 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3955 tree rhs1;
3957 if (dump_file && (dump_flags & TDF_DETAILS))
3959 fprintf (dump_file, "Transforming ");
3960 print_gimple_stmt (dump_file, stmt, 0, 0);
3963 rhs1 = gimple_assign_rhs1 (stmt);
3964 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3965 update_stmt (stmt);
3966 remove_visited_stmt_chain (rhs1);
3968 if (dump_file && (dump_flags & TDF_DETAILS))
3970 fprintf (dump_file, " into ");
3971 print_gimple_stmt (dump_file, stmt, 0, 0);
3975 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3977 static void
3978 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3979 tree rhs1, tree rhs2)
3981 if (dump_file && (dump_flags & TDF_DETAILS))
3983 fprintf (dump_file, "Transforming ");
3984 print_gimple_stmt (dump_file, stmt, 0, 0);
3987 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3988 update_stmt (gsi_stmt (*gsi));
3989 remove_visited_stmt_chain (rhs1);
3991 if (dump_file && (dump_flags & TDF_DETAILS))
3993 fprintf (dump_file, " into ");
3994 print_gimple_stmt (dump_file, stmt, 0, 0);
3998 /* Reassociate expressions in basic block BB and its post-dominator as
3999 children. */
4001 static void
4002 reassociate_bb (basic_block bb)
4004 gimple_stmt_iterator gsi;
4005 basic_block son;
4006 gimple stmt = last_stmt (bb);
4008 if (stmt && !gimple_visited_p (stmt))
4009 maybe_optimize_range_tests (stmt);
4011 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4013 stmt = gsi_stmt (gsi);
4015 if (is_gimple_assign (stmt)
4016 && !stmt_could_throw_p (stmt))
4018 tree lhs, rhs1, rhs2;
4019 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4021 /* If this is not a gimple binary expression, there is
4022 nothing for us to do with it. */
4023 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4024 continue;
4026 /* If this was part of an already processed statement,
4027 we don't need to touch it again. */
4028 if (gimple_visited_p (stmt))
4030 /* This statement might have become dead because of previous
4031 reassociations. */
4032 if (has_zero_uses (gimple_get_lhs (stmt)))
4034 gsi_remove (&gsi, true);
4035 release_defs (stmt);
4036 /* We might end up removing the last stmt above which
4037 places the iterator to the end of the sequence.
4038 Reset it to the last stmt in this case which might
4039 be the end of the sequence as well if we removed
4040 the last statement of the sequence. In which case
4041 we need to bail out. */
4042 if (gsi_end_p (gsi))
4044 gsi = gsi_last_bb (bb);
4045 if (gsi_end_p (gsi))
4046 break;
4049 continue;
4052 lhs = gimple_assign_lhs (stmt);
4053 rhs1 = gimple_assign_rhs1 (stmt);
4054 rhs2 = gimple_assign_rhs2 (stmt);
4056 /* For non-bit or min/max operations we can't associate
4057 all types. Verify that here. */
4058 if (rhs_code != BIT_IOR_EXPR
4059 && rhs_code != BIT_AND_EXPR
4060 && rhs_code != BIT_XOR_EXPR
4061 && rhs_code != MIN_EXPR
4062 && rhs_code != MAX_EXPR
4063 && (!can_reassociate_p (lhs)
4064 || !can_reassociate_p (rhs1)
4065 || !can_reassociate_p (rhs2)))
4066 continue;
4068 if (associative_tree_code (rhs_code))
4070 vec<operand_entry_t> ops = vNULL;
4071 tree powi_result = NULL_TREE;
4073 /* There may be no immediate uses left by the time we
4074 get here because we may have eliminated them all. */
4075 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4076 continue;
4078 gimple_set_visited (stmt, true);
4079 linearize_expr_tree (&ops, stmt, true, true);
4080 ops.qsort (sort_by_operand_rank);
4081 optimize_ops_list (rhs_code, &ops);
4082 if (undistribute_ops_list (rhs_code, &ops,
4083 loop_containing_stmt (stmt)))
4085 ops.qsort (sort_by_operand_rank);
4086 optimize_ops_list (rhs_code, &ops);
4089 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4090 optimize_range_tests (rhs_code, &ops);
4092 if (first_pass_instance
4093 && rhs_code == MULT_EXPR
4094 && flag_unsafe_math_optimizations)
4095 powi_result = attempt_builtin_powi (stmt, &ops);
4097 /* If the operand vector is now empty, all operands were
4098 consumed by the __builtin_powi optimization. */
4099 if (ops.length () == 0)
4100 transform_stmt_to_copy (&gsi, stmt, powi_result);
4101 else if (ops.length () == 1)
4103 tree last_op = ops.last ()->op;
4105 if (powi_result)
4106 transform_stmt_to_multiply (&gsi, stmt, last_op,
4107 powi_result);
4108 else
4109 transform_stmt_to_copy (&gsi, stmt, last_op);
4111 else
4113 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4114 int ops_num = ops.length ();
4115 int width = get_reassociation_width (ops_num, rhs_code, mode);
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4118 fprintf (dump_file,
4119 "Width = %d was chosen for reassociation\n", width);
4121 if (width > 1
4122 && ops.length () > 3)
4123 rewrite_expr_tree_parallel (stmt, width, ops);
4124 else
4125 rewrite_expr_tree (stmt, 0, ops, false);
4127 /* If we combined some repeated factors into a
4128 __builtin_powi call, multiply that result by the
4129 reassociated operands. */
4130 if (powi_result)
4132 gimple mul_stmt;
4133 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4134 tree target_ssa = make_temp_ssa_name (type, NULL,
4135 "reassocpow");
4136 gimple_set_lhs (stmt, target_ssa);
4137 update_stmt (stmt);
4138 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4139 powi_result,
4140 target_ssa);
4141 gimple_set_location (mul_stmt, gimple_location (stmt));
4142 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4146 ops.release ();
4150 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4151 son;
4152 son = next_dom_son (CDI_POST_DOMINATORS, son))
4153 reassociate_bb (son);
4156 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4157 void debug_ops_vector (vec<operand_entry_t> ops);
4159 /* Dump the operand entry vector OPS to FILE. */
4161 void
4162 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4164 operand_entry_t oe;
4165 unsigned int i;
4167 FOR_EACH_VEC_ELT (ops, i, oe)
4169 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4170 print_generic_expr (file, oe->op, 0);
4174 /* Dump the operand entry vector OPS to STDERR. */
4176 DEBUG_FUNCTION void
4177 debug_ops_vector (vec<operand_entry_t> ops)
4179 dump_ops_vector (stderr, ops);
4182 static void
4183 do_reassoc (void)
4185 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4186 reassociate_bb (EXIT_BLOCK_PTR);
4189 /* Initialize the reassociation pass. */
4191 static void
4192 init_reassoc (void)
4194 int i;
4195 long rank = 2;
4196 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4198 /* Find the loops, so that we can prevent moving calculations in
4199 them. */
4200 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4202 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4204 operand_entry_pool = create_alloc_pool ("operand entry pool",
4205 sizeof (struct operand_entry), 30);
4206 next_operand_entry_id = 0;
4208 /* Reverse RPO (Reverse Post Order) will give us something where
4209 deeper loops come later. */
4210 pre_and_rev_post_order_compute (NULL, bbs, false);
4211 bb_rank = XCNEWVEC (long, last_basic_block);
4212 operand_rank = pointer_map_create ();
4214 /* Give each default definition a distinct rank. This includes
4215 parameters and the static chain. Walk backwards over all
4216 SSA names so that we get proper rank ordering according
4217 to tree_swap_operands_p. */
4218 for (i = num_ssa_names - 1; i > 0; --i)
4220 tree name = ssa_name (i);
4221 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4222 insert_operand_rank (name, ++rank);
4225 /* Set up rank for each BB */
4226 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4227 bb_rank[bbs[i]] = ++rank << 16;
4229 free (bbs);
4230 calculate_dominance_info (CDI_POST_DOMINATORS);
4231 plus_negates = vNULL;
4234 /* Cleanup after the reassociation pass, and print stats if
4235 requested. */
4237 static void
4238 fini_reassoc (void)
4240 statistics_counter_event (cfun, "Linearized",
4241 reassociate_stats.linearized);
4242 statistics_counter_event (cfun, "Constants eliminated",
4243 reassociate_stats.constants_eliminated);
4244 statistics_counter_event (cfun, "Ops eliminated",
4245 reassociate_stats.ops_eliminated);
4246 statistics_counter_event (cfun, "Statements rewritten",
4247 reassociate_stats.rewritten);
4248 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4249 reassociate_stats.pows_encountered);
4250 statistics_counter_event (cfun, "Built-in powi calls created",
4251 reassociate_stats.pows_created);
4253 pointer_map_destroy (operand_rank);
4254 free_alloc_pool (operand_entry_pool);
4255 free (bb_rank);
4256 plus_negates.release ();
4257 free_dominance_info (CDI_POST_DOMINATORS);
4258 loop_optimizer_finalize ();
4261 /* Gate and execute functions for Reassociation. */
4263 static unsigned int
4264 execute_reassoc (void)
4266 init_reassoc ();
4268 do_reassoc ();
4269 repropagate_negates ();
4271 fini_reassoc ();
4272 return 0;
4275 static bool
4276 gate_tree_ssa_reassoc (void)
4278 return flag_tree_reassoc != 0;
4281 struct gimple_opt_pass pass_reassoc =
4284 GIMPLE_PASS,
4285 "reassoc", /* name */
4286 OPTGROUP_NONE, /* optinfo_flags */
4287 gate_tree_ssa_reassoc, /* gate */
4288 execute_reassoc, /* execute */
4289 NULL, /* sub */
4290 NULL, /* next */
4291 0, /* static_pass_number */
4292 TV_TREE_REASSOC, /* tv_id */
4293 PROP_cfg | PROP_ssa, /* properties_required */
4294 0, /* properties_provided */
4295 0, /* properties_destroyed */
4296 0, /* todo_flags_start */
4297 TODO_verify_ssa
4298 | TODO_verify_flow
4299 | TODO_ggc_collect /* todo_flags_finish */