1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-ssa.h"
31 #include "gimple.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
35 #include "vec.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
38 #include "cfgloop.h"
39 #include "flags.h"
40 #include "target.h"
41 #include "params.h"
42 #include "diagnostic-core.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
68 5. Repropagate negates, as nothing else will clean it up ATM.
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
86 IE if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
89 a (1), b (1), c (1), d (2), e (2)
92 We start with our merge worklist empty, and the ops list with all of
93 those on it.
95 You want to first merge all leaves of the same rank, as much as
96 possible.
98 So first build a binary op of
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
110 Then build a binary op of d + e
111 mergetmp2 = d + e
113 and put mergetmp2 on the merge worklist.
115 so merge worklist = {mergetmp, c, mergetmp2}
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
120 So we have
122 build binary op
123 mergetmp3 = mergetmp + c
125 worklist = {mergetmp2, mergetmp3}
127 mergetmp4 = mergetmp2 + mergetmp3
129 worklist = {mergetmp4}
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
140 So why don't we do this?
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or less ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
148 mergetmp = op1 + op2
149 newstmt = mergetmp + op3
151 instead of
152 mergetmp = op2 + op3
153 newstmt = mergetmp + op1
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
157 can do.
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
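/* As a quick illustration of the steps above (a hypothetical input,
   not a testcase from the sources), consider the statements

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 - d;

   Step 1 turns the subtractions into additions of negates, step 2
   linearizes the result into the flat operand list {a, -b, c, -d},
   step 3 would cancel pairs such as "x + -x" if any were present,
   step 4 rewrites the list back into left-linear binary additions in
   rank order, and step 5 turns surviving negates back into
   subtractions.  */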
166 /* Statistics */
167 static struct
169 int linearized;
170 int constants_eliminated;
171 int ops_eliminated;
172 int rewritten;
173 int pows_encountered;
174 int pows_created;
175 } reassociate_stats;
177 /* Operator, rank pair. */
178 typedef struct operand_entry
180 unsigned int rank;
181 int id;
182 tree op;
183 unsigned int count;
184 } *operand_entry_t;
186 static alloc_pool operand_entry_pool;
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id;
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
194 depth. */
195 static long *bb_rank;
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t *operand_rank;
200 /* Forward decls. */
201 static long get_rank (tree);
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
216 static long
217 phi_rank (gimple stmt)
219 basic_block bb = gimple_bb (stmt);
220 struct loop *father = bb->loop_father;
221 tree res;
222 unsigned i;
223 use_operand_p use;
224 gimple use_stmt;
226 /* We only care about real loops (those with a latch). */
227 if (!father->latch)
228 return bb_rank[bb->index];
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb != father->header
232 || father->inner)
233 return bb_rank[bb->index];
235 /* Ignore virtual SSA_NAMEs. */
236 res = gimple_phi_result (stmt);
237 if (virtual_operand_p (res))
238 return bb_rank[bb->index];
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res, &use, &use_stmt)
243 || gimple_bb (use_stmt)->loop_father != father)
244 return bb_rank[bb->index];
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i = 0; i < gimple_phi_num_args (stmt); i++)
249 tree arg = gimple_phi_arg_def (stmt, i);
250 if (TREE_CODE (arg) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg))
253 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
254 if (gimple_bb (def_stmt)->loop_father == father)
255 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
259 /* Must be an uninteresting phi. */
260 return bb_rank[bb->index];
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
265 return FALSE. */
266 static bool
267 loop_carried_phi (tree exp)
269 gimple phi_stmt;
270 long block_rank;
272 if (TREE_CODE (exp) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp))
274 return false;
276 phi_stmt = SSA_NAME_DEF_STMT (exp);
278 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
279 return false;
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
286 if (phi_rank (phi_stmt) != block_rank)
287 return true;
289 return false;
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
296 static long
297 propagate_rank (long rank, tree op)
299 long op_rank;
301 if (loop_carried_phi (op))
302 return rank;
304 op_rank = get_rank (op);
306 return MAX (rank, op_rank);
309 /* Look up the operand rank structure for expression E. */
311 static inline long
312 find_operand_rank (tree e)
314 void **slot = pointer_map_contains (operand_rank, e);
315 return slot ? (long) (intptr_t) *slot : -1;
318 /* Insert {E,RANK} into the operand rank hashtable. */
320 static inline void
321 insert_operand_rank (tree e, long rank)
323 void **slot;
324 gcc_assert (rank > 0);
325 slot = pointer_map_insert (operand_rank, e);
326 gcc_assert (!*slot);
327 *slot = (void *) (intptr_t) rank;
330 /* Given an expression E, return the rank of the expression. */
332 static long
333 get_rank (tree e)
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e))
337 return 0;
339 /* SSA_NAME's have the rank of the expression they are the result
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
344 the BB.
345 For simple operations, the rank is the maximum rank of any of
346 its operands, or the bb_rank, whichever is less.
347 I make no claims that this is optimal, however, it gives good
348 results. */
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
354 x_1 = phi(x_0, x_2)
355 b = a + x_1
356 c = b + d
357 x_2 = c + e
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
363 x_1 = phi(x_0, x_2)
364 b = a + d
365 c = b + e
366 x_2 = c + x_1
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
376 if (TREE_CODE (e) == SSA_NAME)
378 gimple stmt;
379 long rank;
380 int i, n;
381 tree op;
383 if (SSA_NAME_IS_DEFAULT_DEF (e))
384 return find_operand_rank (e);
386 stmt = SSA_NAME_DEF_STMT (e);
387 if (gimple_code (stmt) == GIMPLE_PHI)
388 return phi_rank (stmt);
390 if (!is_gimple_assign (stmt)
391 || gimple_vdef (stmt))
392 return bb_rank[gimple_bb (stmt)->index];
394 /* If we already have a rank for this expression, use that. */
395 rank = find_operand_rank (e);
396 if (rank != -1)
397 return rank;
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
402 rank = 0;
403 if (gimple_assign_single_p (stmt))
405 tree rhs = gimple_assign_rhs1 (stmt);
406 n = TREE_OPERAND_LENGTH (rhs);
407 if (n == 0)
408 rank = propagate_rank (rank, rhs);
409 else
411 for (i = 0; i < n; i++)
413 op = TREE_OPERAND (rhs, i);
415 if (op != NULL_TREE)
416 rank = propagate_rank (rank, op);
420 else
422 n = gimple_num_ops (stmt);
423 for (i = 1; i < n; i++)
425 op = gimple_op (stmt, i);
426 gcc_assert (op);
427 rank = propagate_rank (rank, op);
431 if (dump_file && (dump_flags & TDF_DETAILS))
433 fprintf (dump_file, "Rank for ");
434 print_generic_expr (dump_file, e, 0);
435 fprintf (dump_file, " is %ld\n", (rank + 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e, (rank + 1));
440 return (rank + 1);
443 /* Globals, etc, are rank 0 */
444 return 0;
448 /* We want integer ones to end up last no matter what, since they are
449 the ones we can do the most with. */
450 #define INTEGER_CONST_TYPE 1 << 3
451 #define FLOAT_CONST_TYPE 1 << 2
452 #define OTHER_CONST_TYPE 1 << 1
454 /* Classify an invariant tree into integer, float, or other, so that
455 we can sort them to be near other constants of the same type. */
456 static inline int
457 constant_type (tree t)
459 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
460 return INTEGER_CONST_TYPE;
461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
462 return FLOAT_CONST_TYPE;
463 else
464 return OTHER_CONST_TYPE;
467 /* qsort comparison function to sort operand entries PA and PB by rank
468 so that the sorted array is ordered by rank in decreasing order. */
469 static int
470 sort_by_operand_rank (const void *pa, const void *pb)
472 const operand_entry_t oea = *(const operand_entry_t *)pa;
473 const operand_entry_t oeb = *(const operand_entry_t *)pb;
475 /* It's nicer for optimize_expression if constants that are likely
 476    to fold when added/multiplied/whatever are put next to each
477 other. Since all constants have rank 0, order them by type. */
478 if (oeb->rank == 0 && oea->rank == 0)
480 if (constant_type (oeb->op) != constant_type (oea->op))
481 return constant_type (oeb->op) - constant_type (oea->op);
482 else
483 /* To make sorting result stable, we use unique IDs to determine
484 order. */
485 return oeb->id - oea->id;
488 /* Lastly, make sure the versions that are the same go next to each
489 other. We use SSA_NAME_VERSION because it's stable. */
490 if ((oeb->rank - oea->rank == 0)
491 && TREE_CODE (oea->op) == SSA_NAME
492 && TREE_CODE (oeb->op) == SSA_NAME)
494 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
495 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
496 else
497 return oeb->id - oea->id;
500 if (oeb->rank != oea->rank)
501 return oeb->rank - oea->rank;
502 else
503 return oeb->id - oea->id;
506 /* Add an operand entry to *OPS for the tree operand OP. */
508 static void
509 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
511 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
513 oe->op = op;
514 oe->rank = get_rank (op);
515 oe->id = next_operand_entry_id++;
516 oe->count = 1;
517 ops->safe_push (oe);
520 /* Add an operand entry to *OPS for the tree operand OP with repeat
521 count REPEAT. */
523 static void
524 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
525 HOST_WIDE_INT repeat)
527 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
529 oe->op = op;
530 oe->rank = get_rank (op);
531 oe->id = next_operand_entry_id++;
532 oe->count = repeat;
533 ops->safe_push (oe);
535 reassociate_stats.pows_encountered++;
 538 /* Return true if STMT is a reassociable operation containing a binary
539 operation with tree code CODE, and is inside LOOP. */
541 static bool
542 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
544 basic_block bb = gimple_bb (stmt);
546 if (gimple_bb (stmt) == NULL)
547 return false;
549 if (!flow_bb_inside_loop_p (loop, bb))
550 return false;
552 if (is_gimple_assign (stmt)
553 && gimple_assign_rhs_code (stmt) == code
554 && has_single_use (gimple_assign_lhs (stmt)))
555 return true;
557 return false;
561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 562    operand of that operation.  Otherwise, return NULL_TREE. */
564 static tree
565 get_unary_op (tree name, enum tree_code opcode)
567 gimple stmt = SSA_NAME_DEF_STMT (name);
569 if (!is_gimple_assign (stmt))
570 return NULL_TREE;
572 if (gimple_assign_rhs_code (stmt) == opcode)
573 return gimple_assign_rhs1 (stmt);
574 return NULL_TREE;
577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
578 eliminate through equivalences, do so, remove them from OPS, and
579 return true. Otherwise, return false. */
581 static bool
582 eliminate_duplicate_pair (enum tree_code opcode,
583 vec<operand_entry_t> *ops,
584 bool *all_done,
585 unsigned int i,
586 operand_entry_t curr,
587 operand_entry_t last)
 590 /* If we have two of the same op, and the opcode is &, |, min, or max,
591 we can eliminate one of them.
592 If we have two of the same op, and the opcode is ^, we can
593 eliminate both of them. */
595 if (last && last->op == curr->op)
597 switch (opcode)
599 case MAX_EXPR:
600 case MIN_EXPR:
601 case BIT_IOR_EXPR:
602 case BIT_AND_EXPR:
603 if (dump_file && (dump_flags & TDF_DETAILS))
605 fprintf (dump_file, "Equivalence: ");
606 print_generic_expr (dump_file, curr->op, 0);
607 fprintf (dump_file, " [&|minmax] ");
608 print_generic_expr (dump_file, last->op, 0);
609 fprintf (dump_file, " -> ");
610 print_generic_stmt (dump_file, last->op, 0);
613 ops->ordered_remove (i);
614 reassociate_stats.ops_eliminated ++;
616 return true;
618 case BIT_XOR_EXPR:
619 if (dump_file && (dump_flags & TDF_DETAILS))
621 fprintf (dump_file, "Equivalence: ");
622 print_generic_expr (dump_file, curr->op, 0);
623 fprintf (dump_file, " ^ ");
624 print_generic_expr (dump_file, last->op, 0);
625 fprintf (dump_file, " -> nothing\n");
628 reassociate_stats.ops_eliminated += 2;
630 if (ops->length () == 2)
632 ops->create (0);
633 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
634 *all_done = true;
636 else
638 ops->ordered_remove (i-1);
639 ops->ordered_remove (i-1);
642 return true;
644 default:
645 break;
648 return false;
651 static vec<tree> plus_negates;
653 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
654 expression, look in OPS for a corresponding positive operation to cancel
655 it out. If we find one, remove the other from OPS, replace
656 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
657 return false. */
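/* Recall the bit identity ~X == -X - 1, so X + ~X is the all-ones
   value of the type; this is why the BIT_NOT_EXPR case below replaces
   the pair with the constant -1.  */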
659 static bool
660 eliminate_plus_minus_pair (enum tree_code opcode,
661 vec<operand_entry_t> *ops,
662 unsigned int currindex,
663 operand_entry_t curr)
665 tree negateop;
666 tree notop;
667 unsigned int i;
668 operand_entry_t oe;
670 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
671 return false;
673 negateop = get_unary_op (curr->op, NEGATE_EXPR);
674 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
675 if (negateop == NULL_TREE && notop == NULL_TREE)
676 return false;
678 /* Any non-negated version will have a rank that is one less than
679 the current rank. So once we hit those ranks, if we don't find
680 one, we can stop. */
682 for (i = currindex + 1;
683 ops->iterate (i, &oe)
684 && oe->rank >= curr->rank - 1 ;
685 i++)
687 if (oe->op == negateop)
690 if (dump_file && (dump_flags & TDF_DETAILS))
692 fprintf (dump_file, "Equivalence: ");
693 print_generic_expr (dump_file, negateop, 0);
694 fprintf (dump_file, " + -");
695 print_generic_expr (dump_file, oe->op, 0);
696 fprintf (dump_file, " -> 0\n");
699 ops->ordered_remove (i);
700 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
701 ops->ordered_remove (currindex);
702 reassociate_stats.ops_eliminated ++;
704 return true;
706 else if (oe->op == notop)
708 tree op_type = TREE_TYPE (oe->op);
710 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, notop, 0);
714 fprintf (dump_file, " + ~");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> -1\n");
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
724 return true;
728 /* CURR->OP is a negate expr in a plus expr: save it for later
729 inspection in repropagate_negates(). */
730 if (negateop != NULL_TREE)
731 plus_negates.safe_push (curr->op);
733 return false;
 736 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
737 bitwise not expression, look in OPS for a corresponding operand to
738 cancel it out. If we find one, remove the other from OPS, replace
739 OPS[CURRINDEX] with 0, and return true. Otherwise, return
740 false. */
742 static bool
743 eliminate_not_pairs (enum tree_code opcode,
744 vec<operand_entry_t> *ops,
745 unsigned int currindex,
746 operand_entry_t curr)
748 tree notop;
749 unsigned int i;
750 operand_entry_t oe;
752 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
753 || TREE_CODE (curr->op) != SSA_NAME)
754 return false;
756 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
757 if (notop == NULL_TREE)
758 return false;
760 /* Any non-not version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
762 one, we can stop. */
764 for (i = currindex + 1;
765 ops->iterate (i, &oe)
766 && oe->rank >= curr->rank - 1;
767 i++)
769 if (oe->op == notop)
771 if (dump_file && (dump_flags & TDF_DETAILS))
773 fprintf (dump_file, "Equivalence: ");
774 print_generic_expr (dump_file, notop, 0);
775 if (opcode == BIT_AND_EXPR)
776 fprintf (dump_file, " & ~");
777 else if (opcode == BIT_IOR_EXPR)
778 fprintf (dump_file, " | ~");
779 print_generic_expr (dump_file, oe->op, 0);
780 if (opcode == BIT_AND_EXPR)
781 fprintf (dump_file, " -> 0\n");
782 else if (opcode == BIT_IOR_EXPR)
783 fprintf (dump_file, " -> -1\n");
786 if (opcode == BIT_AND_EXPR)
787 oe->op = build_zero_cst (TREE_TYPE (oe->op));
788 else if (opcode == BIT_IOR_EXPR)
789 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
790 TYPE_PRECISION (TREE_TYPE (oe->op)));
792 reassociate_stats.ops_eliminated += ops->length () - 1;
793 ops->truncate (0);
794 ops->quick_push (oe);
795 return true;
799 return false;
802 /* Use constant value that may be present in OPS to try to eliminate
803 operands. Note that this function is only really used when we've
804 eliminated ops for other reasons, or merged constants. Across
805 single statements, fold already does all of this, plus more. There
806 is little point in duplicating logic, so I've only included the
807 identities that I could ever construct testcases to trigger. */
809 static void
810 eliminate_using_constants (enum tree_code opcode,
811 vec<operand_entry_t> *ops)
813 operand_entry_t oelast = ops->last ();
814 tree type = TREE_TYPE (oelast->op);
816 if (oelast->rank == 0
817 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
819 switch (opcode)
821 case BIT_AND_EXPR:
822 if (integer_zerop (oelast->op))
824 if (ops->length () != 1)
826 if (dump_file && (dump_flags & TDF_DETAILS))
827 fprintf (dump_file, "Found & 0, removing all other ops\n");
829 reassociate_stats.ops_eliminated += ops->length () - 1;
831 ops->truncate (0);
832 ops->quick_push (oelast);
833 return;
836 else if (integer_all_onesp (oelast->op))
838 if (ops->length () != 1)
840 if (dump_file && (dump_flags & TDF_DETAILS))
841 fprintf (dump_file, "Found & -1, removing\n");
842 ops->pop ();
843 reassociate_stats.ops_eliminated++;
846 break;
847 case BIT_IOR_EXPR:
848 if (integer_all_onesp (oelast->op))
850 if (ops->length () != 1)
852 if (dump_file && (dump_flags & TDF_DETAILS))
853 fprintf (dump_file, "Found | -1, removing all other ops\n");
855 reassociate_stats.ops_eliminated += ops->length () - 1;
857 ops->truncate (0);
858 ops->quick_push (oelast);
859 return;
862 else if (integer_zerop (oelast->op))
864 if (ops->length () != 1)
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 fprintf (dump_file, "Found | 0, removing\n");
868 ops->pop ();
869 reassociate_stats.ops_eliminated++;
872 break;
873 case MULT_EXPR:
874 if (integer_zerop (oelast->op)
875 || (FLOAT_TYPE_P (type)
876 && !HONOR_NANS (TYPE_MODE (type))
877 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
878 && real_zerop (oelast->op)))
880 if (ops->length () != 1)
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found * 0, removing all other ops\n");
885 reassociate_stats.ops_eliminated += ops->length () - 1;
886 ops->truncate (1);
887 ops->quick_push (oelast);
888 return;
891 else if (integer_onep (oelast->op)
892 || (FLOAT_TYPE_P (type)
893 && !HONOR_SNANS (TYPE_MODE (type))
894 && real_onep (oelast->op)))
896 if (ops->length () != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found * 1, removing\n");
900 ops->pop ();
901 reassociate_stats.ops_eliminated++;
902 return;
905 break;
906 case BIT_XOR_EXPR:
907 case PLUS_EXPR:
908 case MINUS_EXPR:
909 if (integer_zerop (oelast->op)
910 || (FLOAT_TYPE_P (type)
911 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
912 && fold_real_zero_addition_p (type, oelast->op,
913 opcode == MINUS_EXPR)))
915 if (ops->length () != 1)
917 if (dump_file && (dump_flags & TDF_DETAILS))
918 fprintf (dump_file, "Found [|^+] 0, removing\n");
919 ops->pop ();
920 reassociate_stats.ops_eliminated++;
921 return;
924 break;
925 default:
926 break;
932 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
933 bool, bool);
935 /* Structure for tracking and counting operands. */
936 typedef struct oecount_s {
937 int cnt;
938 int id;
939 enum tree_code oecode;
940 tree op;
941 } oecount;
944 /* The heap for the oecount hashtable and the sorted list of operands. */
945 static vec<oecount> cvec;
948 /* Oecount hashtable helpers. */
950 struct oecount_hasher : typed_noop_remove <void>
952 /* Note that this hash table stores integers, not pointers.
953 So, observe the casting in the member functions. */
954 typedef void value_type;
955 typedef void compare_type;
956 static inline hashval_t hash (const value_type *);
957 static inline bool equal (const value_type *, const compare_type *);
960 /* Hash function for oecount. */
962 inline hashval_t
963 oecount_hasher::hash (const value_type *p)
965 const oecount *c = &cvec[(size_t)p - 42];
966 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
969 /* Comparison function for oecount. */
971 inline bool
972 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
974 const oecount *c1 = &cvec[(size_t)p1 - 42];
975 const oecount *c2 = &cvec[(size_t)p2 - 42];
976 return (c1->oecode == c2->oecode
977 && c1->op == c2->op);
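/* The values stored in this hash table are indices into CVEC biased by
   42 (an entry value V refers to cvec[V - 42]), presumably so that a
   stored value can never collide with the value the table reserves for
   an empty slot.  */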
980 /* Comparison function for qsort sorting oecount elements by count. */
982 static int
983 oecount_cmp (const void *p1, const void *p2)
985 const oecount *c1 = (const oecount *)p1;
986 const oecount *c2 = (const oecount *)p2;
987 if (c1->cnt != c2->cnt)
988 return c1->cnt - c2->cnt;
989 else
990 /* If counts are identical, use unique IDs to stabilize qsort. */
991 return c1->id - c2->id;
994 /* Return TRUE iff STMT represents a builtin call that raises OP
995 to some exponent. */
997 static bool
998 stmt_is_power_of_op (gimple stmt, tree op)
1000 tree fndecl;
1002 if (!is_gimple_call (stmt))
1003 return false;
1005 fndecl = gimple_call_fndecl (stmt);
1007 if (!fndecl
1008 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1009 return false;
1011 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1013 CASE_FLT_FN (BUILT_IN_POW):
1014 CASE_FLT_FN (BUILT_IN_POWI):
1015 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1017 default:
1018 return false;
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023 in place and return the result. Assumes that stmt_is_power_of_op
1024 was previously called for STMT and returned TRUE. */
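/* For instance (illustrative only), a call __builtin_pow (x, 5.0) is
   rewritten in place to __builtin_pow (x, 4.0) and 4 is returned.  */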
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt)
1029 REAL_VALUE_TYPE c, cint;
1030 HOST_WIDE_INT power;
1031 tree arg1;
1033 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1035 CASE_FLT_FN (BUILT_IN_POW):
1036 arg1 = gimple_call_arg (stmt, 1);
1037 c = TREE_REAL_CST (arg1);
1038 power = real_to_integer (&c) - 1;
1039 real_from_integer (&cint, VOIDmode, power, 0, 0);
1040 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1041 return power;
1043 CASE_FLT_FN (BUILT_IN_POWI):
1044 arg1 = gimple_call_arg (stmt, 1);
1045 power = TREE_INT_CST_LOW (arg1) - 1;
1046 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1047 return power;
1049 default:
1050 gcc_unreachable ();
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1056 replace *DEF with OP as well. */
1058 static void
1059 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1061 tree lhs;
1062 gimple use_stmt;
1063 use_operand_p use;
1064 gimple_stmt_iterator gsi;
1066 if (is_gimple_call (stmt))
1067 lhs = gimple_call_lhs (stmt);
1068 else
1069 lhs = gimple_assign_lhs (stmt);
1071 gcc_assert (has_single_use (lhs));
1072 single_imm_use (lhs, &use, &use_stmt);
1073 if (lhs == *def)
1074 *def = op;
1075 SET_USE (use, op);
1076 if (TREE_CODE (op) != SSA_NAME)
1077 update_stmt (use_stmt);
1078 gsi = gsi_for_stmt (stmt);
1079 unlink_stmt_vdef (stmt);
1080 gsi_remove (&gsi, true);
1081 release_defs (stmt);
1084 /* Walks the linear chain with result *DEF searching for an operation
1085 with operand OP and code OPCODE removing that from the chain. *DEF
1086 is updated if there is only one operand but no operation left. */
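/* For example (hypothetical chain), with *DEF defined by
     t1 = a * b;  d = t1 * c;
   calling zero_one_operation (&d, MULT_EXPR, b) removes the factor b,
   leaving the single statement d = a * c.  */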
1088 static void
1089 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1091 gimple stmt = SSA_NAME_DEF_STMT (*def);
1095 tree name;
1097 if (opcode == MULT_EXPR
1098 && stmt_is_power_of_op (stmt, op))
1100 if (decrement_power (stmt) == 1)
1101 propagate_op_to_single_use (op, stmt, def);
1102 return;
1105 name = gimple_assign_rhs1 (stmt);
1107 /* If this is the operation we look for and one of the operands
 1108    is ours, simply propagate the other operand into the stmt's
1109 single use. */
1110 if (gimple_assign_rhs_code (stmt) == opcode
1111 && (name == op
1112 || gimple_assign_rhs2 (stmt) == op))
1114 if (name == op)
1115 name = gimple_assign_rhs2 (stmt);
1116 propagate_op_to_single_use (name, stmt, def);
1117 return;
1120 /* We might have a multiply of two __builtin_pow* calls, and
1121 the operand might be hiding in the rightmost one. */
1122 if (opcode == MULT_EXPR
1123 && gimple_assign_rhs_code (stmt) == opcode
1124 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1125 && has_single_use (gimple_assign_rhs2 (stmt)))
1127 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1128 if (stmt_is_power_of_op (stmt2, op))
1130 if (decrement_power (stmt2) == 1)
1131 propagate_op_to_single_use (op, stmt2, def);
1132 return;
1136 /* Continue walking the chain. */
1137 gcc_assert (name != op
1138 && TREE_CODE (name) == SSA_NAME);
1139 stmt = SSA_NAME_DEF_STMT (name);
1141 while (1);
1144 /* Returns the UID of STMT if it is non-NULL. Otherwise return 1. */
1146 static inline unsigned
1147 get_stmt_uid_with_default (gimple stmt)
1149 return stmt ? gimple_uid (stmt) : 1;
 1152 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name
 1153    of type TYPE for the result.  Places the statement after the definition
 1154    of either OP1 or OP2.  Returns the new statement. */
1156 static gimple
1157 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1159 gimple op1def = NULL, op2def = NULL;
1160 gimple_stmt_iterator gsi;
1161 tree op;
1162 gimple sum;
1164 /* Create the addition statement. */
1165 op = make_ssa_name (type, NULL);
1166 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1168 /* Find an insertion place and insert. */
1169 if (TREE_CODE (op1) == SSA_NAME)
1170 op1def = SSA_NAME_DEF_STMT (op1);
1171 if (TREE_CODE (op2) == SSA_NAME)
1172 op2def = SSA_NAME_DEF_STMT (op2);
1173 if ((!op1def || gimple_nop_p (op1def))
1174 && (!op2def || gimple_nop_p (op2def)))
1176 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1177 gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1178 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1180 else if ((!op1def || gimple_nop_p (op1def))
1181 || (op2def && !gimple_nop_p (op2def)
1182 && stmt_dominates_stmt_p (op1def, op2def)))
1184 if (gimple_code (op2def) == GIMPLE_PHI)
1186 gsi = gsi_after_labels (gimple_bb (op2def));
1187 gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1188 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1190 else
1192 if (!stmt_ends_bb_p (op2def))
1194 gsi = gsi_for_stmt (op2def);
1195 gimple_set_uid (sum, gimple_uid (op2def));
1196 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1198 else
1200 edge e;
1201 edge_iterator ei;
1203 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1204 if (e->flags & EDGE_FALLTHRU)
1205 gsi_insert_on_edge_immediate (e, sum);
1209 else
1211 if (gimple_code (op1def) == GIMPLE_PHI)
1213 gsi = gsi_after_labels (gimple_bb (op1def));
1214 gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1215 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1217 else
1219 if (!stmt_ends_bb_p (op1def))
1221 gsi = gsi_for_stmt (op1def);
1222 gimple_set_uid (sum, gimple_uid (op1def));
1223 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1225 else
1227 edge e;
1228 edge_iterator ei;
1230 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1231 if (e->flags & EDGE_FALLTHRU)
1232 gsi_insert_on_edge_immediate (e, sum);
1236 update_stmt (sum);
1238 return sum;
1241 /* Perform un-distribution of divisions and multiplications.
1242 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1243 to (A + B) / X for real X.
1245 The algorithm is organized as follows.
1247 - First we walk the addition chain *OPS looking for summands that
1248 are defined by a multiplication or a real division. This results
1249 in the candidates bitmap with relevant indices into *OPS.
1251 - Second we build the chains of multiplications or divisions for
1252 these candidates, counting the number of occurrences of (operand, code)
1253 pairs in all of the candidates chains.
1255 - Third we sort the (operand, code) pairs by number of occurrence and
1256 process them starting with the pair with the most uses.
1258 * For each such pair we walk the candidates again to build a
1259 second candidate bitmap noting all multiplication/division chains
1260 that have at least one occurrence of (operand, code).
1262 * We build an alternate addition chain only covering these
1263 candidates with one (operand, code) operation removed from their
1264 multiplication/division chain.
1266 * The first candidate gets replaced by the alternate addition chain
1267 multiplied/divided by the operand.
1269 * All candidate chains get disabled for further processing and
1270 processing of (operand, code) pairs continues.
1272 The alternate addition chains built are re-processed by the main
1273 reassociation algorithm which allows optimizing a * x * y + b * y * x
1274 to (a + b ) * x * y in one invocation of the reassociation pass. */
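/* A worked example (hypothetical operand list): for a*x + b*x + c the
   candidates are the two multiplications; the counting table records
   (x, *) twice and (a, *), (b, *) once each, so the pair (x, *) is
   processed first.  zero_one_operation strips x from both chains,
   build_and_add_sum builds the alternate chain a + b, and the first
   candidate is replaced by (a + b) * x, leaving the operand list
   { (a + b) * x, 0, c } to be reassociated further.  */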
1276 static bool
1277 undistribute_ops_list (enum tree_code opcode,
1278 vec<operand_entry_t> *ops, struct loop *loop)
1280 unsigned int length = ops->length ();
1281 operand_entry_t oe1;
1282 unsigned i, j;
1283 sbitmap candidates, candidates2;
1284 unsigned nr_candidates, nr_candidates2;
1285 sbitmap_iterator sbi0;
1286 vec<operand_entry_t> *subops;
1287 hash_table <oecount_hasher> ctable;
1288 bool changed = false;
1289 int next_oecount_id = 0;
1291 if (length <= 1
1292 || opcode != PLUS_EXPR)
1293 return false;
1295 /* Build a list of candidates to process. */
1296 candidates = sbitmap_alloc (length);
1297 bitmap_clear (candidates);
1298 nr_candidates = 0;
1299 FOR_EACH_VEC_ELT (*ops, i, oe1)
1301 enum tree_code dcode;
1302 gimple oe1def;
1304 if (TREE_CODE (oe1->op) != SSA_NAME)
1305 continue;
1306 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1307 if (!is_gimple_assign (oe1def))
1308 continue;
1309 dcode = gimple_assign_rhs_code (oe1def);
1310 if ((dcode != MULT_EXPR
1311 && dcode != RDIV_EXPR)
1312 || !is_reassociable_op (oe1def, dcode, loop))
1313 continue;
1315 bitmap_set_bit (candidates, i);
1316 nr_candidates++;
1319 if (nr_candidates < 2)
1321 sbitmap_free (candidates);
1322 return false;
1325 if (dump_file && (dump_flags & TDF_DETAILS))
1327 fprintf (dump_file, "searching for un-distribute opportunities ");
1328 print_generic_expr (dump_file,
1329 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1330 fprintf (dump_file, " %d\n", nr_candidates);
1333 /* Build linearized sub-operand lists and the counting table. */
1334 cvec.create (0);
1335 ctable.create (15);
1336 /* ??? Macro arguments cannot have multi-argument template types in
 1337    them.  This typedef is needed to work around that limitation. */
1338 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1339 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1340 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1342 gimple oedef;
1343 enum tree_code oecode;
1344 unsigned j;
1346 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1347 oecode = gimple_assign_rhs_code (oedef);
1348 linearize_expr_tree (&subops[i], oedef,
1349 associative_tree_code (oecode), false);
1351 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1353 oecount c;
1354 void **slot;
1355 size_t idx;
1356 c.oecode = oecode;
1357 c.cnt = 1;
1358 c.id = next_oecount_id++;
1359 c.op = oe1->op;
1360 cvec.safe_push (c);
1361 idx = cvec.length () + 41;
1362 slot = ctable.find_slot ((void *)idx, INSERT);
1363 if (!*slot)
1365 *slot = (void *)idx;
1367 else
1369 cvec.pop ();
1370 cvec[(size_t)*slot - 42].cnt++;
1374 ctable.dispose ();
1376 /* Sort the counting table. */
1377 cvec.qsort (oecount_cmp);
1379 if (dump_file && (dump_flags & TDF_DETAILS))
1381 oecount *c;
1382 fprintf (dump_file, "Candidates:\n");
1383 FOR_EACH_VEC_ELT (cvec, j, c)
1385 fprintf (dump_file, " %u %s: ", c->cnt,
1386 c->oecode == MULT_EXPR
1387 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1388 print_generic_expr (dump_file, c->op, 0);
1389 fprintf (dump_file, "\n");
1393 /* Process the (operand, code) pairs in order of most occurrence. */
1394 candidates2 = sbitmap_alloc (length);
1395 while (!cvec.is_empty ())
1397 oecount *c = &cvec.last ();
1398 if (c->cnt < 2)
1399 break;
1401 /* Now collect the operands in the outer chain that contain
1402 the common operand in their inner chain. */
1403 bitmap_clear (candidates2);
1404 nr_candidates2 = 0;
1405 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1407 gimple oedef;
1408 enum tree_code oecode;
1409 unsigned j;
1410 tree op = (*ops)[i]->op;
1412 /* If we undistributed in this chain already this may be
1413 a constant. */
1414 if (TREE_CODE (op) != SSA_NAME)
1415 continue;
1417 oedef = SSA_NAME_DEF_STMT (op);
1418 oecode = gimple_assign_rhs_code (oedef);
1419 if (oecode != c->oecode)
1420 continue;
1422 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1424 if (oe1->op == c->op)
1426 bitmap_set_bit (candidates2, i);
1427 ++nr_candidates2;
1428 break;
1433 if (nr_candidates2 >= 2)
1435 operand_entry_t oe1, oe2;
1436 gimple prod;
1437 int first = bitmap_first_set_bit (candidates2);
1439 /* Build the new addition chain. */
1440 oe1 = (*ops)[first];
1441 if (dump_file && (dump_flags & TDF_DETAILS))
1443 fprintf (dump_file, "Building (");
1444 print_generic_expr (dump_file, oe1->op, 0);
1446 zero_one_operation (&oe1->op, c->oecode, c->op);
1447 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1449 gimple sum;
1450 oe2 = (*ops)[i];
1451 if (dump_file && (dump_flags & TDF_DETAILS))
1453 fprintf (dump_file, " + ");
1454 print_generic_expr (dump_file, oe2->op, 0);
1456 zero_one_operation (&oe2->op, c->oecode, c->op);
1457 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1458 oe1->op, oe2->op, opcode);
1459 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1460 oe2->rank = 0;
1461 oe1->op = gimple_get_lhs (sum);
1464 /* Apply the multiplication/division. */
1465 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1466 oe1->op, c->op, c->oecode);
1467 if (dump_file && (dump_flags & TDF_DETAILS))
1469 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1470 print_generic_expr (dump_file, c->op, 0);
1471 fprintf (dump_file, "\n");
1474 /* Record it in the addition chain and disable further
1475 undistribution with this op. */
1476 oe1->op = gimple_assign_lhs (prod);
1477 oe1->rank = get_rank (oe1->op);
1478 subops[first].release ();
1480 changed = true;
1483 cvec.pop ();
1486 for (i = 0; i < ops->length (); ++i)
1487 subops[i].release ();
1488 free (subops);
1489 cvec.release ();
1490 sbitmap_free (candidates);
1491 sbitmap_free (candidates2);
1493 return changed;
1496 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1497 expression, examine the other OPS to see if any of them are comparisons
1498 of the same values, which we may be able to combine or eliminate.
1499 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
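/* Likewise for BIT_AND_EXPR: e.g. (a < b) & (a <= b) folds to just
   (a < b), allowing the redundant comparison to be eliminated from OPS
   (an illustrative example).  */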
1501 static bool
1502 eliminate_redundant_comparison (enum tree_code opcode,
1503 vec<operand_entry_t> *ops,
1504 unsigned int currindex,
1505 operand_entry_t curr)
1507 tree op1, op2;
1508 enum tree_code lcode, rcode;
1509 gimple def1, def2;
1510 int i;
1511 operand_entry_t oe;
1513 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1514 return false;
1516 /* Check that CURR is a comparison. */
1517 if (TREE_CODE (curr->op) != SSA_NAME)
1518 return false;
1519 def1 = SSA_NAME_DEF_STMT (curr->op);
1520 if (!is_gimple_assign (def1))
1521 return false;
1522 lcode = gimple_assign_rhs_code (def1);
1523 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1524 return false;
1525 op1 = gimple_assign_rhs1 (def1);
1526 op2 = gimple_assign_rhs2 (def1);
1528 /* Now look for a similar comparison in the remaining OPS. */
1529 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1531 tree t;
1533 if (TREE_CODE (oe->op) != SSA_NAME)
1534 continue;
1535 def2 = SSA_NAME_DEF_STMT (oe->op);
1536 if (!is_gimple_assign (def2))
1537 continue;
1538 rcode = gimple_assign_rhs_code (def2);
1539 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1540 continue;
1542 /* If we got here, we have a match. See if we can combine the
1543 two comparisons. */
1544 if (opcode == BIT_IOR_EXPR)
1545 t = maybe_fold_or_comparisons (lcode, op1, op2,
1546 rcode, gimple_assign_rhs1 (def2),
1547 gimple_assign_rhs2 (def2));
1548 else
1549 t = maybe_fold_and_comparisons (lcode, op1, op2,
1550 rcode, gimple_assign_rhs1 (def2),
1551 gimple_assign_rhs2 (def2));
1552 if (!t)
1553 continue;
1555 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1556 always give us a boolean_type_node value back. If the original
1557 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1558 we need to convert. */
1559 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1560 t = fold_convert (TREE_TYPE (curr->op), t);
1562 if (TREE_CODE (t) != INTEGER_CST
1563 && !operand_equal_p (t, curr->op, 0))
1565 enum tree_code subcode;
1566 tree newop1, newop2;
1567 if (!COMPARISON_CLASS_P (t))
1568 continue;
1569 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1570 STRIP_USELESS_TYPE_CONVERSION (newop1);
1571 STRIP_USELESS_TYPE_CONVERSION (newop2);
1572 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1573 continue;
1576 if (dump_file && (dump_flags & TDF_DETAILS))
1578 fprintf (dump_file, "Equivalence: ");
1579 print_generic_expr (dump_file, curr->op, 0);
1580 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1581 print_generic_expr (dump_file, oe->op, 0);
1582 fprintf (dump_file, " -> ");
1583 print_generic_expr (dump_file, t, 0);
1584 fprintf (dump_file, "\n");
1587 /* Now we can delete oe, as it has been subsumed by the new combined
1588 expression t. */
1589 ops->ordered_remove (i);
1590 reassociate_stats.ops_eliminated ++;
1592 /* If t is the same as curr->op, we're done. Otherwise we must
1593 replace curr->op with t. Special case is if we got a constant
1594 back, in which case we add it to the end instead of in place of
1595 the current entry. */
1596 if (TREE_CODE (t) == INTEGER_CST)
1598 ops->ordered_remove (currindex);
1599 add_to_ops_vec (ops, t);
1601 else if (!operand_equal_p (t, curr->op, 0))
1603 gimple sum;
1604 enum tree_code subcode;
1605 tree newop1;
1606 tree newop2;
1607 gcc_assert (COMPARISON_CLASS_P (t));
1608 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1609 STRIP_USELESS_TYPE_CONVERSION (newop1);
1610 STRIP_USELESS_TYPE_CONVERSION (newop2);
1611 gcc_checking_assert (is_gimple_val (newop1)
1612 && is_gimple_val (newop2));
1613 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1614 curr->op = gimple_get_lhs (sum);
1616 return true;
1619 return false;
1622 /* Perform various identities and other optimizations on the list of
1623 operand entries, stored in OPS. The tree code for the binary
1624 operation between all the operands is OPCODE. */
1626 static void
1627 optimize_ops_list (enum tree_code opcode,
1628 vec<operand_entry_t> *ops)
1630 unsigned int length = ops->length ();
1631 unsigned int i;
1632 operand_entry_t oe;
1633 operand_entry_t oelast = NULL;
1634 bool iterate = false;
1636 if (length == 1)
1637 return;
1639 oelast = ops->last ();
1641 /* If the last two are constants, pop the constants off, merge them
1642 and try the next two. */
1643 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1645 operand_entry_t oelm1 = (*ops)[length - 2];
1647 if (oelm1->rank == 0
1648 && is_gimple_min_invariant (oelm1->op)
1649 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1650 TREE_TYPE (oelast->op)))
1652 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1653 oelm1->op, oelast->op);
1655 if (folded && is_gimple_min_invariant (folded))
1657 if (dump_file && (dump_flags & TDF_DETAILS))
1658 fprintf (dump_file, "Merging constants\n");
1660 ops->pop ();
1661 ops->pop ();
1663 add_to_ops_vec (ops, folded);
1664 reassociate_stats.constants_eliminated++;
1666 optimize_ops_list (opcode, ops);
1667 return;
1672 eliminate_using_constants (opcode, ops);
1673 oelast = NULL;
1675 for (i = 0; ops->iterate (i, &oe);)
1677 bool done = false;
1679 if (eliminate_not_pairs (opcode, ops, i, oe))
1680 return;
1681 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1682 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1683 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1685 if (done)
1686 return;
1687 iterate = true;
1688 oelast = NULL;
1689 continue;
1691 oelast = oe;
1692 i++;
1695 length = ops->length ();
1696 oelast = ops->last ();
1698 if (iterate)
1699 optimize_ops_list (opcode, ops);
1702 /* The following functions are subroutines to optimize_range_tests and allow
1703 it to try to change a logical combination of comparisons into a range
1704 test.
1706 For example, both
 1707    X == 2 || X == 5 || X == 3 || X == 4
 1708    and
 1709    X >= 2 && X <= 5
1710 are converted to
1711 (unsigned) (X - 2) <= 3
1713 For more information see comments above fold_test_range in fold-const.c,
1714 this implementation is for GIMPLE. */
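/* The unsigned form works because the subtraction wraps: for X in
   [2, 5] the value X - 2 lies in [0, 3], while for X < 2 it wraps to a
   very large unsigned value, so (unsigned) (X - 2) <= 3 holds exactly
   when 2 <= X && X <= 5.  */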
1716 struct range_entry
1718 tree exp;
1719 tree low;
1720 tree high;
1721 bool in_p;
1722 bool strict_overflow_p;
1723 unsigned int idx, next;
1726 /* This is similar to make_range in fold-const.c, but on top of
1727 GIMPLE instead of trees. If EXP is non-NULL, it should be
1728 an SSA_NAME and STMT argument is ignored, otherwise STMT
1729 argument should be a GIMPLE_COND. */
1731 static void
1732 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1734 int in_p;
1735 tree low, high;
1736 bool is_bool, strict_overflow_p;
1738 r->exp = NULL_TREE;
1739 r->in_p = false;
1740 r->strict_overflow_p = false;
1741 r->low = NULL_TREE;
1742 r->high = NULL_TREE;
1743 if (exp != NULL_TREE
1744 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1745 return;
1747 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1748 and see if we can refine the range. Some of the cases below may not
1749 happen, but it doesn't seem worth worrying about this. We "continue"
1750 the outer loop when we've changed something; otherwise we "break"
1751 the switch, which will "break" the while. */
1752 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1753 high = low;
1754 in_p = 0;
1755 strict_overflow_p = false;
1756 is_bool = false;
1757 if (exp == NULL_TREE)
1758 is_bool = true;
1759 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1761 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1762 is_bool = true;
1763 else
1764 return;
1766 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1767 is_bool = true;
1769 while (1)
1771 enum tree_code code;
1772 tree arg0, arg1, exp_type;
1773 tree nexp;
1774 location_t loc;
1776 if (exp != NULL_TREE)
1778 if (TREE_CODE (exp) != SSA_NAME)
1779 break;
1781 stmt = SSA_NAME_DEF_STMT (exp);
1782 if (!is_gimple_assign (stmt))
1783 break;
1785 code = gimple_assign_rhs_code (stmt);
1786 arg0 = gimple_assign_rhs1 (stmt);
1787 arg1 = gimple_assign_rhs2 (stmt);
1788 exp_type = TREE_TYPE (exp);
1790 else
1792 code = gimple_cond_code (stmt);
1793 arg0 = gimple_cond_lhs (stmt);
1794 arg1 = gimple_cond_rhs (stmt);
1795 exp_type = boolean_type_node;
1798 if (TREE_CODE (arg0) != SSA_NAME)
1799 break;
1800 loc = gimple_location (stmt);
1801 switch (code)
1803 case BIT_NOT_EXPR:
1804 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1805 /* Ensure the range is either +[-,0], +[0,0],
1806 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1807 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1808 or similar expression of unconditional true or
1809 false, it should not be negated. */
1810 && ((high && integer_zerop (high))
1811 || (low && integer_onep (low))))
1813 in_p = !in_p;
1814 exp = arg0;
1815 continue;
1817 break;
1818 case SSA_NAME:
1819 exp = arg0;
1820 continue;
1821 CASE_CONVERT:
1822 if (is_bool)
1823 goto do_default;
1824 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1826 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1827 is_bool = true;
1828 else
1829 return;
1831 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1832 is_bool = true;
1833 goto do_default;
1834 case EQ_EXPR:
1835 case NE_EXPR:
1836 case LT_EXPR:
1837 case LE_EXPR:
1838 case GE_EXPR:
1839 case GT_EXPR:
1840 is_bool = true;
1841 /* FALLTHRU */
1842 default:
1843 if (!is_bool)
1844 return;
1845 do_default:
1846 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1847 &low, &high, &in_p,
1848 &strict_overflow_p);
1849 if (nexp != NULL_TREE)
1851 exp = nexp;
1852 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1853 continue;
1855 break;
1857 break;
1859 if (is_bool)
1861 r->exp = exp;
1862 r->in_p = in_p;
1863 r->low = low;
1864 r->high = high;
1865 r->strict_overflow_p = strict_overflow_p;
1869 /* Comparison function for qsort. Sort entries
1870 without SSA_NAME exp first, then with SSA_NAMEs sorted
1871 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1872 by increasing ->low and if ->low is the same, by increasing
1873 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1874 maximum. */
1876 static int
1877 range_entry_cmp (const void *a, const void *b)
1879 const struct range_entry *p = (const struct range_entry *) a;
1880 const struct range_entry *q = (const struct range_entry *) b;
1882 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1884 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1886 /* Group range_entries for the same SSA_NAME together. */
1887 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1888 return -1;
1889 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1890 return 1;
1891 /* If ->low is different, NULL low goes first, then by
1892 ascending low. */
1893 if (p->low != NULL_TREE)
1895 if (q->low != NULL_TREE)
1897 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1898 p->low, q->low);
1899 if (tem && integer_onep (tem))
1900 return -1;
1901 tem = fold_binary (GT_EXPR, boolean_type_node,
1902 p->low, q->low);
1903 if (tem && integer_onep (tem))
1904 return 1;
1906 else
1907 return 1;
1909 else if (q->low != NULL_TREE)
1910 return -1;
1911 /* If ->high is different, NULL high goes last, before that by
1912 ascending high. */
1913 if (p->high != NULL_TREE)
1915 if (q->high != NULL_TREE)
1917 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1918 p->high, q->high);
1919 if (tem && integer_onep (tem))
1920 return -1;
1921 tem = fold_binary (GT_EXPR, boolean_type_node,
1922 p->high, q->high);
1923 if (tem && integer_onep (tem))
1924 return 1;
1926 else
1927 return -1;
1929 else if (p->high != NULL_TREE)
1930 return 1;
1931 /* If both ranges are the same, sort below by ascending idx. */
1933 else
1934 return 1;
1936 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1937 return -1;
1939 if (p->idx < q->idx)
1940 return -1;
1941 else
1943 gcc_checking_assert (p->idx > q->idx);
1944 return 1;
1948 /* Helper routine of optimize_range_test.
1949 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1950 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1951 OPCODE and OPS are arguments of optimize_range_tests. Return
1952 true if the range merge has been successful.
1953 If OPCODE is ERROR_MARK, this is called from within
1954 maybe_optimize_range_tests and is performing inter-bb range optimization.
1955 Changes should be then performed right away, and whether an op is
1956 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1958 static bool
1959 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1960 unsigned int count, enum tree_code opcode,
1961 vec<operand_entry_t> *ops, tree exp, bool in_p,
1962 tree low, tree high, bool strict_overflow_p)
1964 operand_entry_t oe = (*ops)[range->idx];
1965 tree op = oe->op;
1966 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1967 location_t loc = gimple_location (stmt);
1968 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1969 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1970 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1971 gimple_stmt_iterator gsi;
1973 if (tem == NULL_TREE)
1974 return false;
1976 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1977 warning_at (loc, OPT_Wstrict_overflow,
1978 "assuming signed overflow does not occur "
1979 "when simplifying range test");
1981 if (dump_file && (dump_flags & TDF_DETAILS))
1983 struct range_entry *r;
1984 fprintf (dump_file, "Optimizing range tests ");
1985 print_generic_expr (dump_file, range->exp, 0);
1986 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1987 print_generic_expr (dump_file, range->low, 0);
1988 fprintf (dump_file, ", ");
1989 print_generic_expr (dump_file, range->high, 0);
1990 fprintf (dump_file, "]");
1991 for (r = otherrange; r < otherrange + count; r++)
1993 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1994 print_generic_expr (dump_file, r->low, 0);
1995 fprintf (dump_file, ", ");
1996 print_generic_expr (dump_file, r->high, 0);
1997 fprintf (dump_file, "]");
1999 fprintf (dump_file, "\n into ");
2000 print_generic_expr (dump_file, tem, 0);
2001 fprintf (dump_file, "\n");
2004 if (opcode == BIT_IOR_EXPR
2005 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2006 tem = invert_truthvalue_loc (loc, tem);
2008 tem = fold_convert_loc (loc, optype, tem);
2009 gsi = gsi_for_stmt (stmt);
2010 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2011 GSI_SAME_STMT);
2013 /* If doing inter-bb range test optimization, update the
2014 stmts immediately. Start with changing the first range test
2015 immediate use to the new value (TEM), or, if the first range
2016 test is a GIMPLE_COND stmt, change that condition. */
2017 if (opcode == ERROR_MARK)
2019 if (op)
2021 imm_use_iterator iter;
2022 use_operand_p use_p;
2023 gimple use_stmt;
2025 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2027 if (is_gimple_debug (use_stmt))
2028 continue;
2029 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2030 SET_USE (use_p, tem);
2031 update_stmt (use_stmt);
2034 else
2036 gimple_cond_set_code (stmt, NE_EXPR);
2037 gimple_cond_set_lhs (stmt, tem);
2038 gimple_cond_set_rhs (stmt, boolean_false_node);
2039 update_stmt (stmt);
2042 oe->op = tem;
2043 range->exp = exp;
2044 range->low = low;
2045 range->high = high;
2046 range->in_p = in_p;
2047 range->strict_overflow_p = false;
2049 for (range = otherrange; range < otherrange + count; range++)
2051 oe = (*ops)[range->idx];
2052 /* Now change all the other range test immediate uses, so that
2053 those tests will be optimized away. */
2054 if (opcode == ERROR_MARK)
2056 if (oe->op)
2058 imm_use_iterator iter;
2059 use_operand_p use_p;
2060 gimple use_stmt;
2062 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2064 if (is_gimple_debug (use_stmt))
2065 continue;
2066 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2067 adjust it into _7 = _9;. */
2068 if (is_gimple_assign (use_stmt)
2069 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2071 tree expr = NULL_TREE;
2072 if (oe->op == gimple_assign_rhs1 (use_stmt))
2073 expr = gimple_assign_rhs2 (use_stmt);
2074 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2075 expr = gimple_assign_rhs1 (use_stmt);
2076 if (expr
2077 && expr != oe->op
2078 && TREE_CODE (expr) == SSA_NAME)
2080 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2081 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2082 expr, NULL_TREE);
2083 update_stmt (use_stmt);
2084 continue;
2087 /* If imm use of _8 is a statement like _7 = (int) _8;,
2088 adjust it into _7 = 0; or _7 = 1;. */
2089 if (gimple_assign_cast_p (use_stmt)
2090 && oe->op == gimple_assign_rhs1 (use_stmt))
2092 tree lhs = gimple_assign_lhs (use_stmt);
2093 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2095 gimple_stmt_iterator gsi2
2096 = gsi_for_stmt (use_stmt);
2097 tree expr = build_int_cst (TREE_TYPE (lhs),
2098 oe->rank == BIT_IOR_EXPR
2099 ? 0 : 1);
2100 gimple_assign_set_rhs_with_ops (&gsi2,
2101 INTEGER_CST,
2102 expr, NULL_TREE);
2103 update_stmt (use_stmt);
2104 continue;
2107 /* Otherwise replace the use with 0 or 1. */
2108 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2109 SET_USE (use_p,
2110 build_int_cst (TREE_TYPE (oe->op),
2111 oe->rank == BIT_IOR_EXPR
2112 ? 0 : 1));
2113 update_stmt (use_stmt);
2116 else
2118 /* If range test was a GIMPLE_COND, simply change it
2119 into an always false or always true condition. */
2120 stmt = last_stmt (BASIC_BLOCK (oe->id));
2121 if (oe->rank == BIT_IOR_EXPR)
2122 gimple_cond_make_false (stmt);
2123 else
2124 gimple_cond_make_true (stmt);
2125 update_stmt (stmt);
2128 oe->op = error_mark_node;
2129 range->exp = NULL_TREE;
2131 return true;
2134 /* Optimize range tests, similarly to how fold_range_test optimizes
2135 them on trees. The tree code for the binary
2136 operation between all the operands is OPCODE.
2137 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2138 maybe_optimize_range_tests for inter-bb range optimization.
2139 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2140 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2141 the actual opcode. */
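/* Editorial illustration (hypothetical SSA names, not part of the pass):
   for a chain like _1 = x_5 == 3; _2 = x_5 == 4; _3 = _1 | _2; the
   operands record the ranges [3, 3] and [4, 4] on x_5.  Because the
   opcode is BIT_IOR_EXPR they are first inverted (x_5 != 3, x_5 != 4),
   merged by merge_ranges into a single "not in [3, 4]" range, and the
   emitted check is inverted back, yielding in the end something
   equivalent to (unsigned) (x_5 - 3) <= 1.  */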
2143 static void
2144 optimize_range_tests (enum tree_code opcode,
2145 vec<operand_entry_t> *ops)
2147 unsigned int length = ops->length (), i, j, first;
2148 operand_entry_t oe;
2149 struct range_entry *ranges;
2150 bool any_changes = false;
2152 if (length == 1)
2153 return;
2155 ranges = XNEWVEC (struct range_entry, length);
2156 for (i = 0; i < length; i++)
2158 oe = (*ops)[i];
2159 ranges[i].idx = i;
2160 init_range_entry (ranges + i, oe->op,
2161 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2162 /* For |, invert it now; we will invert it again before emitting
2163 the optimized expression. */
2164 if (opcode == BIT_IOR_EXPR
2165 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2166 ranges[i].in_p = !ranges[i].in_p;
2169 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2170 for (i = 0; i < length; i++)
2171 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2172 break;
2174 /* Try to merge ranges. */
2175 for (first = i; i < length; i++)
2177 tree low = ranges[i].low;
2178 tree high = ranges[i].high;
2179 int in_p = ranges[i].in_p;
2180 bool strict_overflow_p = ranges[i].strict_overflow_p;
2181 int update_fail_count = 0;
2183 for (j = i + 1; j < length; j++)
2185 if (ranges[i].exp != ranges[j].exp)
2186 break;
2187 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2188 ranges[j].in_p, ranges[j].low, ranges[j].high))
2189 break;
2190 strict_overflow_p |= ranges[j].strict_overflow_p;
2193 if (j == i + 1)
2194 continue;
2196 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2197 ops, ranges[i].exp, in_p, low, high,
2198 strict_overflow_p))
2200 i = j - 1;
2201 any_changes = true;
2203 /* Avoid quadratic complexity when all merge_ranges calls would succeed
2204 but update_range_test would fail. */
2205 else if (update_fail_count == 64)
2206 i = j - 1;
2207 else
2208 ++update_fail_count;
2211 /* Optimize X == CST1 || X == CST2
2212 if popcount (CST1 ^ CST2) == 1 into
2213 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2214 Similarly for ranges. E.g.
2215 X != 2 && X != 3 && X != 10 && X != 11
2216 will be transformed by the above loop into
2217 (X - 2U) <= 1U && (X - 10U) <= 1U
2218 and this loop can transform that into
2219 ((X & ~8) - 2U) <= 1U. */
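/* Worked constants for the example above (illustration only):
   lowi = 2, highi = 3, lowj = 10, highj = 11, so lowxor = 2 ^ 10 = 8,
   which has a single bit set (8 & 7 == 0), and highxor = 3 ^ 11 = 8
   matches.  The mask is therefore ~8, the new bounds are 2 & ~8 = 2
   and 3 & ~8 = 3, giving the combined test ((X & ~8) - 2U) <= 1U.  */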
2220 for (i = first; i < length; i++)
2222 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2224 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2225 continue;
2226 type = TREE_TYPE (ranges[i].exp);
2227 if (!INTEGRAL_TYPE_P (type))
2228 continue;
2229 lowi = ranges[i].low;
2230 if (lowi == NULL_TREE)
2231 lowi = TYPE_MIN_VALUE (type);
2232 highi = ranges[i].high;
2233 if (highi == NULL_TREE)
2234 continue;
2235 for (j = i + 1; j < length && j < i + 64; j++)
2237 if (ranges[j].exp == NULL_TREE)
2238 continue;
2239 if (ranges[i].exp != ranges[j].exp)
2240 break;
2241 if (ranges[j].in_p)
2242 continue;
2243 lowj = ranges[j].low;
2244 if (lowj == NULL_TREE)
2245 continue;
2246 highj = ranges[j].high;
2247 if (highj == NULL_TREE)
2248 highj = TYPE_MAX_VALUE (type);
2249 tem = fold_binary (GT_EXPR, boolean_type_node,
2250 lowj, highi);
2251 if (tem == NULL_TREE || !integer_onep (tem))
2252 continue;
2253 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2254 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2255 continue;
2256 gcc_checking_assert (!integer_zerop (lowxor));
2257 tem = fold_binary (MINUS_EXPR, type, lowxor,
2258 build_int_cst (type, 1));
2259 if (tem == NULL_TREE)
2260 continue;
2261 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2262 if (tem == NULL_TREE || !integer_zerop (tem))
2263 continue;
2264 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2265 if (!tree_int_cst_equal (lowxor, highxor))
2266 continue;
2267 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2268 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2269 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2270 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2271 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2272 ranges[i].in_p, lowj, highj,
2273 ranges[i].strict_overflow_p
2274 || ranges[j].strict_overflow_p))
2276 any_changes = true;
2277 break;
2282 if (any_changes && opcode != ERROR_MARK)
2284 j = 0;
2285 FOR_EACH_VEC_ELT (*ops, i, oe)
2287 if (oe->op == error_mark_node)
2288 continue;
2289 else if (i != j)
2290 (*ops)[j] = oe;
2291 j++;
2293 ops->truncate (j);
2296 XDELETEVEC (ranges);
2299 /* Return true if STMT is a cast like:
2300 <bb N>:
2302 _123 = (int) _234;
2304 <bb M>:
2305 # _345 = PHI <_123(N), 1(...), 1(...)>
2306 where _234 has bool type, _123 has single use and
2307 bb N has a single successor M. This is commonly used in
2308 the last block of a range test. */
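/* A hypothetical source-level illustration of where such a block comes
   from: for code like "return a == 1 || a == 3;" the short-circuit
   expansion typically ends with a block that computes the boolean result
   of the last comparison and widens it (_123 = (int) _234), while the
   PHI in the join block merges that value with constant 1 coming from
   the blocks where an earlier comparison already succeeded.  */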
2310 static bool
2311 final_range_test_p (gimple stmt)
2313 basic_block bb, rhs_bb;
2314 edge e;
2315 tree lhs, rhs;
2316 use_operand_p use_p;
2317 gimple use_stmt;
2319 if (!gimple_assign_cast_p (stmt))
2320 return false;
2321 bb = gimple_bb (stmt);
2322 if (!single_succ_p (bb))
2323 return false;
2324 e = single_succ_edge (bb);
2325 if (e->flags & EDGE_COMPLEX)
2326 return false;
2328 lhs = gimple_assign_lhs (stmt);
2329 rhs = gimple_assign_rhs1 (stmt);
2330 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2331 || TREE_CODE (rhs) != SSA_NAME
2332 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2333 return false;
2335 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2336 if (!single_imm_use (lhs, &use_p, &use_stmt))
2337 return false;
2339 if (gimple_code (use_stmt) != GIMPLE_PHI
2340 || gimple_bb (use_stmt) != e->dest)
2341 return false;
2343 /* And that the rhs is defined in the same loop. */
2344 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2345 if (rhs_bb == NULL
2346 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2347 return false;
2349 return true;
2352 /* Return true if BB is a suitable basic block for inter-bb range test
2353 optimization. If BACKWARD is true, BB should be the only predecessor
2354 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2355 or compared with to find a common basic block to which all conditions
2356 branch if true resp. false. If BACKWARD is false, TEST_BB should
2357 be the only predecessor of BB. */
2359 static bool
2360 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2361 bool backward)
2363 edge_iterator ei, ei2;
2364 edge e, e2;
2365 gimple stmt;
2366 gimple_stmt_iterator gsi;
2367 bool other_edge_seen = false;
2368 bool is_cond;
2370 if (test_bb == bb)
2371 return false;
2372 /* Check last stmt first. */
2373 stmt = last_stmt (bb);
2374 if (stmt == NULL
2375 || (gimple_code (stmt) != GIMPLE_COND
2376 && (backward || !final_range_test_p (stmt)))
2377 || gimple_visited_p (stmt)
2378 || stmt_could_throw_p (stmt)
2379 || *other_bb == bb)
2380 return false;
2381 is_cond = gimple_code (stmt) == GIMPLE_COND;
2382 if (is_cond)
2384 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2385 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2386 to *OTHER_BB (if not set yet, try to find it out). */
2387 if (EDGE_COUNT (bb->succs) != 2)
2388 return false;
2389 FOR_EACH_EDGE (e, ei, bb->succs)
2391 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2392 return false;
2393 if (e->dest == test_bb)
2395 if (backward)
2396 continue;
2397 else
2398 return false;
2400 if (e->dest == bb)
2401 return false;
2402 if (*other_bb == NULL)
2404 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2405 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2406 return false;
2407 else if (e->dest == e2->dest)
2408 *other_bb = e->dest;
2409 if (*other_bb == NULL)
2410 return false;
2412 if (e->dest == *other_bb)
2413 other_edge_seen = true;
2414 else if (backward)
2415 return false;
2417 if (*other_bb == NULL || !other_edge_seen)
2418 return false;
2420 else if (single_succ (bb) != *other_bb)
2421 return false;
2423 /* Now check all PHIs of *OTHER_BB. */
2424 e = find_edge (bb, *other_bb);
2425 e2 = find_edge (test_bb, *other_bb);
2426 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2428 gimple phi = gsi_stmt (gsi);
2429 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2430 corresponding to BB and TEST_BB predecessor must be the same. */
2431 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2432 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2434 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2435 one of the PHIs should have the lhs of the last stmt in
2436 that block as PHI arg and that PHI should have 0 or 1
2437 corresponding to it in all other range test basic blocks
2438 considered. */
2439 if (!is_cond)
2441 if (gimple_phi_arg_def (phi, e->dest_idx)
2442 == gimple_assign_lhs (stmt)
2443 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2444 || integer_onep (gimple_phi_arg_def (phi,
2445 e2->dest_idx))))
2446 continue;
2448 else
2450 gimple test_last = last_stmt (test_bb);
2451 if (gimple_code (test_last) != GIMPLE_COND
2452 && gimple_phi_arg_def (phi, e2->dest_idx)
2453 == gimple_assign_lhs (test_last)
2454 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2455 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2456 continue;
2459 return false;
2462 return true;
2465 /* Return true if BB doesn't have side-effects that would disallow
2466 range test optimization, all SSA_NAMEs set in the bb are consumed
2467 in the bb and there are no PHIs. */
2469 static bool
2470 no_side_effect_bb (basic_block bb)
2472 gimple_stmt_iterator gsi;
2473 gimple last;
2475 if (!gimple_seq_empty_p (phi_nodes (bb)))
2476 return false;
2477 last = last_stmt (bb);
2478 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2480 gimple stmt = gsi_stmt (gsi);
2481 tree lhs;
2482 imm_use_iterator imm_iter;
2483 use_operand_p use_p;
2485 if (is_gimple_debug (stmt))
2486 continue;
2487 if (gimple_has_side_effects (stmt))
2488 return false;
2489 if (stmt == last)
2490 return true;
2491 if (!is_gimple_assign (stmt))
2492 return false;
2493 lhs = gimple_assign_lhs (stmt);
2494 if (TREE_CODE (lhs) != SSA_NAME)
2495 return false;
2496 if (gimple_assign_rhs_could_trap_p (stmt))
2497 return false;
2498 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2500 gimple use_stmt = USE_STMT (use_p);
2501 if (is_gimple_debug (use_stmt))
2502 continue;
2503 if (gimple_bb (use_stmt) != bb)
2504 return false;
2507 return false;
2510 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2511 return true and fill in *OPS recursively. */
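/* Small illustrative case (hypothetical SSA names): if VAR is _7 with
   _7 = _5 | _6 and _5 = _3 | _4, where _3, _4 and _6 are single-use
   leaves, then get_ops (_7, BIT_IOR_EXPR, ...) pushes entries for _3,
   _4 and _6; operands that are themselves reassociable defs of the same
   CODE are expanded recursively, the rest become leaves.  */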
2513 static bool
2514 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2515 struct loop *loop)
2517 gimple stmt = SSA_NAME_DEF_STMT (var);
2518 tree rhs[2];
2519 int i;
2521 if (!is_reassociable_op (stmt, code, loop))
2522 return false;
2524 rhs[0] = gimple_assign_rhs1 (stmt);
2525 rhs[1] = gimple_assign_rhs2 (stmt);
2526 gimple_set_visited (stmt, true);
2527 for (i = 0; i < 2; i++)
2528 if (TREE_CODE (rhs[i]) == SSA_NAME
2529 && !get_ops (rhs[i], code, ops, loop)
2530 && has_single_use (rhs[i]))
2532 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2534 oe->op = rhs[i];
2535 oe->rank = code;
2536 oe->id = 0;
2537 oe->count = 1;
2538 ops->safe_push (oe);
2540 return true;
2543 /* Inter-bb range test optimization. */
2545 static void
2546 maybe_optimize_range_tests (gimple stmt)
2548 basic_block first_bb = gimple_bb (stmt);
2549 basic_block last_bb = first_bb;
2550 basic_block other_bb = NULL;
2551 basic_block bb;
2552 edge_iterator ei;
2553 edge e;
2554 vec<operand_entry_t> ops = vNULL;
2556 /* Consider only basic blocks that end with GIMPLE_COND or
2557 a cast statement satisfying final_range_test_p. All
2558 but the last bb in the first_bb .. last_bb range
2559 should end with GIMPLE_COND. */
2560 if (gimple_code (stmt) == GIMPLE_COND)
2562 if (EDGE_COUNT (first_bb->succs) != 2)
2563 return;
2565 else if (final_range_test_p (stmt))
2566 other_bb = single_succ (first_bb);
2567 else
2568 return;
2570 if (stmt_could_throw_p (stmt))
2571 return;
2573 /* As relative ordering of post-dominator sons isn't fixed,
2574 maybe_optimize_range_tests can be called first on any
2575 bb in the range we want to optimize. So, start searching
2576 backwards, checking whether first_bb can be set to a predecessor. */
2577 while (single_pred_p (first_bb))
2579 basic_block pred_bb = single_pred (first_bb);
2580 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2581 break;
2582 if (!no_side_effect_bb (first_bb))
2583 break;
2584 first_bb = pred_bb;
2586 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2587 Before starting forward search in last_bb successors, find
2588 out the other_bb. */
2589 if (first_bb == last_bb)
2591 other_bb = NULL;
2592 /* As a non-GIMPLE_COND last stmt always terminates the range,
2593 if the backward search didn't discover anything, just give up. */
2594 if (gimple_code (stmt) != GIMPLE_COND)
2595 return;
2596 /* Look at both successors. Either a successor ends with a GIMPLE_COND
2597 and satisfies suitable_cond_bb, or it ends with a cast and
2598 other_bb is that successor's single successor. */
2599 FOR_EACH_EDGE (e, ei, first_bb->succs)
2600 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2601 || e->dest == first_bb)
2602 return;
2603 else if (single_pred_p (e->dest))
2605 stmt = last_stmt (e->dest);
2606 if (stmt
2607 && gimple_code (stmt) == GIMPLE_COND
2608 && EDGE_COUNT (e->dest->succs) == 2)
2610 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2611 break;
2612 else
2613 other_bb = NULL;
2615 else if (stmt
2616 && final_range_test_p (stmt)
2617 && find_edge (first_bb, single_succ (e->dest)))
2619 other_bb = single_succ (e->dest);
2620 if (other_bb == first_bb)
2621 other_bb = NULL;
2624 if (other_bb == NULL)
2625 return;
2627 /* Now do the forward search, moving last_bb to successor bbs
2628 that aren't other_bb. */
2629 while (EDGE_COUNT (last_bb->succs) == 2)
2631 FOR_EACH_EDGE (e, ei, last_bb->succs)
2632 if (e->dest != other_bb)
2633 break;
2634 if (e == NULL)
2635 break;
2636 if (!single_pred_p (e->dest))
2637 break;
2638 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2639 break;
2640 if (!no_side_effect_bb (e->dest))
2641 break;
2642 last_bb = e->dest;
2644 if (first_bb == last_bb)
2645 return;
2646 /* Here basic blocks first_bb through last_bb's predecessor
2647 end with GIMPLE_COND, each of them has one edge to
2648 other_bb and another to another block in the range,
2649 all blocks except first_bb have no side-effects, and
2650 last_bb ends with either a GIMPLE_COND or a cast satisfying
2651 final_range_test_p. */
2652 for (bb = last_bb; ; bb = single_pred (bb))
2654 enum tree_code code;
2655 tree lhs, rhs;
2657 e = find_edge (bb, other_bb);
2658 stmt = last_stmt (bb);
2659 gimple_set_visited (stmt, true);
2660 if (gimple_code (stmt) != GIMPLE_COND)
2662 use_operand_p use_p;
2663 gimple phi;
2664 edge e2;
2665 unsigned int d;
2667 lhs = gimple_assign_lhs (stmt);
2668 rhs = gimple_assign_rhs1 (stmt);
2669 gcc_assert (bb == last_bb);
2671 /* stmt is
2672 _123 = (int) _234;
2674 followed by:
2675 <bb M>:
2676 # _345 = PHI <_123(N), 1(...), 1(...)>
2678 or 0 instead of 1. If it is 0, the _234
2679 range test is anded together with all the
2680 other range tests; if it is 1, it is ored with
2681 them. */
2682 single_imm_use (lhs, &use_p, &phi);
2683 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2684 e2 = find_edge (first_bb, other_bb);
2685 d = e2->dest_idx;
2686 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2687 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2688 code = BIT_AND_EXPR;
2689 else
2691 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2692 code = BIT_IOR_EXPR;
2695 /* If _234's SSA_NAME_DEF_STMT is
2696 _234 = _567 | _789;
2697 (or &, corresponding to 1/0 in the phi arguments),
2698 push into ops the individual range test arguments
2699 of the bitwise or resp. and, recursively. */
2700 if (!get_ops (rhs, code, &ops,
2701 loop_containing_stmt (stmt))
2702 && has_single_use (rhs))
2704 /* Otherwise, push the _234 range test itself. */
2705 operand_entry_t oe
2706 = (operand_entry_t) pool_alloc (operand_entry_pool);
2708 oe->op = rhs;
2709 oe->rank = code;
2710 oe->id = 0;
2711 oe->count = 1;
2712 ops.safe_push (oe);
2714 continue;
2716 /* Otherwise stmt is GIMPLE_COND. */
2717 code = gimple_cond_code (stmt);
2718 lhs = gimple_cond_lhs (stmt);
2719 rhs = gimple_cond_rhs (stmt);
2720 if (TREE_CODE (lhs) == SSA_NAME
2721 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2722 && ((code != EQ_EXPR && code != NE_EXPR)
2723 || rhs != boolean_false_node
2724 /* Either push into ops the individual bitwise
2725 or resp. and operands, depending on which
2726 edge leads to other_bb. */
2727 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2728 ^ (code == EQ_EXPR))
2729 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2730 loop_containing_stmt (stmt))))
2732 /* Or push the GIMPLE_COND stmt itself. */
2733 operand_entry_t oe
2734 = (operand_entry_t) pool_alloc (operand_entry_pool);
2736 oe->op = NULL;
2737 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2738 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2739 /* oe->op = NULL signals that there is no SSA_NAME
2740 for the range test, and oe->id instead is the
2741 number of the basic block at whose end the
2742 GIMPLE_COND is found. */
2743 oe->id = bb->index;
2744 oe->count = 1;
2745 ops.safe_push (oe);
2747 if (bb == first_bb)
2748 break;
2750 if (ops.length () > 1)
2751 optimize_range_tests (ERROR_MARK, &ops);
2752 ops.release ();
2755 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2756 of STMT in its operands. This is also known as a "destructive
2757 update" operation. */
2759 static bool
2760 is_phi_for_stmt (gimple stmt, tree operand)
2762 gimple def_stmt;
2763 tree lhs;
2764 use_operand_p arg_p;
2765 ssa_op_iter i;
2767 if (TREE_CODE (operand) != SSA_NAME)
2768 return false;
2770 lhs = gimple_assign_lhs (stmt);
2772 def_stmt = SSA_NAME_DEF_STMT (operand);
2773 if (gimple_code (def_stmt) != GIMPLE_PHI)
2774 return false;
2776 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2777 if (lhs == USE_FROM_PTR (arg_p))
2778 return true;
2779 return false;
2782 /* Remove the def stmt of VAR if VAR has zero uses, and if so
2783 recurse on its rhs1 operand. */
2785 static void
2786 remove_visited_stmt_chain (tree var)
2788 gimple stmt;
2789 gimple_stmt_iterator gsi;
2791 while (1)
2793 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2794 return;
2795 stmt = SSA_NAME_DEF_STMT (var);
2796 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2798 var = gimple_assign_rhs1 (stmt);
2799 gsi = gsi_for_stmt (stmt);
2800 gsi_remove (&gsi, true);
2801 release_defs (stmt);
2803 else
2804 return;
2808 /* This function checks three consecutive operands in
2809 the passed operands vector OPS starting from OPINDEX and
2810 swaps two operands if it is profitable for the binary operation
2811 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2813 We pair ops with the same rank if possible.
2815 The alternative we try is to see if STMT is a destructive
2816 update style statement, which is like:
2817 b = phi (a, ...)
2818 a = c + b;
2819 In that case, we want to use the destructive update form to
2820 expose the possible vectorizer sum reduction opportunity.
2821 In that case, the third operand will be the phi node. This
2822 check is not performed if STMT is null.
2824 We could, of course, try to be better as noted above, and do a
2825 lot of work to try to find these opportunities in >3 operand
2826 cases, but it is unlikely to be worth it. */
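/* Illustrative case: with entries a (rank 1), b (rank 1), c (rank 3) at
   OPINDEX, OPINDEX + 1 and OPINDEX + 2, the first branch below swaps a
   and c, so the statement consuming the last two operands pairs the
   equal-rank a and b and exposes them to redundancy elimination.  */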
2828 static void
2829 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2830 unsigned int opindex, gimple stmt)
2832 operand_entry_t oe1, oe2, oe3;
2834 oe1 = ops[opindex];
2835 oe2 = ops[opindex + 1];
2836 oe3 = ops[opindex + 2];
2838 if ((oe1->rank == oe2->rank
2839 && oe2->rank != oe3->rank)
2840 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2841 && !is_phi_for_stmt (stmt, oe1->op)
2842 && !is_phi_for_stmt (stmt, oe2->op)))
2844 struct operand_entry temp = *oe3;
2845 oe3->op = oe1->op;
2846 oe3->rank = oe1->rank;
2847 oe1->op = temp.op;
2848 oe1->rank= temp.rank;
2850 else if ((oe1->rank == oe3->rank
2851 && oe2->rank != oe3->rank)
2852 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2853 && !is_phi_for_stmt (stmt, oe1->op)
2854 && !is_phi_for_stmt (stmt, oe3->op)))
2856 struct operand_entry temp = *oe2;
2857 oe2->op = oe1->op;
2858 oe2->rank = oe1->rank;
2859 oe1->op = temp.op;
2860 oe1->rank= temp.rank;
2864 /* Determine if stmt A is not dominated by stmt B. If A and B are in
2865 the same basic block, then A's UID has to be less than B's. If they
2866 are in different BBs, then A's BB must not be dominated by B's BB. */
2868 static inline bool
2869 not_dominated_by (gimple a, gimple b)
2871 basic_block bb_a, bb_b;
2872 bb_a = gimple_bb (a);
2873 bb_b = gimple_bb (b);
2874 return ((bb_a == bb_b && gimple_uid (a) < gimple_uid (b))
2875 || (bb_a != bb_b
2876 && !dominated_by_p (CDI_DOMINATORS, bb_a, bb_b)));
2880 /* Among STMT1 and STMT2, return the statement that appears later. Both
2881 statements are in the same BB and have the same UID. */
2883 static gimple
2884 appears_later_in_bb (gimple stmt1, gimple stmt2)
2886 unsigned uid = gimple_uid (stmt1);
2887 gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
2888 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
2890 gimple stmt = gsi_stmt (gsi);
2892 /* If STMT has a different UID than STMT1 and we haven't seen
2893 STMT2 during traversal, we know STMT1 appears later. */
2894 if (gimple_uid (stmt) != uid)
2895 return stmt1;
2896 else if (stmt == stmt2)
2897 return stmt2;
2899 return stmt1;
2902 /* Find the statement after which STMT must be moved so that the
2903 dependency from DEP_STMT to STMT is maintained. */
2905 static gimple
2906 find_insert_point (gimple stmt, gimple dep_stmt)
2908 gimple insert_stmt = stmt;
2909 if (dep_stmt == NULL)
2910 return stmt;
2911 if (gimple_uid (insert_stmt) == gimple_uid (dep_stmt)
2912 && gimple_bb (insert_stmt) == gimple_bb (dep_stmt)
2913 && insert_stmt != dep_stmt)
2914 insert_stmt = appears_later_in_bb (insert_stmt, dep_stmt);
2915 else if (not_dominated_by (insert_stmt, dep_stmt))
2916 insert_stmt = dep_stmt;
2917 return insert_stmt;
2920 /* Insert STMT after INSERT_POINT. */
2922 static void
2923 insert_stmt_after (gimple stmt, gimple insert_point)
2925 imm_use_iterator iter;
2926 tree lhs;
2927 gimple use_stmt;
2928 gimple_stmt_iterator gsistmt = gsi_for_stmt (stmt), gsi_insert;
2929 basic_block insert_bb = gimple_bb (insert_point);
2930 bool insert_bb_different = (insert_bb != gimple_bb (stmt));
2931 lhs = gimple_assign_lhs (stmt);
2932 /* If there are any debug uses of LHS, reset them. */
2933 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2935 if (is_gimple_debug (use_stmt)
2936 && not_dominated_by (use_stmt, insert_point))
2938 gimple_debug_bind_reset_value (use_stmt);
2939 update_stmt (use_stmt);
2942 /* If INSERT_POINT is a phi node, then do not insert just after that statement.
2943 Instead, find the first non-label gimple statement in BB and insert before
2944 that. */
2945 if (gimple_code (insert_point) == GIMPLE_PHI)
2947 gsi_insert = gsi_after_labels (insert_bb);
2948 gsi_move_before (&gsistmt, &gsi_insert);
2950 /* Statements marked for throw cannot be in the middle of a basic block. So
2951 we cannot insert a statement (not marked for throw) immediately after. */
2952 else if (stmt_ends_bb_p (insert_point))
2954 edge succ_edge = find_fallthru_edge (insert_bb->succs);
2955 insert_bb = succ_edge->dest;
2956 insert_bb_different = (insert_bb != gimple_bb (stmt));
2957 /* Insert STMT at the beginning of the successor basic block. */
2958 gsi_insert = gsi_after_labels (insert_bb);
2959 gsi_move_before (&gsistmt, &gsi_insert);
2961 else
2963 gsi_insert = gsi_for_stmt (insert_point);
2964 gsi_move_after (&gsistmt, &gsi_insert);
2966 /* Set the UID of STMT to that of INSERT_POINT so that subsequent comparisons
2967 of UIDs to determine dominance within a basic block work. */
2968 gimple_set_uid (stmt, gimple_uid (insert_point));
2969 if (dump_file && (dump_flags & TDF_DETAILS))
2971 fprintf (dump_file, "Moved stmt ");
2972 print_gimple_stmt (dump_file, stmt, 0, 0);
2973 fprintf (dump_file, " %s to satisfy dependences\n",
2974 insert_bb_different ? "to a different BB" : "within same BB");
2979 /* If OP is an SSA variable and is not the default definition, return the
2980 gimple statement that defines OP. Else return NULL. */
2982 static inline gimple
2983 get_def_stmt (tree op)
2985 if (TREE_CODE (op) == SSA_NAME
2986 && !SSA_NAME_IS_DEFAULT_DEF (op))
2987 return SSA_NAME_DEF_STMT (op);
2988 else
2989 return NULL;
2992 /* Ensure that operands in the OPS vector are available for STMT and all
2993 gimple statements on which STMT depends. */
2995 static void
2996 ensure_ops_are_available (gimple stmt, vec<operand_entry_t> ops, int opindex)
2998 unsigned int len = ops.length ();
2999 gimple insert_stmt = stmt;
3000 gimple dep_stmts[2];
3001 dep_stmts[0] = get_def_stmt (ops[opindex]->op);
3002 if (len - opindex == 2)
3004 dep_stmts[1] = get_def_stmt (ops[opindex + 1]->op);
3006 else
3008 gimple stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3009 ensure_ops_are_available (stmt1, ops, opindex + 1);
3010 dep_stmts[1] = stmt1;
3012 for (int i = 0; i < 2; i++)
3013 insert_stmt = find_insert_point (insert_stmt, dep_stmts[i]);
3015 if (insert_stmt != stmt)
3016 insert_stmt_after (stmt, insert_stmt);
3019 /* Recursively rewrite our linearized statements so that the operators
3020 match those in OPS[OPINDEX], putting the computation in rank
3021 order. */
3023 static void
3024 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3025 vec<operand_entry_t> ops, bool moved)
3027 tree rhs1 = gimple_assign_rhs1 (stmt);
3028 tree rhs2 = gimple_assign_rhs2 (stmt);
3029 operand_entry_t oe;
3031 /* The final recursion case for this function is that you have
3032 exactly two operations left.
3033 If we had exactly one op in the entire list to start with, we
3034 would have never called this function, and the tail recursion
3035 rewrites them one at a time. */
3036 if (opindex + 2 == ops.length ())
3038 operand_entry_t oe1, oe2;
3040 oe1 = ops[opindex];
3041 oe2 = ops[opindex + 1];
3043 if (rhs1 != oe1->op || rhs2 != oe2->op)
3045 if (dump_file && (dump_flags & TDF_DETAILS))
3047 fprintf (dump_file, "Transforming ");
3048 print_gimple_stmt (dump_file, stmt, 0, 0);
3051 gimple_assign_set_rhs1 (stmt, oe1->op);
3052 gimple_assign_set_rhs2 (stmt, oe2->op);
3053 update_stmt (stmt);
3054 if (rhs1 != oe1->op && rhs1 != oe2->op)
3055 remove_visited_stmt_chain (rhs1);
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3059 fprintf (dump_file, " into ");
3060 print_gimple_stmt (dump_file, stmt, 0, 0);
3063 return;
3066 /* If we hit here, we should have 3 or more ops left. */
3067 gcc_assert (opindex + 2 < ops.length ());
3069 /* Rewrite the next operator. */
3070 oe = ops[opindex];
3072 if (oe->op != rhs2)
3074 if (!moved)
3076 ensure_ops_are_available (stmt, ops, opindex);
3077 moved = true;
3080 if (dump_file && (dump_flags & TDF_DETAILS))
3082 fprintf (dump_file, "Transforming ");
3083 print_gimple_stmt (dump_file, stmt, 0, 0);
3086 gimple_assign_set_rhs2 (stmt, oe->op);
3087 update_stmt (stmt);
3089 if (dump_file && (dump_flags & TDF_DETAILS))
3091 fprintf (dump_file, " into ");
3092 print_gimple_stmt (dump_file, stmt, 0, 0);
3095 /* Recurse on the LHS of the binary operator, which is guaranteed to
3096 be the non-leaf side. */
3097 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
3100 /* Find out how many cycles we need to compute a statement chain.
3101 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3102 maximum number of independent statements we may execute per cycle. */
3104 static int
3105 get_required_cycles (int ops_num, int cpu_width)
3107 int res;
3108 int elog;
3109 unsigned int rest;
3111 /* While we have more than 2 * cpu_width operands
3112 we may reduce the number of operands by cpu_width
3113 per cycle. */
3114 res = ops_num / (2 * cpu_width);
3116 /* The remaining operand count may be halved each cycle
3117 until we have only one operand. */
3118 rest = (unsigned)(ops_num - res * cpu_width);
3119 elog = exact_log2 (rest);
3120 if (elog >= 0)
3121 res += elog;
3122 else
3123 res += floor_log2 (rest) + 1;
3125 return res;
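/* Tracing the formula on small numbers (illustration only): for
   ops_num = 10 and cpu_width = 2, res = 10 / 4 = 2 full-width cycles,
   rest = 10 - 2 * 2 = 6, which is not a power of two, so
   res += floor_log2 (6) + 1 = 3, i.e. 5 cycles in total.  With
   cpu_width = 1 the same chain needs 10 / 2 = 5 plus
   floor_log2 (5) + 1 = 3, i.e. 8 cycles.  */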
3128 /* Returns an optimal number of registers to use for computation of
3129 the given statements. */
3131 static int
3132 get_reassociation_width (int ops_num, enum tree_code opc,
3133 enum machine_mode mode)
3135 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3136 int width;
3137 int width_min;
3138 int cycles_best;
3140 if (param_width > 0)
3141 width = param_width;
3142 else
3143 width = targetm.sched.reassociation_width (opc, mode);
3145 if (width == 1)
3146 return width;
3148 /* Get the minimal time required for sequence computation. */
3149 cycles_best = get_required_cycles (ops_num, width);
3151 /* Check if we may use less width and still compute the sequence in
3152 the same number of cycles. That will allow us to reduce register usage.
3153 get_required_cycles is monotonically increasing as width decreases,
3154 so we can perform a binary search for the minimal width that still
3155 results in the optimal cycle count. */
3156 width_min = 1;
3157 while (width > width_min)
3159 int width_mid = (width + width_min) / 2;
3161 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3162 width = width_mid;
3163 else if (width_min < width_mid)
3164 width_min = width_mid;
3165 else
3166 break;
3169 return width;
3172 /* Recursively rewrite our linearized statements so that the operators
3173 match those in OPS[OPINDEX], putting the computation in rank
3174 order and trying to allow operations to be executed in
3175 parallel. */
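/* A rough sketch of the intent (hypothetical SSA names): with
   OPS = {a, b, c, d} and width 2, instead of the serial chain
     _1 = a + b; _2 = _1 + c; _3 = _2 + d;
   the rewrite aims for
     _1 = a + b; _2 = c + d; _3 = _1 + _2;
   so the two leading additions can issue in the same cycle.  */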
3177 static void
3178 rewrite_expr_tree_parallel (gimple stmt, int width,
3179 vec<operand_entry_t> ops)
3181 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3182 int op_num = ops.length ();
3183 int stmt_num = op_num - 1;
3184 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3185 int op_index = op_num - 1;
3186 int stmt_index = 0;
3187 int ready_stmts_end = 0;
3188 int i = 0;
3189 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3191 /* We start expression rewriting from the top statements.
3192 So, in this loop we create a full list of statements
3193 we will work with. */
3194 stmts[stmt_num - 1] = stmt;
3195 for (i = stmt_num - 2; i >= 0; i--)
3196 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3198 for (i = 0; i < stmt_num; i++)
3200 tree op1, op2;
3202 /* Determine whether we should use results of
3203 already handled statements or not. */
3204 if (ready_stmts_end == 0
3205 && (i - stmt_index >= width || op_index < 1))
3206 ready_stmts_end = i;
3208 /* Now we choose operands for the next statement. A nonzero
3209 value in ready_stmts_end means that we should use
3210 the result of already generated statements as a new operand. */
3211 if (ready_stmts_end > 0)
3213 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3214 if (ready_stmts_end > stmt_index)
3215 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3216 else if (op_index >= 0)
3217 op2 = ops[op_index--]->op;
3218 else
3220 gcc_assert (stmt_index < i);
3221 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3224 if (stmt_index >= ready_stmts_end)
3225 ready_stmts_end = 0;
3227 else
3229 if (op_index > 1)
3230 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3231 op2 = ops[op_index--]->op;
3232 op1 = ops[op_index--]->op;
3235 /* If we emit the last statement then we should put
3236 operands into the last statement. It will also
3237 break the loop. */
3238 if (op_index < 0 && stmt_index == i)
3239 i = stmt_num - 1;
3241 if (dump_file && (dump_flags & TDF_DETAILS))
3243 fprintf (dump_file, "Transforming ");
3244 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3247 /* We keep the original statement only for the last one. All
3248 others are recreated. */
3249 if (i == stmt_num - 1)
3251 gimple_assign_set_rhs1 (stmts[i], op1);
3252 gimple_assign_set_rhs2 (stmts[i], op2);
3253 update_stmt (stmts[i]);
3255 else
3256 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3258 if (dump_file && (dump_flags & TDF_DETAILS))
3260 fprintf (dump_file, " into ");
3261 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3265 remove_visited_stmt_chain (last_rhs1);
3268 /* Transform STMT, which is really (A + B) + (C + D), into the left
3269 linear form, ((A+B)+C)+D.
3270 Recurse on D if necessary. */
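/* Roughly, for x = a + b; y = c + d; z = x + y; (hypothetical names)
   the statements become y = x + d; z = y + c; which is the left-linear
   form up to operand order, and the function then tail-recurses on STMT
   if its new second operand is itself defined by a reassociable
   operation of the same code.  */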
3272 static void
3273 linearize_expr (gimple stmt)
3275 gimple_stmt_iterator gsinow, gsirhs;
3276 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3277 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3278 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3279 gimple newbinrhs = NULL;
3280 struct loop *loop = loop_containing_stmt (stmt);
3282 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3283 && is_reassociable_op (binrhs, rhscode, loop));
3285 gsinow = gsi_for_stmt (stmt);
3286 gsirhs = gsi_for_stmt (binrhs);
3287 gsi_move_before (&gsirhs, &gsinow);
3288 gimple_set_uid (binrhs, gimple_uid (stmt));
3290 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3291 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3292 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3294 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3295 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3297 if (dump_file && (dump_flags & TDF_DETAILS))
3299 fprintf (dump_file, "Linearized: ");
3300 print_gimple_stmt (dump_file, stmt, 0, 0);
3303 reassociate_stats.linearized++;
3304 update_stmt (binrhs);
3305 update_stmt (binlhs);
3306 update_stmt (stmt);
3308 gimple_set_visited (stmt, true);
3309 gimple_set_visited (binlhs, true);
3310 gimple_set_visited (binrhs, true);
3312 /* Tail recurse on the new rhs if it still needs reassociation. */
3313 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3314 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3315 want to change the algorithm while converting to tuples. */
3316 linearize_expr (stmt);
3319 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3320 it. Otherwise, return NULL. */
3322 static gimple
3323 get_single_immediate_use (tree lhs)
3325 use_operand_p immuse;
3326 gimple immusestmt;
3328 if (TREE_CODE (lhs) == SSA_NAME
3329 && single_imm_use (lhs, &immuse, &immusestmt)
3330 && is_gimple_assign (immusestmt))
3331 return immusestmt;
3333 return NULL;
3336 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3337 representing the negated value. Insertions of any necessary
3338 instructions go before GSI.
3339 This function is recursive in that, if you hand it "a_5" as the
3340 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3341 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3343 static tree
3344 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3346 gimple negatedefstmt= NULL;
3347 tree resultofnegate;
3349 /* If we are trying to negate a name, defined by an add, negate the
3350 add operands instead. */
3351 if (TREE_CODE (tonegate) == SSA_NAME)
3352 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3353 if (TREE_CODE (tonegate) == SSA_NAME
3354 && is_gimple_assign (negatedefstmt)
3355 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3356 && has_single_use (gimple_assign_lhs (negatedefstmt))
3357 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3359 gimple_stmt_iterator gsi;
3360 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3361 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3363 gsi = gsi_for_stmt (negatedefstmt);
3364 rhs1 = negate_value (rhs1, &gsi);
3365 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3367 gsi = gsi_for_stmt (negatedefstmt);
3368 rhs2 = negate_value (rhs2, &gsi);
3369 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3371 update_stmt (negatedefstmt);
3372 return gimple_assign_lhs (negatedefstmt);
3375 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3376 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3377 NULL_TREE, true, GSI_SAME_STMT);
3378 return resultofnegate;
3381 /* Return true if we should break up the subtract in STMT into an add
3382 with negate. This is true when the subtract operands are really
3383 adds, or the subtract itself is used in an add expression. In
3384 either case, breaking up the subtract into an add with negate
3385 exposes the adds to reassociation. */
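/* For illustration: with t = (a + b) - c, where t is later used only in
   u = t + d, the subtract qualifies, and rewriting it as
   t = (a + b) + (-c) lets a, b, -c and d all join one reassociable
   PLUS chain.  */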
3387 static bool
3388 should_break_up_subtract (gimple stmt)
3390 tree lhs = gimple_assign_lhs (stmt);
3391 tree binlhs = gimple_assign_rhs1 (stmt);
3392 tree binrhs = gimple_assign_rhs2 (stmt);
3393 gimple immusestmt;
3394 struct loop *loop = loop_containing_stmt (stmt);
3396 if (TREE_CODE (binlhs) == SSA_NAME
3397 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3398 return true;
3400 if (TREE_CODE (binrhs) == SSA_NAME
3401 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3402 return true;
3404 if (TREE_CODE (lhs) == SSA_NAME
3405 && (immusestmt = get_single_immediate_use (lhs))
3406 && is_gimple_assign (immusestmt)
3407 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3408 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3409 return true;
3410 return false;
3413 /* Transform STMT from A - B into A + -B. */
3415 static void
3416 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3418 tree rhs1 = gimple_assign_rhs1 (stmt);
3419 tree rhs2 = gimple_assign_rhs2 (stmt);
3421 if (dump_file && (dump_flags & TDF_DETAILS))
3423 fprintf (dump_file, "Breaking up subtract ");
3424 print_gimple_stmt (dump_file, stmt, 0, 0);
3427 rhs2 = negate_value (rhs2, gsip);
3428 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3429 update_stmt (stmt);
3432 /* Determine whether STMT is a builtin call that raises an SSA name
3433 to an integer power and has only one use. If so, and this is early
3434 reassociation and unsafe math optimizations are permitted, place
3435 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3436 If any of these conditions does not hold, return FALSE. */
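/* Illustrative cases (hypothetical SSA name x_1): __builtin_pow (x_1, 3.0)
   yields *BASE = x_1 and *EXPONENT = 3; pow (x_1, 2.5) is rejected because
   2.5 is not an exact integer; and powi (x_1, 1) is rejected because
   exponents below 2 are not worth expanding here.  */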
3438 static bool
3439 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3441 tree fndecl, arg1;
3442 REAL_VALUE_TYPE c, cint;
3444 if (!first_pass_instance
3445 || !flag_unsafe_math_optimizations
3446 || !is_gimple_call (stmt)
3447 || !has_single_use (gimple_call_lhs (stmt)))
3448 return false;
3450 fndecl = gimple_call_fndecl (stmt);
3452 if (!fndecl
3453 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3454 return false;
3456 switch (DECL_FUNCTION_CODE (fndecl))
3458 CASE_FLT_FN (BUILT_IN_POW):
3459 *base = gimple_call_arg (stmt, 0);
3460 arg1 = gimple_call_arg (stmt, 1);
3462 if (TREE_CODE (arg1) != REAL_CST)
3463 return false;
3465 c = TREE_REAL_CST (arg1);
3467 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3468 return false;
3470 *exponent = real_to_integer (&c);
3471 real_from_integer (&cint, VOIDmode, *exponent,
3472 *exponent < 0 ? -1 : 0, 0);
3473 if (!real_identical (&c, &cint))
3474 return false;
3476 break;
3478 CASE_FLT_FN (BUILT_IN_POWI):
3479 *base = gimple_call_arg (stmt, 0);
3480 arg1 = gimple_call_arg (stmt, 1);
3482 if (!host_integerp (arg1, 0))
3483 return false;
3485 *exponent = TREE_INT_CST_LOW (arg1);
3486 break;
3488 default:
3489 return false;
3492 /* Expanding negative exponents is generally unproductive, so we don't
3493 complicate matters with those. Exponents of zero and one should
3494 have been handled by expression folding. */
3495 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3496 return false;
3498 return true;
3501 /* Recursively linearize a binary expression that is the RHS of STMT.
3502 Place the operands of the expression tree in the vector named OPS. */
3504 static void
3505 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3506 bool is_associative, bool set_visited)
3508 tree binlhs = gimple_assign_rhs1 (stmt);
3509 tree binrhs = gimple_assign_rhs2 (stmt);
3510 gimple binlhsdef = NULL, binrhsdef = NULL;
3511 bool binlhsisreassoc = false;
3512 bool binrhsisreassoc = false;
3513 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3514 struct loop *loop = loop_containing_stmt (stmt);
3515 tree base = NULL_TREE;
3516 HOST_WIDE_INT exponent = 0;
3518 if (set_visited)
3519 gimple_set_visited (stmt, true);
3521 if (TREE_CODE (binlhs) == SSA_NAME)
3523 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3524 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3525 && !stmt_could_throw_p (binlhsdef));
3528 if (TREE_CODE (binrhs) == SSA_NAME)
3530 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3531 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3532 && !stmt_could_throw_p (binrhsdef));
3535 /* If the LHS is not reassociable, but the RHS is, we need to swap
3536 them. If neither is reassociable, there is nothing we can do, so
3537 just put them in the ops vector. If the LHS is reassociable,
3538 linearize it. If both are reassociable, then linearize the RHS
3539 and the LHS. */
3541 if (!binlhsisreassoc)
3543 tree temp;
3545 /* If this is not an associative operation, like division, give up. */
3546 if (!is_associative)
3548 add_to_ops_vec (ops, binrhs);
3549 return;
3552 if (!binrhsisreassoc)
3554 if (rhscode == MULT_EXPR
3555 && TREE_CODE (binrhs) == SSA_NAME
3556 && acceptable_pow_call (binrhsdef, &base, &exponent))
3558 add_repeat_to_ops_vec (ops, base, exponent);
3559 gimple_set_visited (binrhsdef, true);
3561 else
3562 add_to_ops_vec (ops, binrhs);
3564 if (rhscode == MULT_EXPR
3565 && TREE_CODE (binlhs) == SSA_NAME
3566 && acceptable_pow_call (binlhsdef, &base, &exponent))
3568 add_repeat_to_ops_vec (ops, base, exponent);
3569 gimple_set_visited (binlhsdef, true);
3571 else
3572 add_to_ops_vec (ops, binlhs);
3574 return;
3577 if (dump_file && (dump_flags & TDF_DETAILS))
3579 fprintf (dump_file, "swapping operands of ");
3580 print_gimple_stmt (dump_file, stmt, 0, 0);
3583 swap_ssa_operands (stmt,
3584 gimple_assign_rhs1_ptr (stmt),
3585 gimple_assign_rhs2_ptr (stmt));
3586 update_stmt (stmt);
3588 if (dump_file && (dump_flags & TDF_DETAILS))
3590 fprintf (dump_file, " is now ");
3591 print_gimple_stmt (dump_file, stmt, 0, 0);
3594 /* We want to make it so the lhs is always the reassociative op,
3595 so swap. */
3596 temp = binlhs;
3597 binlhs = binrhs;
3598 binrhs = temp;
3600 else if (binrhsisreassoc)
3602 linearize_expr (stmt);
3603 binlhs = gimple_assign_rhs1 (stmt);
3604 binrhs = gimple_assign_rhs2 (stmt);
3607 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3608 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3609 rhscode, loop));
3610 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3611 is_associative, set_visited);
3613 if (rhscode == MULT_EXPR
3614 && TREE_CODE (binrhs) == SSA_NAME
3615 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3617 add_repeat_to_ops_vec (ops, base, exponent);
3618 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3620 else
3621 add_to_ops_vec (ops, binrhs);
3624 /* Repropagate the negates back into subtracts, since no other pass
3625 currently does it. */
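/* At a glance (hypothetical names): a user y = b + x of x = -a is turned
   into y = b - a below; the MINUS_EXPR cases further down instead push
   the negate past the subtraction, as their inline comments show.  */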
3627 static void
3628 repropagate_negates (void)
3630 unsigned int i = 0;
3631 tree negate;
3633 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3635 gimple user = get_single_immediate_use (negate);
3637 if (!user || !is_gimple_assign (user))
3638 continue;
3640 /* The negate operand can be either operand of a PLUS_EXPR
3641 (it can be the LHS if the RHS is a constant for example).
3643 Force the negate operand to the RHS of the PLUS_EXPR, then
3644 transform the PLUS_EXPR into a MINUS_EXPR. */
3645 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3647 /* If the negated operand appears on the LHS of the
3648 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3649 to force the negated operand to the RHS of the PLUS_EXPR. */
3650 if (gimple_assign_rhs1 (user) == negate)
3652 swap_ssa_operands (user,
3653 gimple_assign_rhs1_ptr (user),
3654 gimple_assign_rhs2_ptr (user));
3657 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3658 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3659 if (gimple_assign_rhs2 (user) == negate)
3661 tree rhs1 = gimple_assign_rhs1 (user);
3662 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3663 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3664 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3665 update_stmt (user);
3668 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3670 if (gimple_assign_rhs1 (user) == negate)
3672 /* We have
3673 x = -a
3674 y = x - b
3675 which we transform into
3676 x = a + b
3677 y = -x .
3678 This pushes down the negate which we possibly can merge
3679 into some other operation, hence insert it into the
3680 plus_negates vector. */
3681 gimple feed = SSA_NAME_DEF_STMT (negate);
3682 tree a = gimple_assign_rhs1 (feed);
3683 tree rhs2 = gimple_assign_rhs2 (user);
3684 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3685 gimple_replace_ssa_lhs (feed, negate);
3686 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3687 update_stmt (gsi_stmt (gsi));
3688 gsi2 = gsi_for_stmt (user);
3689 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3690 update_stmt (gsi_stmt (gsi2));
3691 gsi_move_before (&gsi, &gsi2);
3692 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3694 else
3696 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3697 rid of one operation. */
3698 gimple feed = SSA_NAME_DEF_STMT (negate);
3699 tree a = gimple_assign_rhs1 (feed);
3700 tree rhs1 = gimple_assign_rhs1 (user);
3701 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3702 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3703 update_stmt (gsi_stmt (gsi));
3709 /* Returns true if OP is of a type for which we can do reassociation.
3710 That is for integral or non-saturating fixed-point types, and for
3711 floating point types when associative-math is enabled. */
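/* Illustrative consequences of this check: an unsigned int operand
   qualifies; a plain signed int qualifies only when signed overflow is
   defined to wrap (TYPE_OVERFLOW_WRAPS, e.g. with -fwrapv); and a double
   operand qualifies only under -fassociative-math.  */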
3713 static bool
3714 can_reassociate_p (tree op)
3716 tree type = TREE_TYPE (op);
3717 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3718 || NON_SAT_FIXED_POINT_TYPE_P (type)
3719 || (flag_associative_math && FLOAT_TYPE_P (type)))
3720 return true;
3721 return false;
3724 /* Break up subtract operations in block BB.
3726 We do this top down because we don't know whether the subtract is
3727 part of a possible chain of reassociation except at the top.
3729 IE given
3730 d = f + g
3731 c = a + e
3732 b = c - d
3733 q = b - r
3734 k = t - q
3736 we want to break up k = t - q, but we won't until we've transformed q
3737 = b - r, which won't be broken up until we transform b = c - d.
3739 En passant, clear the GIMPLE visited flag on every statement. */
3741 static void
3742 break_up_subtract_bb (basic_block bb)
3744 gimple_stmt_iterator gsi;
3745 basic_block son;
3747 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3749 gimple stmt = gsi_stmt (gsi);
3750 gimple_set_visited (stmt, false);
3752 if (!is_gimple_assign (stmt)
3753 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3754 continue;
3756 /* Look for simple gimple subtract operations. */
3757 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3759 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3760 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3761 continue;
3763 /* Check for a subtract used only in an addition. If this
3764 is the case, transform it into add of a negate for better
3765 reassociation. IE transform C = A-B into C = A + -B if C
3766 is only used in an addition. */
3767 if (should_break_up_subtract (stmt))
3768 break_up_subtract (stmt, &gsi);
3770 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3771 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3772 plus_negates.safe_push (gimple_assign_lhs (stmt));
3774 for (son = first_dom_son (CDI_DOMINATORS, bb);
3775 son;
3776 son = next_dom_son (CDI_DOMINATORS, son))
3777 break_up_subtract_bb (son);
3780 /* Used for repeated factor analysis. */
3781 struct repeat_factor_d
3783 /* An SSA name that occurs in a multiply chain. */
3784 tree factor;
3786 /* Cached rank of the factor. */
3787 unsigned rank;
3789 /* Number of occurrences of the factor in the chain. */
3790 HOST_WIDE_INT count;
3792 /* An SSA name representing the product of this factor and
3793 all factors appearing later in the repeated factor vector. */
3794 tree repr;
3797 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3798 typedef const struct repeat_factor_d *const_repeat_factor_t;
3801 static vec<repeat_factor> repeat_factor_vec;
3803 /* Used for sorting the repeat factor vector. Sort primarily by
3804 ascending occurrence count, secondarily by descending rank. */
3806 static int
3807 compare_repeat_factors (const void *x1, const void *x2)
3809 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3810 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3812 if (rf1->count != rf2->count)
3813 return rf1->count - rf2->count;
3815 return rf2->rank - rf1->rank;
3818 /* Look for repeated operands in OPS in the multiply tree rooted at
3819 STMT. Replace them with an optimal sequence of multiplies and powi
3820 builtin calls, and remove the used operands from OPS. Return an
3821 SSA name representing the value of the replacement sequence. */
3823 static tree
3824 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3826 unsigned i, j, vec_len;
3827 int ii;
3828 operand_entry_t oe;
3829 repeat_factor_t rf1, rf2;
3830 repeat_factor rfnew;
3831 tree result = NULL_TREE;
3832 tree target_ssa, iter_result;
3833 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3834 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3835 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3836 gimple mul_stmt, pow_stmt;
3838 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3839 target. */
3840 if (!powi_fndecl)
3841 return NULL_TREE;
3843 /* Allocate the repeated factor vector. */
3844 repeat_factor_vec.create (10);
3846 /* Scan the OPS vector for all SSA names in the product and build
3847 up a vector of occurrence counts for each factor. */
3848 FOR_EACH_VEC_ELT (*ops, i, oe)
3850 if (TREE_CODE (oe->op) == SSA_NAME)
3852 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3854 if (rf1->factor == oe->op)
3856 rf1->count += oe->count;
3857 break;
3861 if (j >= repeat_factor_vec.length ())
3863 rfnew.factor = oe->op;
3864 rfnew.rank = oe->rank;
3865 rfnew.count = oe->count;
3866 rfnew.repr = NULL_TREE;
3867 repeat_factor_vec.safe_push (rfnew);
3872 /* Sort the repeated factor vector by (a) increasing occurrence count,
3873 and (b) decreasing rank. */
3874 repeat_factor_vec.qsort (compare_repeat_factors);
3876 /* It is generally best to combine as many base factors as possible
3877 into a product before applying __builtin_powi to the result.
3878 However, the sort order chosen for the repeated factor vector
3879 allows us to cache partial results for the product of the base
3880 factors for subsequent use. When we already have a cached partial
3881 result from a previous iteration, it is best to make use of it
3882 before looking for another __builtin_pow opportunity.
3884 As an example, consider x * x * y * y * y * z * z * z * z.
3885 We want to first compose the product x * y * z, raise it to the
3886 second power, then multiply this by y * z, and finally multiply
3887 by z. This can be done in 5 multiplies provided we cache y * z
3888 for use in both expressions:
3890 t1 = y * z
3891 t2 = t1 * x
3892 t3 = t2 * t2
3893 t4 = t1 * t3
3894 result = t4 * z
3896 If we instead ignored the cached y * z and first multiplied by
3897 the __builtin_pow opportunity z * z, we would get the inferior:
3899 t1 = y * z
3900 t2 = t1 * x
3901 t3 = t2 * t2
3902 t4 = z * z
3903 t5 = t3 * t4
3904 result = t5 * y */
3906 vec_len = repeat_factor_vec.length ();
3908 /* Repeatedly look for opportunities to create a builtin_powi call. */
3909 while (true)
3911 HOST_WIDE_INT power;
3913 /* First look for the largest cached product of factors from
3914 preceding iterations. If found, create a builtin_powi for
3915 it if the minimum occurrence count for its factors is at
3916 least 2, or just use this cached product as our next
3917 multiplicand if the minimum occurrence count is 1. */
3918 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3920 if (rf1->repr && rf1->count > 0)
3921 break;
3924 if (j < vec_len)
3926 power = rf1->count;
3928 if (power == 1)
3930 iter_result = rf1->repr;
3932 if (dump_file && (dump_flags & TDF_DETAILS))
3934 unsigned elt;
3935 repeat_factor_t rf;
3936 fputs ("Multiplying by cached product ", dump_file);
3937 for (elt = j; elt < vec_len; elt++)
3939 rf = &repeat_factor_vec[elt];
3940 print_generic_expr (dump_file, rf->factor, 0);
3941 if (elt < vec_len - 1)
3942 fputs (" * ", dump_file);
3944 fputs ("\n", dump_file);
3947 else
3949 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3950 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3951 build_int_cst (integer_type_node,
3952 power));
3953 gimple_call_set_lhs (pow_stmt, iter_result);
3954 gimple_set_location (pow_stmt, gimple_location (stmt));
3955 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3957 if (dump_file && (dump_flags & TDF_DETAILS))
3959 unsigned elt;
3960 repeat_factor_t rf;
3961 fputs ("Building __builtin_pow call for cached product (",
3962 dump_file);
3963 for (elt = j; elt < vec_len; elt++)
3965 rf = &repeat_factor_vec[elt];
3966 print_generic_expr (dump_file, rf->factor, 0);
3967 if (elt < vec_len - 1)
3968 fputs (" * ", dump_file);
3970 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3971 power);
3975 else
3977 /* Otherwise, find the first factor in the repeated factor
3978 vector whose occurrence count is at least 2. If no such
3979 factor exists, there are no builtin_powi opportunities
3980 remaining. */
3981 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3983 if (rf1->count >= 2)
3984 break;
3987 if (j >= vec_len)
3988 break;
3990 power = rf1->count;
3992 if (dump_file && (dump_flags & TDF_DETAILS))
3994 unsigned elt;
3995 repeat_factor_t rf;
3996 fputs ("Building __builtin_pow call for (", dump_file);
3997 for (elt = j; elt < vec_len; elt++)
3999 rf = &repeat_factor_vec[elt];
4000 print_generic_expr (dump_file, rf->factor, 0);
4001 if (elt < vec_len - 1)
4002 fputs (" * ", dump_file);
4004 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4007 reassociate_stats.pows_created++;
4009 /* Visit each element of the vector in reverse order (so that
4010 high-occurrence elements are visited first, and within the
4011 same occurrence count, lower-ranked elements are visited
4012 first). Form a linear product of all elements in this order
4013 whose occurrence count is at least that of element J.
4014 Record the SSA name representing the product of each element
4015 with all subsequent elements in the vector. */
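      /* Illustrative example (assuming the factor counts from the comment
	 near the top of this function: x with count 2, y with count 3,
	 z with count 4): the loop below records z->repr = z,
	 y->repr = y * z and x->repr = x * (y * z), so each repr names the
	 product of that factor with everything that follows it in the
	 vector.  */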
4016 if (j == vec_len - 1)
4017 rf1->repr = rf1->factor;
4018 else
4020 for (ii = vec_len - 2; ii >= (int)j; ii--)
4022 tree op1, op2;
4024 rf1 = &repeat_factor_vec[ii];
4025 rf2 = &repeat_factor_vec[ii + 1];
4027 /* Init the last factor's representative to be itself. */
4028 if (!rf2->repr)
4029 rf2->repr = rf2->factor;
4031 op1 = rf1->factor;
4032 op2 = rf2->repr;
4034 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4035 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4036 target_ssa,
4037 op1, op2);
4038 gimple_set_location (mul_stmt, gimple_location (stmt));
4039 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4040 rf1->repr = target_ssa;
4042 /* Don't reprocess the multiply we just introduced. */
4043 gimple_set_visited (mul_stmt, true);
4047 /* Form a call to __builtin_powi for the maximum product
4048 just formed, raised to the power obtained earlier. */
4049 rf1 = &repeat_factor_vec[j];
4050 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4051 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4052 build_int_cst (integer_type_node,
4053 power));
4054 gimple_call_set_lhs (pow_stmt, iter_result);
4055 gimple_set_location (pow_stmt, gimple_location (stmt));
4056 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4059 /* If we previously formed at least one other builtin_powi call,
4060 form the product of this one and those others. */
4061 if (result)
4063 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4064 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4065 result, iter_result);
4066 gimple_set_location (mul_stmt, gimple_location (stmt));
4067 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4068 gimple_set_visited (mul_stmt, true);
4069 result = new_result;
4071 else
4072 result = iter_result;
4074 /* Decrement the occurrence count of each element in the product
4075 by the count found above, and remove this many copies of each
4076 factor from OPS. */
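      /* For example, once the first (x*y*z)^2 of the example above has
	 been formed with power == 2, the counts drop to x:0, y:1, z:2 and
	 two copies of each of x, y and z are removed from OPS
	 (illustrative, assuming the factor counts given earlier).  */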
4077 for (i = j; i < vec_len; i++)
4079 unsigned k = power;
4080 unsigned n;
4082 rf1 = &repeat_factor_vec[i];
4083 rf1->count -= power;
4085 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4087 if (oe->op == rf1->factor)
4089 if (oe->count <= k)
4091 ops->ordered_remove (n);
4092 k -= oe->count;
4094 if (k == 0)
4095 break;
4097 else
4099 oe->count -= k;
4100 break;
4107 /* At this point all elements in the repeated factor vector have a
4108 remaining occurrence count of 0 or 1, and those with a count of 1
4109 don't have cached representatives. Re-sort the ops vector and
4110 clean up. */
4111 ops->qsort (sort_by_operand_rank);
4112 repeat_factor_vec.release ();
4114 /* Return the final product computed herein. Note that there may
4115 still be some elements with single occurrence count left in OPS;
4116 those will be handled by the normal reassociation logic. */
4117 return result;
4120 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4122 static void
4123 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4125 tree rhs1;
4127 if (dump_file && (dump_flags & TDF_DETAILS))
4129 fprintf (dump_file, "Transforming ");
4130 print_gimple_stmt (dump_file, stmt, 0, 0);
4133 rhs1 = gimple_assign_rhs1 (stmt);
4134 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4135 update_stmt (stmt);
4136 remove_visited_stmt_chain (rhs1);
4138 if (dump_file && (dump_flags & TDF_DETAILS))
4140 fprintf (dump_file, " into ");
4141 print_gimple_stmt (dump_file, stmt, 0, 0);
4145 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4147 static void
4148 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4149 tree rhs1, tree rhs2)
4151 if (dump_file && (dump_flags & TDF_DETAILS))
4153 fprintf (dump_file, "Transforming ");
4154 print_gimple_stmt (dump_file, stmt, 0, 0);
4157 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4158 update_stmt (gsi_stmt (*gsi));
4159 remove_visited_stmt_chain (rhs1);
4161 if (dump_file && (dump_flags & TDF_DETAILS))
4163 fprintf (dump_file, " into ");
4164 print_gimple_stmt (dump_file, stmt, 0, 0);
4168 /* Reassociate expressions in basic block BB and its children in the
4169 post-dominator tree. */
4171 static void
4172 reassociate_bb (basic_block bb)
4174 gimple_stmt_iterator gsi;
4175 basic_block son;
4176 gimple stmt = last_stmt (bb);
4178 if (stmt && !gimple_visited_p (stmt))
4179 maybe_optimize_range_tests (stmt);
4181 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4183 stmt = gsi_stmt (gsi);
4185 if (is_gimple_assign (stmt)
4186 && !stmt_could_throw_p (stmt))
4188 tree lhs, rhs1, rhs2;
4189 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4191 /* If this is not a gimple binary expression, there is
4192 nothing for us to do with it. */
4193 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4194 continue;
4196 /* If this was part of an already processed statement,
4197 we don't need to touch it again. */
4198 if (gimple_visited_p (stmt))
4200 /* This statement might have become dead because of previous
4201 reassociations. */
4202 if (has_zero_uses (gimple_get_lhs (stmt)))
4204 gsi_remove (&gsi, true);
4205 release_defs (stmt);
4206 /* We might have removed the last stmt above, which
4207 leaves the iterator at the end of the sequence.
4208 Reset it to the last stmt of the block in that
4209 case; if the block is now empty, the iterator is
4210 still at the end of the sequence and we need to
4211 bail out. */
4212 if (gsi_end_p (gsi))
4214 gsi = gsi_last_bb (bb);
4215 if (gsi_end_p (gsi))
4216 break;
4219 continue;
4222 lhs = gimple_assign_lhs (stmt);
4223 rhs1 = gimple_assign_rhs1 (stmt);
4224 rhs2 = gimple_assign_rhs2 (stmt);
4226 /* For operations other than the bitwise ops and min/max
4227 we can't associate all types. Verify that here. */
4228 if (rhs_code != BIT_IOR_EXPR
4229 && rhs_code != BIT_AND_EXPR
4230 && rhs_code != BIT_XOR_EXPR
4231 && rhs_code != MIN_EXPR
4232 && rhs_code != MAX_EXPR
4233 && (!can_reassociate_p (lhs)
4234 || !can_reassociate_p (rhs1)
4235 || !can_reassociate_p (rhs2)))
4236 continue;
4238 if (associative_tree_code (rhs_code))
4240 vec<operand_entry_t> ops = vNULL;
4241 tree powi_result = NULL_TREE;
4243 /* There may be no immediate uses left by the time we
4244 get here because we may have eliminated them all. */
4245 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4246 continue;
4248 gimple_set_visited (stmt, true);
4249 linearize_expr_tree (&ops, stmt, true, true);
4250 ops.qsort (sort_by_operand_rank);
4251 optimize_ops_list (rhs_code, &ops);
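	    /* Undistribution may expose further simplifications; e.g. a
	       PLUS operand list containing a*c and b*c can be rewritten
	       in terms of (a + b) * c (illustrative example, not taken
	       from this function).  */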
4252 if (undistribute_ops_list (rhs_code, &ops,
4253 loop_containing_stmt (stmt)))
4255 ops.qsort (sort_by_operand_rank);
4256 optimize_ops_list (rhs_code, &ops);
4259 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4260 optimize_range_tests (rhs_code, &ops);
4262 if (first_pass_instance
4263 && rhs_code == MULT_EXPR
4264 && flag_unsafe_math_optimizations)
4265 powi_result = attempt_builtin_powi (stmt, &ops);
4267 /* If the operand vector is now empty, all operands were
4268 consumed by the __builtin_powi optimization. */
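	    /* For instance, x * x * x * x is consumed entirely: the
	       factor x has occurrence count 4, a single
	       __builtin_powi (x, 4) is built, and OPS becomes empty
	       (illustrative example).  */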
4269 if (ops.length () == 0)
4270 transform_stmt_to_copy (&gsi, stmt, powi_result);
4271 else if (ops.length () == 1)
4273 tree last_op = ops.last ()->op;
4275 if (powi_result)
4276 transform_stmt_to_multiply (&gsi, stmt, last_op,
4277 powi_result);
4278 else
4279 transform_stmt_to_copy (&gsi, stmt, last_op);
4281 else
4283 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4284 int ops_num = ops.length ();
4285 int width = get_reassociation_width (ops_num, rhs_code, mode);
4287 if (dump_file && (dump_flags & TDF_DETAILS))
4288 fprintf (dump_file,
4289 "Width = %d was chosen for reassociation\n", width);
4291 if (width > 1
4292 && ops.length () > 3)
4293 rewrite_expr_tree_parallel (stmt, width, ops);
4294 else
4296 /* When there are three operands left, we want
4297 to make sure the ones that get the double
4298 binary op are chosen wisely. */
4299 int len = ops.length ();
4300 if (len >= 3)
4301 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4303 rewrite_expr_tree (stmt, 0, ops, false);
4306 /* If we combined some repeated factors into a
4307 __builtin_powi call, multiply that result by the
4308 reassociated operands. */
4309 if (powi_result)
4311 gimple mul_stmt;
4312 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4313 tree target_ssa = make_temp_ssa_name (type, NULL,
4314 "reassocpow");
4315 gimple_set_lhs (stmt, target_ssa);
4316 update_stmt (stmt);
4317 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4318 powi_result,
4319 target_ssa);
4320 gimple_set_location (mul_stmt, gimple_location (stmt));
4321 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4325 ops.release ();
4329 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4330 son;
4331 son = next_dom_son (CDI_POST_DOMINATORS, son))
4332 reassociate_bb (son);
4335 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4336 void debug_ops_vector (vec<operand_entry_t> ops);
4338 /* Dump the operand entry vector OPS to FILE. */
4340 void
4341 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4343 operand_entry_t oe;
4344 unsigned int i;
4346 FOR_EACH_VEC_ELT (ops, i, oe)
4348 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4349 print_generic_expr (file, oe->op, 0);
4353 /* Dump the operand entry vector OPS to STDERR. */
4355 DEBUG_FUNCTION void
4356 debug_ops_vector (vec<operand_entry_t> ops)
4358 dump_ops_vector (stderr, ops);
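/* Perform reassociation over the current function: subtracts are broken
   up by walking the dominator tree from the entry block, statement UIDs
   are renumbered, and the main rewrite walks the post-dominator tree
   starting at the exit block (see reassociate_bb above).  */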
4361 static void
4362 do_reassoc (void)
4364 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4365 renumber_gimple_stmt_uids ();
4366 reassociate_bb (EXIT_BLOCK_PTR);
4369 /* Initialize the reassociation pass. */
4371 static void
4372 init_reassoc (void)
4374 int i;
4375 long rank = 2;
4376 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4378 /* Find the loops, so that we can prevent moving calculations into
4379 them. */
4380 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4382 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4384 operand_entry_pool = create_alloc_pool ("operand entry pool",
4385 sizeof (struct operand_entry), 30);
4386 next_operand_entry_id = 0;
4388 /* RPO (Reverse Post Order) will give us an ordering where
4389 deeper loops come later. */
4390 pre_and_rev_post_order_compute (NULL, bbs, false);
4391 bb_rank = XCNEWVEC (long, last_basic_block);
4392 operand_rank = pointer_map_create ();
4394 /* Give each default definition a distinct rank. This includes
4395 parameters and the static chain. Walk backwards over all
4396 SSA names so that we get proper rank ordering according
4397 to tree_swap_operands_p. */
4398 for (i = num_ssa_names - 1; i > 0; --i)
4400 tree name = ssa_name (i);
4401 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4402 insert_operand_rank (name, ++rank);
4405 /* Set up the rank for each BB. */
4406 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4407 bb_rank[bbs[i]] = ++rank << 16;
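  /* The block component of a rank lives in the high bits: with the shift
     by 16, names defined in later blocks always outrank names defined in
     earlier blocks, and the low bits remain available for ordering within
     a block (rough sketch of the encoding; see get_rank).  */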
4409 free (bbs);
4410 calculate_dominance_info (CDI_POST_DOMINATORS);
4411 plus_negates = vNULL;
4414 /* Cleanup after the reassociation pass, and print stats if
4415 requested. */
4417 static void
4418 fini_reassoc (void)
4420 statistics_counter_event (cfun, "Linearized",
4421 reassociate_stats.linearized);
4422 statistics_counter_event (cfun, "Constants eliminated",
4423 reassociate_stats.constants_eliminated);
4424 statistics_counter_event (cfun, "Ops eliminated",
4425 reassociate_stats.ops_eliminated);
4426 statistics_counter_event (cfun, "Statements rewritten",
4427 reassociate_stats.rewritten);
4428 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4429 reassociate_stats.pows_encountered);
4430 statistics_counter_event (cfun, "Built-in powi calls created",
4431 reassociate_stats.pows_created);
4433 pointer_map_destroy (operand_rank);
4434 free_alloc_pool (operand_entry_pool);
4435 free (bb_rank);
4436 plus_negates.release ();
4437 free_dominance_info (CDI_POST_DOMINATORS);
4438 loop_optimizer_finalize ();
4441 /* Gate and execute functions for Reassociation. */
4443 static unsigned int
4444 execute_reassoc (void)
4446 init_reassoc ();
4448 do_reassoc ();
4449 repropagate_negates ();
4451 fini_reassoc ();
4452 return 0;
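/* Reassociation is controlled by -ftree-reassoc, which is enabled by
   default, so the gate below is normally true whenever the pass is
   scheduled.  */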
4455 static bool
4456 gate_tree_ssa_reassoc (void)
4458 return flag_tree_reassoc != 0;
4461 namespace {
4463 const pass_data pass_data_reassoc =
4465 GIMPLE_PASS, /* type */
4466 "reassoc", /* name */
4467 OPTGROUP_NONE, /* optinfo_flags */
4468 true, /* has_gate */
4469 true, /* has_execute */
4470 TV_TREE_REASSOC, /* tv_id */
4471 ( PROP_cfg | PROP_ssa ), /* properties_required */
4472 0, /* properties_provided */
4473 0, /* properties_destroyed */
4474 0, /* todo_flags_start */
4475 ( TODO_verify_ssa
4476 | TODO_update_ssa_only_virtuals
4477 | TODO_verify_flow ), /* todo_flags_finish */
4480 class pass_reassoc : public gimple_opt_pass
4482 public:
4483 pass_reassoc (gcc::context *ctxt)
4484 : gimple_opt_pass (pass_data_reassoc, ctxt)
4487 /* opt_pass methods: */
4488 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4489 bool gate () { return gate_tree_ssa_reassoc (); }
4490 unsigned int execute () { return execute_reassoc (); }
4492 }; // class pass_reassoc
4494 } // anon namespace
4496 gimple_opt_pass *
4497 make_pass_reassoc (gcc::context *ctxt)
4499 return new pass_reassoc (ctxt);