1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
42 #include "diagnostic-core.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
68 5. Repropagate negates, as nothing else will clean it up ATM.
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
86 IE if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
89 a (1), b (1), c (1), d (2), e (2)
92 We start with our merge worklist empty, and the ops list with all of
95 You want to first merge all leaves of the same rank, as much as
98 So first build a binary op of
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
110 Then build a binary op of d + e
113 and put mergetmp2 on the merge worklist.
115 so merge worklist = {mergetmp, c, mergetmp2}
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
123 mergetmp3 = mergetmp + c
125 worklist = {mergetmp2, mergetmp3}
127 mergetmp4 = mergetmp2 + mergetmp3
129 worklist = {mergetmp4}
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
140 So why don't we do this?
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or less ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
149 newstmt = mergetmp + op3
153 newstmt = mergetmp + op1
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
170 int constants_eliminated
;
173 int pows_encountered
;
177 /* Operator, rank pair. */
178 typedef struct operand_entry
186 static alloc_pool operand_entry_pool
;
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id
;
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
195 static long *bb_rank
;
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t
*operand_rank
;
201 static long get_rank (tree
);
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
217 phi_rank (gimple stmt
)
219 basic_block bb
= gimple_bb (stmt
);
220 struct loop
*father
= bb
->loop_father
;
226 /* We only care about real loops (those with a latch). */
228 return bb_rank
[bb
->index
];
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb
!= father
->header
233 return bb_rank
[bb
->index
];
235 /* Ignore virtual SSA_NAMEs. */
236 res
= gimple_phi_result (stmt
);
237 if (!is_gimple_reg (SSA_NAME_VAR (res
)))
238 return bb_rank
[bb
->index
];
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res
, &use
, &use_stmt
)
243 || gimple_bb (use_stmt
)->loop_father
!= father
)
244 return bb_rank
[bb
->index
];
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
249 tree arg
= gimple_phi_arg_def (stmt
, i
);
250 if (TREE_CODE (arg
) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
253 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
254 if (gimple_bb (def_stmt
)->loop_father
== father
)
255 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
259 /* Must be an uninteresting phi. */
260 return bb_rank
[bb
->index
];
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
267 loop_carried_phi (tree exp
)
272 if (TREE_CODE (exp
) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp
))
276 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
278 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
286 if (phi_rank (phi_stmt
) != block_rank
)
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
297 propagate_rank (long rank
, tree op
)
301 if (loop_carried_phi (op
))
304 op_rank
= get_rank (op
);
306 return MAX (rank
, op_rank
);
309 /* Look up the operand rank structure for expression E. */
312 find_operand_rank (tree e
)
314 void **slot
= pointer_map_contains (operand_rank
, e
);
315 return slot
? (long) (intptr_t) *slot
: -1;
318 /* Insert {E,RANK} into the operand rank hashtable. */
321 insert_operand_rank (tree e
, long rank
)
324 gcc_assert (rank
> 0);
325 slot
= pointer_map_insert (operand_rank
, e
);
327 *slot
= (void *) (intptr_t) rank
;
330 /* Given an expression E, return the rank of the expression. */
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e
))
339 /* SSA_NAME's have the rank of the expression they are the result
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
345 For simple operations, the rank is the maximum rank of any of
346 its operands, or the bb_rank, whichever is less.
347 I make no claims that this is optimal, however, it gives good
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
376 if (TREE_CODE (e
) == SSA_NAME
)
383 if (SSA_NAME_IS_DEFAULT_DEF (e
))
384 return find_operand_rank (e
);
386 stmt
= SSA_NAME_DEF_STMT (e
);
387 if (gimple_code (stmt
) == GIMPLE_PHI
)
388 return phi_rank (stmt
);
390 if (!is_gimple_assign (stmt
)
391 || gimple_vdef (stmt
))
392 return bb_rank
[gimple_bb (stmt
)->index
];
394 /* If we already have a rank for this expression, use that. */
395 rank
= find_operand_rank (e
);
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
403 if (gimple_assign_single_p (stmt
))
405 tree rhs
= gimple_assign_rhs1 (stmt
);
406 n
= TREE_OPERAND_LENGTH (rhs
);
408 rank
= propagate_rank (rank
, rhs
);
411 for (i
= 0; i
< n
; i
++)
413 op
= TREE_OPERAND (rhs
, i
);
416 rank
= propagate_rank (rank
, op
);
422 n
= gimple_num_ops (stmt
);
423 for (i
= 1; i
< n
; i
++)
425 op
= gimple_op (stmt
, i
);
427 rank
= propagate_rank (rank
, op
);
431 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
433 fprintf (dump_file
, "Rank for ");
434 print_generic_expr (dump_file
, e
, 0);
435 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e
, (rank
+ 1));
443 /* Globals, etc, are rank 0 */
447 DEF_VEC_P(operand_entry_t
);
448 DEF_VEC_ALLOC_P(operand_entry_t
, heap
);
450 /* We want integer ones to end up last no matter what, since they are
451 the ones we can do the most with. */
452 #define INTEGER_CONST_TYPE 1 << 3
453 #define FLOAT_CONST_TYPE 1 << 2
454 #define OTHER_CONST_TYPE 1 << 1
456 /* Classify an invariant tree into integer, float, or other, so that
457 we can sort them to be near other constants of the same type. */
459 constant_type (tree t
)
461 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
462 return INTEGER_CONST_TYPE
;
463 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
464 return FLOAT_CONST_TYPE
;
466 return OTHER_CONST_TYPE
;
469 /* qsort comparison function to sort operand entries PA and PB by rank
470 so that the sorted array is ordered by rank in decreasing order. */
472 sort_by_operand_rank (const void *pa
, const void *pb
)
474 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
475 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
477 /* It's nicer for optimize_expression if constants that are likely
478 to fold when added/multiplied//whatever are put next to each
479 other. Since all constants have rank 0, order them by type. */
480 if (oeb
->rank
== 0 && oea
->rank
== 0)
482 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
483 return constant_type (oeb
->op
) - constant_type (oea
->op
);
485 /* To make sorting result stable, we use unique IDs to determine
487 return oeb
->id
- oea
->id
;
490 /* Lastly, make sure the versions that are the same go next to each
491 other. We use SSA_NAME_VERSION because it's stable. */
492 if ((oeb
->rank
- oea
->rank
== 0)
493 && TREE_CODE (oea
->op
) == SSA_NAME
494 && TREE_CODE (oeb
->op
) == SSA_NAME
)
496 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
497 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
499 return oeb
->id
- oea
->id
;
502 if (oeb
->rank
!= oea
->rank
)
503 return oeb
->rank
- oea
->rank
;
505 return oeb
->id
- oea
->id
;
508 /* Add an operand entry to *OPS for the tree operand OP. */
511 add_to_ops_vec (VEC(operand_entry_t
, heap
) **ops
, tree op
)
513 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
516 oe
->rank
= get_rank (op
);
517 oe
->id
= next_operand_entry_id
++;
519 VEC_safe_push (operand_entry_t
, heap
, *ops
, oe
);
522 /* Add an operand entry to *OPS for the tree operand OP with repeat
526 add_repeat_to_ops_vec (VEC(operand_entry_t
, heap
) **ops
, tree op
,
527 HOST_WIDE_INT repeat
)
529 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
532 oe
->rank
= get_rank (op
);
533 oe
->id
= next_operand_entry_id
++;
535 VEC_safe_push (operand_entry_t
, heap
, *ops
, oe
);
537 reassociate_stats
.pows_encountered
++;
540 /* Return true if STMT is reassociable operation containing a binary
541 operation with tree code CODE, and is inside LOOP. */
544 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
546 basic_block bb
= gimple_bb (stmt
);
548 if (gimple_bb (stmt
) == NULL
)
551 if (!flow_bb_inside_loop_p (loop
, bb
))
554 if (is_gimple_assign (stmt
)
555 && gimple_assign_rhs_code (stmt
) == code
556 && has_single_use (gimple_assign_lhs (stmt
)))
563 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
564 operand of the negate operation. Otherwise, return NULL. */
567 get_unary_op (tree name
, enum tree_code opcode
)
569 gimple stmt
= SSA_NAME_DEF_STMT (name
);
571 if (!is_gimple_assign (stmt
))
574 if (gimple_assign_rhs_code (stmt
) == opcode
)
575 return gimple_assign_rhs1 (stmt
);
579 /* If CURR and LAST are a pair of ops that OPCODE allows us to
580 eliminate through equivalences, do so, remove them from OPS, and
581 return true. Otherwise, return false. */
584 eliminate_duplicate_pair (enum tree_code opcode
,
585 VEC (operand_entry_t
, heap
) **ops
,
588 operand_entry_t curr
,
589 operand_entry_t last
)
592 /* If we have two of the same op, and the opcode is & |, min, or max,
593 we can eliminate one of them.
594 If we have two of the same op, and the opcode is ^, we can
595 eliminate both of them. */
597 if (last
&& last
->op
== curr
->op
)
605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
607 fprintf (dump_file
, "Equivalence: ");
608 print_generic_expr (dump_file
, curr
->op
, 0);
609 fprintf (dump_file
, " [&|minmax] ");
610 print_generic_expr (dump_file
, last
->op
, 0);
611 fprintf (dump_file
, " -> ");
612 print_generic_stmt (dump_file
, last
->op
, 0);
615 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
616 reassociate_stats
.ops_eliminated
++;
621 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
623 fprintf (dump_file
, "Equivalence: ");
624 print_generic_expr (dump_file
, curr
->op
, 0);
625 fprintf (dump_file
, " ^ ");
626 print_generic_expr (dump_file
, last
->op
, 0);
627 fprintf (dump_file
, " -> nothing\n");
630 reassociate_stats
.ops_eliminated
+= 2;
632 if (VEC_length (operand_entry_t
, *ops
) == 2)
634 VEC_free (operand_entry_t
, heap
, *ops
);
636 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
641 VEC_ordered_remove (operand_entry_t
, *ops
, i
-1);
642 VEC_ordered_remove (operand_entry_t
, *ops
, i
-1);
654 static VEC(tree
, heap
) *plus_negates
;
656 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
657 expression, look in OPS for a corresponding positive operation to cancel
658 it out. If we find one, remove the other from OPS, replace
659 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
663 eliminate_plus_minus_pair (enum tree_code opcode
,
664 VEC (operand_entry_t
, heap
) **ops
,
665 unsigned int currindex
,
666 operand_entry_t curr
)
673 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
676 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
677 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
678 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
681 /* Any non-negated version will have a rank that is one less than
682 the current rank. So once we hit those ranks, if we don't find
685 for (i
= currindex
+ 1;
686 VEC_iterate (operand_entry_t
, *ops
, i
, oe
)
687 && oe
->rank
>= curr
->rank
- 1 ;
690 if (oe
->op
== negateop
)
693 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
695 fprintf (dump_file
, "Equivalence: ");
696 print_generic_expr (dump_file
, negateop
, 0);
697 fprintf (dump_file
, " + -");
698 print_generic_expr (dump_file
, oe
->op
, 0);
699 fprintf (dump_file
, " -> 0\n");
702 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
703 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
704 VEC_ordered_remove (operand_entry_t
, *ops
, currindex
);
705 reassociate_stats
.ops_eliminated
++;
709 else if (oe
->op
== notop
)
711 tree op_type
= TREE_TYPE (oe
->op
);
713 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
715 fprintf (dump_file
, "Equivalence: ");
716 print_generic_expr (dump_file
, notop
, 0);
717 fprintf (dump_file
, " + ~");
718 print_generic_expr (dump_file
, oe
->op
, 0);
719 fprintf (dump_file
, " -> -1\n");
722 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
723 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
724 VEC_ordered_remove (operand_entry_t
, *ops
, currindex
);
725 reassociate_stats
.ops_eliminated
++;
731 /* CURR->OP is a negate expr in a plus expr: save it for later
732 inspection in repropagate_negates(). */
733 if (negateop
!= NULL_TREE
)
734 VEC_safe_push (tree
, heap
, plus_negates
, curr
->op
);
739 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
740 bitwise not expression, look in OPS for a corresponding operand to
741 cancel it out. If we find one, remove the other from OPS, replace
742 OPS[CURRINDEX] with 0, and return true. Otherwise, return
746 eliminate_not_pairs (enum tree_code opcode
,
747 VEC (operand_entry_t
, heap
) **ops
,
748 unsigned int currindex
,
749 operand_entry_t curr
)
755 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
756 || TREE_CODE (curr
->op
) != SSA_NAME
)
759 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
760 if (notop
== NULL_TREE
)
763 /* Any non-not version will have a rank that is one less than
764 the current rank. So once we hit those ranks, if we don't find
767 for (i
= currindex
+ 1;
768 VEC_iterate (operand_entry_t
, *ops
, i
, oe
)
769 && oe
->rank
>= curr
->rank
- 1;
774 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
776 fprintf (dump_file
, "Equivalence: ");
777 print_generic_expr (dump_file
, notop
, 0);
778 if (opcode
== BIT_AND_EXPR
)
779 fprintf (dump_file
, " & ~");
780 else if (opcode
== BIT_IOR_EXPR
)
781 fprintf (dump_file
, " | ~");
782 print_generic_expr (dump_file
, oe
->op
, 0);
783 if (opcode
== BIT_AND_EXPR
)
784 fprintf (dump_file
, " -> 0\n");
785 else if (opcode
== BIT_IOR_EXPR
)
786 fprintf (dump_file
, " -> -1\n");
789 if (opcode
== BIT_AND_EXPR
)
790 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
791 else if (opcode
== BIT_IOR_EXPR
)
792 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
793 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
795 reassociate_stats
.ops_eliminated
796 += VEC_length (operand_entry_t
, *ops
) - 1;
797 VEC_free (operand_entry_t
, heap
, *ops
);
799 VEC_safe_push (operand_entry_t
, heap
, *ops
, oe
);
807 /* Use constant value that may be present in OPS to try to eliminate
808 operands. Note that this function is only really used when we've
809 eliminated ops for other reasons, or merged constants. Across
810 single statements, fold already does all of this, plus more. There
811 is little point in duplicating logic, so I've only included the
812 identities that I could ever construct testcases to trigger. */
815 eliminate_using_constants (enum tree_code opcode
,
816 VEC(operand_entry_t
, heap
) **ops
)
818 operand_entry_t oelast
= VEC_last (operand_entry_t
, *ops
);
819 tree type
= TREE_TYPE (oelast
->op
);
821 if (oelast
->rank
== 0
822 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
827 if (integer_zerop (oelast
->op
))
829 if (VEC_length (operand_entry_t
, *ops
) != 1)
831 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
832 fprintf (dump_file
, "Found & 0, removing all other ops\n");
834 reassociate_stats
.ops_eliminated
835 += VEC_length (operand_entry_t
, *ops
) - 1;
837 VEC_free (operand_entry_t
, heap
, *ops
);
839 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
843 else if (integer_all_onesp (oelast
->op
))
845 if (VEC_length (operand_entry_t
, *ops
) != 1)
847 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
848 fprintf (dump_file
, "Found & -1, removing\n");
849 VEC_pop (operand_entry_t
, *ops
);
850 reassociate_stats
.ops_eliminated
++;
855 if (integer_all_onesp (oelast
->op
))
857 if (VEC_length (operand_entry_t
, *ops
) != 1)
859 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
860 fprintf (dump_file
, "Found | -1, removing all other ops\n");
862 reassociate_stats
.ops_eliminated
863 += VEC_length (operand_entry_t
, *ops
) - 1;
865 VEC_free (operand_entry_t
, heap
, *ops
);
867 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
871 else if (integer_zerop (oelast
->op
))
873 if (VEC_length (operand_entry_t
, *ops
) != 1)
875 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
876 fprintf (dump_file
, "Found | 0, removing\n");
877 VEC_pop (operand_entry_t
, *ops
);
878 reassociate_stats
.ops_eliminated
++;
883 if (integer_zerop (oelast
->op
)
884 || (FLOAT_TYPE_P (type
)
885 && !HONOR_NANS (TYPE_MODE (type
))
886 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
887 && real_zerop (oelast
->op
)))
889 if (VEC_length (operand_entry_t
, *ops
) != 1)
891 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
892 fprintf (dump_file
, "Found * 0, removing all other ops\n");
894 reassociate_stats
.ops_eliminated
895 += VEC_length (operand_entry_t
, *ops
) - 1;
896 VEC_free (operand_entry_t
, heap
, *ops
);
898 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
902 else if (integer_onep (oelast
->op
)
903 || (FLOAT_TYPE_P (type
)
904 && !HONOR_SNANS (TYPE_MODE (type
))
905 && real_onep (oelast
->op
)))
907 if (VEC_length (operand_entry_t
, *ops
) != 1)
909 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
910 fprintf (dump_file
, "Found * 1, removing\n");
911 VEC_pop (operand_entry_t
, *ops
);
912 reassociate_stats
.ops_eliminated
++;
920 if (integer_zerop (oelast
->op
)
921 || (FLOAT_TYPE_P (type
)
922 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
923 && fold_real_zero_addition_p (type
, oelast
->op
,
924 opcode
== MINUS_EXPR
)))
926 if (VEC_length (operand_entry_t
, *ops
) != 1)
928 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
929 fprintf (dump_file
, "Found [|^+] 0, removing\n");
930 VEC_pop (operand_entry_t
, *ops
);
931 reassociate_stats
.ops_eliminated
++;
943 static void linearize_expr_tree (VEC(operand_entry_t
, heap
) **, gimple
,
946 /* Structure for tracking and counting operands. */
947 typedef struct oecount_s
{
950 enum tree_code oecode
;
955 DEF_VEC_ALLOC_O(oecount
,heap
);
957 /* The heap for the oecount hashtable and the sorted list of operands. */
958 static VEC (oecount
, heap
) *cvec
;
960 /* Hash function for oecount. */
963 oecount_hash (const void *p
)
965 const oecount
*c
= VEC_index (oecount
, cvec
, (size_t)p
- 42);
966 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
969 /* Comparison function for oecount. */
972 oecount_eq (const void *p1
, const void *p2
)
974 const oecount
*c1
= VEC_index (oecount
, cvec
, (size_t)p1
- 42);
975 const oecount
*c2
= VEC_index (oecount
, cvec
, (size_t)p2
- 42);
976 return (c1
->oecode
== c2
->oecode
977 && c1
->op
== c2
->op
);
980 /* Comparison function for qsort sorting oecount elements by count. */
983 oecount_cmp (const void *p1
, const void *p2
)
985 const oecount
*c1
= (const oecount
*)p1
;
986 const oecount
*c2
= (const oecount
*)p2
;
987 if (c1
->cnt
!= c2
->cnt
)
988 return c1
->cnt
- c2
->cnt
;
990 /* If counts are identical, use unique IDs to stabilize qsort. */
991 return c1
->id
- c2
->id
;
994 /* Return TRUE iff STMT represents a builtin call that raises OP
998 stmt_is_power_of_op (gimple stmt
, tree op
)
1002 if (!is_gimple_call (stmt
))
1005 fndecl
= gimple_call_fndecl (stmt
);
1008 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1011 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1013 CASE_FLT_FN (BUILT_IN_POW
):
1014 CASE_FLT_FN (BUILT_IN_POWI
):
1015 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023 in place and return the result. Assumes that stmt_is_power_of_op
1024 was previously called for STMT and returned TRUE. */
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt
)
1029 REAL_VALUE_TYPE c
, cint
;
1030 HOST_WIDE_INT power
;
1033 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1035 CASE_FLT_FN (BUILT_IN_POW
):
1036 arg1
= gimple_call_arg (stmt
, 1);
1037 c
= TREE_REAL_CST (arg1
);
1038 power
= real_to_integer (&c
) - 1;
1039 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1040 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1043 CASE_FLT_FN (BUILT_IN_POWI
):
1044 arg1
= gimple_call_arg (stmt
, 1);
1045 power
= TREE_INT_CST_LOW (arg1
) - 1;
1046 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1056 replace *DEF with OP as well. */
1059 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1064 gimple_stmt_iterator gsi
;
1066 if (is_gimple_call (stmt
))
1067 lhs
= gimple_call_lhs (stmt
);
1069 lhs
= gimple_assign_lhs (stmt
);
1071 gcc_assert (has_single_use (lhs
));
1072 single_imm_use (lhs
, &use
, &use_stmt
);
1076 if (TREE_CODE (op
) != SSA_NAME
)
1077 update_stmt (use_stmt
);
1078 gsi
= gsi_for_stmt (stmt
);
1079 gsi_remove (&gsi
, true);
1080 release_defs (stmt
);
1082 if (is_gimple_call (stmt
))
1083 unlink_stmt_vdef (stmt
);
1086 /* Walks the linear chain with result *DEF searching for an operation
1087 with operand OP and code OPCODE removing that from the chain. *DEF
1088 is updated if there is only one operand but no operation left. */
1091 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1093 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1099 if (opcode
== MULT_EXPR
1100 && stmt_is_power_of_op (stmt
, op
))
1102 if (decrement_power (stmt
) == 1)
1103 propagate_op_to_single_use (op
, stmt
, def
);
1107 name
= gimple_assign_rhs1 (stmt
);
1109 /* If this is the operation we look for and one of the operands
1110 is ours simply propagate the other operand into the stmts
1112 if (gimple_assign_rhs_code (stmt
) == opcode
1114 || gimple_assign_rhs2 (stmt
) == op
))
1117 name
= gimple_assign_rhs2 (stmt
);
1118 propagate_op_to_single_use (name
, stmt
, def
);
1122 /* We might have a multiply of two __builtin_pow* calls, and
1123 the operand might be hiding in the rightmost one. */
1124 if (opcode
== MULT_EXPR
1125 && gimple_assign_rhs_code (stmt
) == opcode
1126 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
1128 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1129 if (stmt_is_power_of_op (stmt2
, op
))
1131 if (decrement_power (stmt2
) == 1)
1132 propagate_op_to_single_use (op
, stmt2
, def
);
1137 /* Continue walking the chain. */
1138 gcc_assert (name
!= op
1139 && TREE_CODE (name
) == SSA_NAME
);
1140 stmt
= SSA_NAME_DEF_STMT (name
);
1145 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1146 the result. Places the statement after the definition of either
1147 OP1 or OP2. Returns the new statement. */
1150 build_and_add_sum (tree tmpvar
, tree op1
, tree op2
, enum tree_code opcode
)
1152 gimple op1def
= NULL
, op2def
= NULL
;
1153 gimple_stmt_iterator gsi
;
1157 /* Create the addition statement. */
1158 sum
= gimple_build_assign_with_ops (opcode
, tmpvar
, op1
, op2
);
1159 op
= make_ssa_name (tmpvar
, sum
);
1160 gimple_assign_set_lhs (sum
, op
);
1162 /* Find an insertion place and insert. */
1163 if (TREE_CODE (op1
) == SSA_NAME
)
1164 op1def
= SSA_NAME_DEF_STMT (op1
);
1165 if (TREE_CODE (op2
) == SSA_NAME
)
1166 op2def
= SSA_NAME_DEF_STMT (op2
);
1167 if ((!op1def
|| gimple_nop_p (op1def
))
1168 && (!op2def
|| gimple_nop_p (op2def
)))
1170 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
1171 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1173 else if ((!op1def
|| gimple_nop_p (op1def
))
1174 || (op2def
&& !gimple_nop_p (op2def
)
1175 && stmt_dominates_stmt_p (op1def
, op2def
)))
1177 if (gimple_code (op2def
) == GIMPLE_PHI
)
1179 gsi
= gsi_after_labels (gimple_bb (op2def
));
1180 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1184 if (!stmt_ends_bb_p (op2def
))
1186 gsi
= gsi_for_stmt (op2def
);
1187 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
1194 FOR_EACH_EDGE (e
, ei
, gimple_bb (op2def
)->succs
)
1195 if (e
->flags
& EDGE_FALLTHRU
)
1196 gsi_insert_on_edge_immediate (e
, sum
);
1202 if (gimple_code (op1def
) == GIMPLE_PHI
)
1204 gsi
= gsi_after_labels (gimple_bb (op1def
));
1205 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1209 if (!stmt_ends_bb_p (op1def
))
1211 gsi
= gsi_for_stmt (op1def
);
1212 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
1219 FOR_EACH_EDGE (e
, ei
, gimple_bb (op1def
)->succs
)
1220 if (e
->flags
& EDGE_FALLTHRU
)
1221 gsi_insert_on_edge_immediate (e
, sum
);
1230 /* Perform un-distribution of divisions and multiplications.
1231 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1232 to (A + B) / X for real X.
1234 The algorithm is organized as follows.
1236 - First we walk the addition chain *OPS looking for summands that
1237 are defined by a multiplication or a real division. This results
1238 in the candidates bitmap with relevant indices into *OPS.
1240 - Second we build the chains of multiplications or divisions for
1241 these candidates, counting the number of occurrences of (operand, code)
1242 pairs in all of the candidates chains.
1244 - Third we sort the (operand, code) pairs by number of occurrence and
1245 process them starting with the pair with the most uses.
1247 * For each such pair we walk the candidates again to build a
1248 second candidate bitmap noting all multiplication/division chains
1249 that have at least one occurrence of (operand, code).
1251 * We build an alternate addition chain only covering these
1252 candidates with one (operand, code) operation removed from their
1253 multiplication/division chain.
1255 * The first candidate gets replaced by the alternate addition chain
1256 multiplied/divided by the operand.
1258 * All candidate chains get disabled for further processing and
1259 processing of (operand, code) pairs continues.
1261 The alternate addition chains built are re-processed by the main
1262 reassociation algorithm which allows optimizing a * x * y + b * y * x
1263 to (a + b ) * x * y in one invocation of the reassociation pass. */
1266 undistribute_ops_list (enum tree_code opcode
,
1267 VEC (operand_entry_t
, heap
) **ops
, struct loop
*loop
)
1269 unsigned int length
= VEC_length (operand_entry_t
, *ops
);
1270 operand_entry_t oe1
;
1272 sbitmap candidates
, candidates2
;
1273 unsigned nr_candidates
, nr_candidates2
;
1274 sbitmap_iterator sbi0
;
1275 VEC (operand_entry_t
, heap
) **subops
;
1277 bool changed
= false;
1278 int next_oecount_id
= 0;
1281 || opcode
!= PLUS_EXPR
)
1284 /* Build a list of candidates to process. */
1285 candidates
= sbitmap_alloc (length
);
1286 sbitmap_zero (candidates
);
1288 FOR_EACH_VEC_ELT (operand_entry_t
, *ops
, i
, oe1
)
1290 enum tree_code dcode
;
1293 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1295 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1296 if (!is_gimple_assign (oe1def
))
1298 dcode
= gimple_assign_rhs_code (oe1def
);
1299 if ((dcode
!= MULT_EXPR
1300 && dcode
!= RDIV_EXPR
)
1301 || !is_reassociable_op (oe1def
, dcode
, loop
))
1304 SET_BIT (candidates
, i
);
1308 if (nr_candidates
< 2)
1310 sbitmap_free (candidates
);
1314 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1316 fprintf (dump_file
, "searching for un-distribute opportunities ");
1317 print_generic_expr (dump_file
,
1318 VEC_index (operand_entry_t
, *ops
,
1319 sbitmap_first_set_bit (candidates
))->op
, 0);
1320 fprintf (dump_file
, " %d\n", nr_candidates
);
1323 /* Build linearized sub-operand lists and the counting table. */
1325 ctable
= htab_create (15, oecount_hash
, oecount_eq
, NULL
);
1326 subops
= XCNEWVEC (VEC (operand_entry_t
, heap
) *,
1327 VEC_length (operand_entry_t
, *ops
));
1328 EXECUTE_IF_SET_IN_SBITMAP (candidates
, 0, i
, sbi0
)
1331 enum tree_code oecode
;
1334 oedef
= SSA_NAME_DEF_STMT (VEC_index (operand_entry_t
, *ops
, i
)->op
);
1335 oecode
= gimple_assign_rhs_code (oedef
);
1336 linearize_expr_tree (&subops
[i
], oedef
,
1337 associative_tree_code (oecode
), false);
1339 FOR_EACH_VEC_ELT (operand_entry_t
, subops
[i
], j
, oe1
)
1346 c
.id
= next_oecount_id
++;
1348 VEC_safe_push (oecount
, heap
, cvec
, &c
);
1349 idx
= VEC_length (oecount
, cvec
) + 41;
1350 slot
= htab_find_slot (ctable
, (void *)idx
, INSERT
);
1353 *slot
= (void *)idx
;
1357 VEC_pop (oecount
, cvec
);
1358 VEC_index (oecount
, cvec
, (size_t)*slot
- 42)->cnt
++;
1362 htab_delete (ctable
);
1364 /* Sort the counting table. */
1365 VEC_qsort (oecount
, cvec
, oecount_cmp
);
1367 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1370 fprintf (dump_file
, "Candidates:\n");
1371 FOR_EACH_VEC_ELT (oecount
, cvec
, j
, c
)
1373 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1374 c
->oecode
== MULT_EXPR
1375 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1376 print_generic_expr (dump_file
, c
->op
, 0);
1377 fprintf (dump_file
, "\n");
1381 /* Process the (operand, code) pairs in order of most occurence. */
1382 candidates2
= sbitmap_alloc (length
);
1383 while (!VEC_empty (oecount
, cvec
))
1385 oecount
*c
= VEC_last (oecount
, cvec
);
1389 /* Now collect the operands in the outer chain that contain
1390 the common operand in their inner chain. */
1391 sbitmap_zero (candidates2
);
1393 EXECUTE_IF_SET_IN_SBITMAP (candidates
, 0, i
, sbi0
)
1396 enum tree_code oecode
;
1398 tree op
= VEC_index (operand_entry_t
, *ops
, i
)->op
;
1400 /* If we undistributed in this chain already this may be
1402 if (TREE_CODE (op
) != SSA_NAME
)
1405 oedef
= SSA_NAME_DEF_STMT (op
);
1406 oecode
= gimple_assign_rhs_code (oedef
);
1407 if (oecode
!= c
->oecode
)
1410 FOR_EACH_VEC_ELT (operand_entry_t
, subops
[i
], j
, oe1
)
1412 if (oe1
->op
== c
->op
)
1414 SET_BIT (candidates2
, i
);
1421 if (nr_candidates2
>= 2)
1423 operand_entry_t oe1
, oe2
;
1426 int first
= sbitmap_first_set_bit (candidates2
);
1428 /* Build the new addition chain. */
1429 oe1
= VEC_index (operand_entry_t
, *ops
, first
);
1430 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1432 fprintf (dump_file
, "Building (");
1433 print_generic_expr (dump_file
, oe1
->op
, 0);
1435 tmpvar
= create_tmp_reg (TREE_TYPE (oe1
->op
), NULL
);
1436 add_referenced_var (tmpvar
);
1437 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1438 EXECUTE_IF_SET_IN_SBITMAP (candidates2
, first
+1, i
, sbi0
)
1441 oe2
= VEC_index (operand_entry_t
, *ops
, i
);
1442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1444 fprintf (dump_file
, " + ");
1445 print_generic_expr (dump_file
, oe2
->op
, 0);
1447 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1448 sum
= build_and_add_sum (tmpvar
, oe1
->op
, oe2
->op
, opcode
);
1449 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1451 oe1
->op
= gimple_get_lhs (sum
);
1454 /* Apply the multiplication/division. */
1455 prod
= build_and_add_sum (tmpvar
, oe1
->op
, c
->op
, c
->oecode
);
1456 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1458 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1459 print_generic_expr (dump_file
, c
->op
, 0);
1460 fprintf (dump_file
, "\n");
1463 /* Record it in the addition chain and disable further
1464 undistribution with this op. */
1465 oe1
->op
= gimple_assign_lhs (prod
);
1466 oe1
->rank
= get_rank (oe1
->op
);
1467 VEC_free (operand_entry_t
, heap
, subops
[first
]);
1472 VEC_pop (oecount
, cvec
);
1475 for (i
= 0; i
< VEC_length (operand_entry_t
, *ops
); ++i
)
1476 VEC_free (operand_entry_t
, heap
, subops
[i
]);
1478 VEC_free (oecount
, heap
, cvec
);
1479 sbitmap_free (candidates
);
1480 sbitmap_free (candidates2
);
1485 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1486 expression, examine the other OPS to see if any of them are comparisons
1487 of the same values, which we may be able to combine or eliminate.
1488 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1491 eliminate_redundant_comparison (enum tree_code opcode
,
1492 VEC (operand_entry_t
, heap
) **ops
,
1493 unsigned int currindex
,
1494 operand_entry_t curr
)
1497 enum tree_code lcode
, rcode
;
1502 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1505 /* Check that CURR is a comparison. */
1506 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1508 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1509 if (!is_gimple_assign (def1
))
1511 lcode
= gimple_assign_rhs_code (def1
);
1512 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1514 op1
= gimple_assign_rhs1 (def1
);
1515 op2
= gimple_assign_rhs2 (def1
);
1517 /* Now look for a similar comparison in the remaining OPS. */
1518 for (i
= currindex
+ 1;
1519 VEC_iterate (operand_entry_t
, *ops
, i
, oe
);
1524 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1526 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1527 if (!is_gimple_assign (def2
))
1529 rcode
= gimple_assign_rhs_code (def2
);
1530 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1533 /* If we got here, we have a match. See if we can combine the
1535 if (opcode
== BIT_IOR_EXPR
)
1536 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1537 rcode
, gimple_assign_rhs1 (def2
),
1538 gimple_assign_rhs2 (def2
));
1540 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1541 rcode
, gimple_assign_rhs1 (def2
),
1542 gimple_assign_rhs2 (def2
));
1546 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1547 always give us a boolean_type_node value back. If the original
1548 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1549 we need to convert. */
1550 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1551 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1553 if (TREE_CODE (t
) != INTEGER_CST
1554 && !operand_equal_p (t
, curr
->op
, 0))
1556 enum tree_code subcode
;
1557 tree newop1
, newop2
;
1558 if (!COMPARISON_CLASS_P (t
))
1560 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1561 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1562 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1563 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1567 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1569 fprintf (dump_file
, "Equivalence: ");
1570 print_generic_expr (dump_file
, curr
->op
, 0);
1571 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1572 print_generic_expr (dump_file
, oe
->op
, 0);
1573 fprintf (dump_file
, " -> ");
1574 print_generic_expr (dump_file
, t
, 0);
1575 fprintf (dump_file
, "\n");
1578 /* Now we can delete oe, as it has been subsumed by the new combined
1580 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
1581 reassociate_stats
.ops_eliminated
++;
1583 /* If t is the same as curr->op, we're done. Otherwise we must
1584 replace curr->op with t. Special case is if we got a constant
1585 back, in which case we add it to the end instead of in place of
1586 the current entry. */
1587 if (TREE_CODE (t
) == INTEGER_CST
)
1589 VEC_ordered_remove (operand_entry_t
, *ops
, currindex
);
1590 add_to_ops_vec (ops
, t
);
1592 else if (!operand_equal_p (t
, curr
->op
, 0))
1596 enum tree_code subcode
;
1599 gcc_assert (COMPARISON_CLASS_P (t
));
1600 tmpvar
= create_tmp_var (TREE_TYPE (t
), NULL
);
1601 add_referenced_var (tmpvar
);
1602 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1603 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1604 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1605 gcc_checking_assert (is_gimple_val (newop1
)
1606 && is_gimple_val (newop2
));
1607 sum
= build_and_add_sum (tmpvar
, newop1
, newop2
, subcode
);
1608 curr
->op
= gimple_get_lhs (sum
);
1616 /* Perform various identities and other optimizations on the list of
1617 operand entries, stored in OPS. The tree code for the binary
1618 operation between all the operands is OPCODE. */
1621 optimize_ops_list (enum tree_code opcode
,
1622 VEC (operand_entry_t
, heap
) **ops
)
1624 unsigned int length
= VEC_length (operand_entry_t
, *ops
);
1627 operand_entry_t oelast
= NULL
;
1628 bool iterate
= false;
1633 oelast
= VEC_last (operand_entry_t
, *ops
);
1635 /* If the last two are constants, pop the constants off, merge them
1636 and try the next two. */
1637 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1639 operand_entry_t oelm1
= VEC_index (operand_entry_t
, *ops
, length
- 2);
1641 if (oelm1
->rank
== 0
1642 && is_gimple_min_invariant (oelm1
->op
)
1643 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1644 TREE_TYPE (oelast
->op
)))
1646 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1647 oelm1
->op
, oelast
->op
);
1649 if (folded
&& is_gimple_min_invariant (folded
))
1651 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1652 fprintf (dump_file
, "Merging constants\n");
1654 VEC_pop (operand_entry_t
, *ops
);
1655 VEC_pop (operand_entry_t
, *ops
);
1657 add_to_ops_vec (ops
, folded
);
1658 reassociate_stats
.constants_eliminated
++;
1660 optimize_ops_list (opcode
, ops
);
1666 eliminate_using_constants (opcode
, ops
);
1669 for (i
= 0; VEC_iterate (operand_entry_t
, *ops
, i
, oe
);)
1673 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1675 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1676 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1677 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1689 length
= VEC_length (operand_entry_t
, *ops
);
1690 oelast
= VEC_last (operand_entry_t
, *ops
);
1693 optimize_ops_list (opcode
, ops
);
1696 /* The following functions are subroutines to optimize_range_tests and allow
1697 it to try to change a logical combination of comparisons into a range
1701 X == 2 || X == 5 || X == 3 || X == 4
1705 (unsigned) (X - 2) <= 3
1707 For more information see comments above fold_test_range in fold-const.c,
1708 this implementation is for GIMPLE. */
1716 bool strict_overflow_p
;
1717 unsigned int idx
, next
;
1720 /* This is similar to make_range in fold-const.c, but on top of
1721 GIMPLE instead of trees. */
1724 init_range_entry (struct range_entry
*r
, tree exp
)
1728 bool is_bool
, strict_overflow_p
;
1732 r
->strict_overflow_p
= false;
1734 r
->high
= NULL_TREE
;
1735 if (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
)))
1738 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1739 and see if we can refine the range. Some of the cases below may not
1740 happen, but it doesn't seem worth worrying about this. We "continue"
1741 the outer loop when we've changed something; otherwise we "break"
1742 the switch, which will "break" the while. */
1743 low
= build_int_cst (TREE_TYPE (exp
), 0);
1746 strict_overflow_p
= false;
1748 if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1750 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1755 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1761 enum tree_code code
;
1762 tree arg0
, arg1
, exp_type
;
1766 if (TREE_CODE (exp
) != SSA_NAME
)
1769 stmt
= SSA_NAME_DEF_STMT (exp
);
1770 if (!is_gimple_assign (stmt
))
1773 code
= gimple_assign_rhs_code (stmt
);
1774 arg0
= gimple_assign_rhs1 (stmt
);
1775 if (TREE_CODE (arg0
) != SSA_NAME
)
1777 arg1
= gimple_assign_rhs2 (stmt
);
1778 exp_type
= TREE_TYPE (exp
);
1779 loc
= gimple_location (stmt
);
1783 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1796 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1798 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1803 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1818 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1820 &strict_overflow_p
);
1821 if (nexp
!= NULL_TREE
)
1824 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1837 r
->strict_overflow_p
= strict_overflow_p
;
1841 /* Comparison function for qsort. Sort entries
1842 without SSA_NAME exp first, then with SSA_NAMEs sorted
1843 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1844 by increasing ->low and if ->low is the same, by increasing
1845 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1849 range_entry_cmp (const void *a
, const void *b
)
1851 const struct range_entry
*p
= (const struct range_entry
*) a
;
1852 const struct range_entry
*q
= (const struct range_entry
*) b
;
1854 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1856 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1858 /* Group range_entries for the same SSA_NAME together. */
1859 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1861 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1863 /* If ->low is different, NULL low goes first, then by
1865 if (p
->low
!= NULL_TREE
)
1867 if (q
->low
!= NULL_TREE
)
1869 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1871 if (tem
&& integer_onep (tem
))
1873 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1875 if (tem
&& integer_onep (tem
))
1881 else if (q
->low
!= NULL_TREE
)
1883 /* If ->high is different, NULL high goes last, before that by
1885 if (p
->high
!= NULL_TREE
)
1887 if (q
->high
!= NULL_TREE
)
1889 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1891 if (tem
&& integer_onep (tem
))
1893 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1895 if (tem
&& integer_onep (tem
))
1901 else if (p
->high
!= NULL_TREE
)
1903 /* If both ranges are the same, sort below by ascending idx. */
1908 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1911 if (p
->idx
< q
->idx
)
1915 gcc_checking_assert (p
->idx
> q
->idx
);
1920 /* Helper routine of optimize_range_test.
1921 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1922 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1923 OPCODE and OPS are arguments of optimize_range_tests. Return
1924 true if the range merge has been successful. */
1927 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
1928 unsigned int count
, enum tree_code opcode
,
1929 VEC (operand_entry_t
, heap
) **ops
, tree exp
, bool in_p
,
1930 tree low
, tree high
, bool strict_overflow_p
)
1932 tree op
= VEC_index (operand_entry_t
, *ops
, range
->idx
)->op
;
1933 location_t loc
= gimple_location (SSA_NAME_DEF_STMT (op
));
1934 tree tem
= build_range_check (loc
, TREE_TYPE (op
), exp
, in_p
, low
, high
);
1935 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
1936 gimple_stmt_iterator gsi
;
1938 if (tem
== NULL_TREE
)
1941 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
1942 warning_at (loc
, OPT_Wstrict_overflow
,
1943 "assuming signed overflow does not occur "
1944 "when simplifying range test");
1946 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1948 struct range_entry
*r
;
1949 fprintf (dump_file
, "Optimizing range tests ");
1950 print_generic_expr (dump_file
, range
->exp
, 0);
1951 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
1952 print_generic_expr (dump_file
, range
->low
, 0);
1953 fprintf (dump_file
, ", ");
1954 print_generic_expr (dump_file
, range
->high
, 0);
1955 fprintf (dump_file
, "]");
1956 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
1958 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
1959 print_generic_expr (dump_file
, r
->low
, 0);
1960 fprintf (dump_file
, ", ");
1961 print_generic_expr (dump_file
, r
->high
, 0);
1962 fprintf (dump_file
, "]");
1964 fprintf (dump_file
, "\n into ");
1965 print_generic_expr (dump_file
, tem
, 0);
1966 fprintf (dump_file
, "\n");
1969 if (opcode
== BIT_IOR_EXPR
)
1970 tem
= invert_truthvalue_loc (loc
, tem
);
1972 tem
= fold_convert_loc (loc
, TREE_TYPE (op
), tem
);
1973 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (op
));
1974 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
1977 VEC_index (operand_entry_t
, *ops
, range
->idx
)->op
= tem
;
1982 range
->strict_overflow_p
= false;
1984 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
1986 VEC_index (operand_entry_t
, *ops
, range
->idx
)->op
= error_mark_node
;
1987 range
->exp
= NULL_TREE
;
1992 /* Optimize range tests, similarly how fold_range_test optimizes
1993 it on trees. The tree code for the binary
1994 operation between all the operands is OPCODE. */
1997 optimize_range_tests (enum tree_code opcode
,
1998 VEC (operand_entry_t
, heap
) **ops
)
2000 unsigned int length
= VEC_length (operand_entry_t
, *ops
), i
, j
, first
;
2002 struct range_entry
*ranges
;
2003 bool any_changes
= false;
2008 ranges
= XNEWVEC (struct range_entry
, length
);
2009 for (i
= 0; i
< length
; i
++)
2012 init_range_entry (ranges
+ i
, VEC_index (operand_entry_t
, *ops
, i
)->op
);
2013 /* For | invert it now, we will invert it again before emitting
2014 the optimized expression. */
2015 if (opcode
== BIT_IOR_EXPR
)
2016 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2019 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2020 for (i
= 0; i
< length
; i
++)
2021 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2024 /* Try to merge ranges. */
2025 for (first
= i
; i
< length
; i
++)
2027 tree low
= ranges
[i
].low
;
2028 tree high
= ranges
[i
].high
;
2029 int in_p
= ranges
[i
].in_p
;
2030 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2031 int update_fail_count
= 0;
2033 for (j
= i
+ 1; j
< length
; j
++)
2035 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2037 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2038 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2040 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2046 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2047 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2053 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2054 while update_range_test would fail. */
2055 else if (update_fail_count
== 64)
2058 ++update_fail_count
;
2061 /* Optimize X == CST1 || X == CST2
2062 if popcount (CST1 ^ CST2) == 1 into
2063 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2064 Similarly for ranges. E.g.
2065 X != 2 && X != 3 && X != 10 && X != 11
2066 will be transformed by the above loop into
2067 (X - 2U) <= 1U && (X - 10U) <= 1U
2068 and this loop can transform that into
2069 ((X & ~8) - 2U) <= 1U. */
2070 for (i
= first
; i
< length
; i
++)
2072 tree lowi
, highi
, lowj
, highj
, type
, lowxor
, highxor
, tem
, exp
;
2074 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2076 type
= TREE_TYPE (ranges
[i
].exp
);
2077 if (!INTEGRAL_TYPE_P (type
))
2079 lowi
= ranges
[i
].low
;
2080 if (lowi
== NULL_TREE
)
2081 lowi
= TYPE_MIN_VALUE (type
);
2082 highi
= ranges
[i
].high
;
2083 if (highi
== NULL_TREE
)
2085 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2087 if (ranges
[j
].exp
== NULL_TREE
)
2089 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2093 lowj
= ranges
[j
].low
;
2094 if (lowj
== NULL_TREE
)
2096 highj
= ranges
[j
].high
;
2097 if (highj
== NULL_TREE
)
2098 highj
= TYPE_MAX_VALUE (type
);
2099 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2101 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2103 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2104 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2106 gcc_checking_assert (!integer_zerop (lowxor
));
2107 tem
= fold_binary (MINUS_EXPR
, type
, lowxor
,
2108 build_int_cst (type
, 1));
2109 if (tem
== NULL_TREE
)
2111 tem
= fold_binary (BIT_AND_EXPR
, type
, lowxor
, tem
);
2112 if (tem
== NULL_TREE
|| !integer_zerop (tem
))
2114 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2115 if (!tree_int_cst_equal (lowxor
, highxor
))
2117 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2118 exp
= fold_build2 (BIT_AND_EXPR
, type
, ranges
[i
].exp
, tem
);
2119 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2120 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2121 if (update_range_test (ranges
+ i
, ranges
+ j
, 1, opcode
, ops
, exp
,
2122 ranges
[i
].in_p
, lowj
, highj
,
2123 ranges
[i
].strict_overflow_p
2124 || ranges
[j
].strict_overflow_p
))
2135 FOR_EACH_VEC_ELT (operand_entry_t
, *ops
, i
, oe
)
2137 if (oe
->op
== error_mark_node
)
2140 VEC_replace (operand_entry_t
, *ops
, j
, oe
);
2143 VEC_truncate (operand_entry_t
, *ops
, j
);
2146 XDELETEVEC (ranges
);
2149 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2150 of STMT in it's operands. This is also known as a "destructive
2151 update" operation. */
2154 is_phi_for_stmt (gimple stmt
, tree operand
)
2158 use_operand_p arg_p
;
2161 if (TREE_CODE (operand
) != SSA_NAME
)
2164 lhs
= gimple_assign_lhs (stmt
);
2166 def_stmt
= SSA_NAME_DEF_STMT (operand
);
2167 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
2170 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
2171 if (lhs
== USE_FROM_PTR (arg_p
))
2176 /* Remove def stmt of VAR if VAR has zero uses and recurse
2177 on rhs1 operand if so. */
2180 remove_visited_stmt_chain (tree var
)
2183 gimple_stmt_iterator gsi
;
2187 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
2189 stmt
= SSA_NAME_DEF_STMT (var
);
2190 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
2192 var
= gimple_assign_rhs1 (stmt
);
2193 gsi
= gsi_for_stmt (stmt
);
2194 gsi_remove (&gsi
, true);
2195 release_defs (stmt
);
2202 /* This function checks three consecutive operands in
2203 passed operands vector OPS starting from OPINDEX and
2204 swaps two operands if it is profitable for binary operation
2205 consuming OPINDEX + 1 and OPINDEX + 2 operands.
2207 We pair ops with the same rank if possible.
2209 The alternative we try is to see if STMT is a destructive
2210 update style statement, which is like:
2213 In that case, we want to use the destructive update form to
2214 expose the possible vectorizer sum reduction opportunity.
2215 In that case, the third operand will be the phi node. This
2216 check is not performed if STMT is null.
2218 We could, of course, try to be better as noted above, and do a
2219 lot of work to try to find these opportunities in >3 operand
2220 cases, but it is unlikely to be worth it. */
2223 swap_ops_for_binary_stmt (VEC(operand_entry_t
, heap
) * ops
,
2224 unsigned int opindex
, gimple stmt
)
2226 operand_entry_t oe1
, oe2
, oe3
;
2228 oe1
= VEC_index (operand_entry_t
, ops
, opindex
);
2229 oe2
= VEC_index (operand_entry_t
, ops
, opindex
+ 1);
2230 oe3
= VEC_index (operand_entry_t
, ops
, opindex
+ 2);
2232 if ((oe1
->rank
== oe2
->rank
2233 && oe2
->rank
!= oe3
->rank
)
2234 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
2235 && !is_phi_for_stmt (stmt
, oe1
->op
)
2236 && !is_phi_for_stmt (stmt
, oe2
->op
)))
2238 struct operand_entry temp
= *oe3
;
2240 oe3
->rank
= oe1
->rank
;
2242 oe1
->rank
= temp
.rank
;
2244 else if ((oe1
->rank
== oe3
->rank
2245 && oe2
->rank
!= oe3
->rank
)
2246 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
2247 && !is_phi_for_stmt (stmt
, oe1
->op
)
2248 && !is_phi_for_stmt (stmt
, oe3
->op
)))
2250 struct operand_entry temp
= *oe2
;
2252 oe2
->rank
= oe1
->rank
;
2254 oe1
->rank
= temp
.rank
;
2258 /* Recursively rewrite our linearized statements so that the operators
2259 match those in OPS[OPINDEX], putting the computation in rank
2263 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
2264 VEC(operand_entry_t
, heap
) * ops
, bool moved
)
2266 tree rhs1
= gimple_assign_rhs1 (stmt
);
2267 tree rhs2
= gimple_assign_rhs2 (stmt
);
2270 /* If we have three operands left, then we want to make sure the ones
2271 that get the double binary op are chosen wisely. */
2272 if (opindex
+ 3 == VEC_length (operand_entry_t
, ops
))
2273 swap_ops_for_binary_stmt (ops
, opindex
, stmt
);
2275 /* The final recursion case for this function is that you have
2276 exactly two operations left.
2277 If we had one exactly one op in the entire list to start with, we
2278 would have never called this function, and the tail recursion
2279 rewrites them one at a time. */
2280 if (opindex
+ 2 == VEC_length (operand_entry_t
, ops
))
2282 operand_entry_t oe1
, oe2
;
2284 oe1
= VEC_index (operand_entry_t
, ops
, opindex
);
2285 oe2
= VEC_index (operand_entry_t
, ops
, opindex
+ 1);
2287 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
2289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2291 fprintf (dump_file
, "Transforming ");
2292 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2295 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
2296 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
2298 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
2299 remove_visited_stmt_chain (rhs1
);
2301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2303 fprintf (dump_file
, " into ");
2304 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2310 /* If we hit here, we should have 3 or more ops left. */
2311 gcc_assert (opindex
+ 2 < VEC_length (operand_entry_t
, ops
));
2313 /* Rewrite the next operator. */
2314 oe
= VEC_index (operand_entry_t
, ops
, opindex
);
2320 gimple_stmt_iterator gsinow
, gsirhs1
;
2321 gimple stmt1
= stmt
, stmt2
;
2324 gsinow
= gsi_for_stmt (stmt
);
2325 count
= VEC_length (operand_entry_t
, ops
) - opindex
- 2;
2326 while (count
-- != 0)
2328 stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1
));
2329 gsirhs1
= gsi_for_stmt (stmt2
);
2330 gsi_move_before (&gsirhs1
, &gsinow
);
2337 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2339 fprintf (dump_file
, "Transforming ");
2340 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2343 gimple_assign_set_rhs2 (stmt
, oe
->op
);
2346 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2348 fprintf (dump_file
, " into ");
2349 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2352 /* Recurse on the LHS of the binary operator, which is guaranteed to
2353 be the non-leaf side. */
2354 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
, moved
);
2357 /* Find out how many cycles we need to compute statements chain.
2358 OPS_NUM holds number of statements in a chain. CPU_WIDTH is a
2359 maximum number of independent statements we may execute per cycle. */
2362 get_required_cycles (int ops_num
, int cpu_width
)
2368 /* While we have more than 2 * cpu_width operands
2369 we may reduce number of operands by cpu_width
2371 res
= ops_num
/ (2 * cpu_width
);
2373 /* Remained operands count may be reduced twice per cycle
2374 until we have only one operand. */
2375 rest
= (unsigned)(ops_num
- res
* cpu_width
);
2376 elog
= exact_log2 (rest
);
2380 res
+= floor_log2 (rest
) + 1;
2385 /* Returns an optimal number of registers to use for computation of
2386 given statements. */
2389 get_reassociation_width (int ops_num
, enum tree_code opc
,
2390 enum machine_mode mode
)
2392 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
2397 if (param_width
> 0)
2398 width
= param_width
;
2400 width
= targetm
.sched
.reassociation_width (opc
, mode
);
2405 /* Get the minimal time required for sequence computation. */
2406 cycles_best
= get_required_cycles (ops_num
, width
);
2408 /* Check if we may use less width and still compute sequence for
2409 the same time. It will allow us to reduce registers usage.
2410 get_required_cycles is monotonically increasing with lower width
2411 so we can perform a binary search for the minimal width that still
2412 results in the optimal cycle count. */
2414 while (width
> width_min
)
2416 int width_mid
= (width
+ width_min
) / 2;
2418 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
2420 else if (width_min
< width_mid
)
2421 width_min
= width_mid
;
2429 /* Recursively rewrite our linearized statements so that the operators
2430 match those in OPS[OPINDEX], putting the computation in rank
2431 order and trying to allow operations to be executed in
2435 rewrite_expr_tree_parallel (gimple stmt
, int width
,
2436 VEC(operand_entry_t
, heap
) * ops
)
2438 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
2439 int op_num
= VEC_length (operand_entry_t
, ops
);
2440 int stmt_num
= op_num
- 1;
2441 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
2442 int op_index
= op_num
- 1;
2444 int ready_stmts_end
= 0;
2446 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
2449 /* We start expression rewriting from the top statements.
2450 So, in this loop we create a full list of statements
2451 we will work with. */
2452 stmts
[stmt_num
- 1] = stmt
;
2453 for (i
= stmt_num
- 2; i
>= 0; i
--)
2454 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
2456 lhs_var
= create_tmp_reg (TREE_TYPE (last_rhs1
), NULL
);
2457 add_referenced_var (lhs_var
);
2459 for (i
= 0; i
< stmt_num
; i
++)
2463 /* Determine whether we should use results of
2464 already handled statements or not. */
2465 if (ready_stmts_end
== 0
2466 && (i
- stmt_index
>= width
|| op_index
< 1))
2467 ready_stmts_end
= i
;
2469 /* Now we choose operands for the next statement. Non zero
2470 value in ready_stmts_end means here that we should use
2471 the result of already generated statements as new operand. */
2472 if (ready_stmts_end
> 0)
2474 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
2475 if (ready_stmts_end
> stmt_index
)
2476 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
2477 else if (op_index
>= 0)
2478 op2
= VEC_index (operand_entry_t
, ops
, op_index
--)->op
;
2481 gcc_assert (stmt_index
< i
);
2482 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
2485 if (stmt_index
>= ready_stmts_end
)
2486 ready_stmts_end
= 0;
2491 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
2492 op2
= VEC_index (operand_entry_t
, ops
, op_index
--)->op
;
2493 op1
= VEC_index (operand_entry_t
, ops
, op_index
--)->op
;
2496 /* If we emit the last statement then we should put
2497 operands into the last statement. It will also
2499 if (op_index
< 0 && stmt_index
== i
)
2502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2504 fprintf (dump_file
, "Transforming ");
2505 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
2508 /* We keep original statement only for the last one. All
2509 others are recreated. */
2510 if (i
== stmt_num
- 1)
2512 gimple_assign_set_rhs1 (stmts
[i
], op1
);
2513 gimple_assign_set_rhs2 (stmts
[i
], op2
);
2514 update_stmt (stmts
[i
]);
2517 stmts
[i
] = build_and_add_sum (lhs_var
, op1
, op2
, opcode
);
2519 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2521 fprintf (dump_file
, " into ");
2522 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
2526 remove_visited_stmt_chain (last_rhs1
);
2529 /* Transform STMT, which is really (A +B) + (C + D) into the left
2530 linear form, ((A+B)+C)+D.
2531 Recurse on D if necessary. */
2534 linearize_expr (gimple stmt
)
2536 gimple_stmt_iterator gsinow
, gsirhs
;
2537 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
2538 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
2539 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
2540 gimple newbinrhs
= NULL
;
2541 struct loop
*loop
= loop_containing_stmt (stmt
);
2543 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
2544 && is_reassociable_op (binrhs
, rhscode
, loop
));
2546 gsinow
= gsi_for_stmt (stmt
);
2547 gsirhs
= gsi_for_stmt (binrhs
);
2548 gsi_move_before (&gsirhs
, &gsinow
);
2550 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
2551 gimple_assign_set_rhs1 (binrhs
, gimple_assign_lhs (binlhs
));
2552 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
2554 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
2555 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
2557 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2559 fprintf (dump_file
, "Linearized: ");
2560 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2563 reassociate_stats
.linearized
++;
2564 update_stmt (binrhs
);
2565 update_stmt (binlhs
);
2568 gimple_set_visited (stmt
, true);
2569 gimple_set_visited (binlhs
, true);
2570 gimple_set_visited (binrhs
, true);
2572 /* Tail recurse on the new rhs if it still needs reassociation. */
2573 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
2574 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2575 want to change the algorithm while converting to tuples. */
2576 linearize_expr (stmt
);
2579 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2580 it. Otherwise, return NULL. */
2583 get_single_immediate_use (tree lhs
)
2585 use_operand_p immuse
;
2588 if (TREE_CODE (lhs
) == SSA_NAME
2589 && single_imm_use (lhs
, &immuse
, &immusestmt
)
2590 && is_gimple_assign (immusestmt
))
2596 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2597 representing the negated value. Insertions of any necessary
2598 instructions go before GSI.
2599 This function is recursive in that, if you hand it "a_5" as the
2600 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2601 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
2604 negate_value (tree tonegate
, gimple_stmt_iterator
*gsi
)
2606 gimple negatedefstmt
= NULL
;
2607 tree resultofnegate
;
2609 /* If we are trying to negate a name, defined by an add, negate the
2610 add operands instead. */
2611 if (TREE_CODE (tonegate
) == SSA_NAME
)
2612 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
2613 if (TREE_CODE (tonegate
) == SSA_NAME
2614 && is_gimple_assign (negatedefstmt
)
2615 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
2616 && has_single_use (gimple_assign_lhs (negatedefstmt
))
2617 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
2619 gimple_stmt_iterator gsi
;
2620 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
2621 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
2623 gsi
= gsi_for_stmt (negatedefstmt
);
2624 rhs1
= negate_value (rhs1
, &gsi
);
2625 gimple_assign_set_rhs1 (negatedefstmt
, rhs1
);
2627 gsi
= gsi_for_stmt (negatedefstmt
);
2628 rhs2
= negate_value (rhs2
, &gsi
);
2629 gimple_assign_set_rhs2 (negatedefstmt
, rhs2
);
2631 update_stmt (negatedefstmt
);
2632 return gimple_assign_lhs (negatedefstmt
);
2635 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
2636 resultofnegate
= force_gimple_operand_gsi (gsi
, tonegate
, true,
2637 NULL_TREE
, true, GSI_SAME_STMT
);
2638 return resultofnegate
;
2641 /* Return true if we should break up the subtract in STMT into an add
2642 with negate. This is true when we the subtract operands are really
2643 adds, or the subtract itself is used in an add expression. In
2644 either case, breaking up the subtract into an add with negate
2645 exposes the adds to reassociation. */
2648 should_break_up_subtract (gimple stmt
)
2650 tree lhs
= gimple_assign_lhs (stmt
);
2651 tree binlhs
= gimple_assign_rhs1 (stmt
);
2652 tree binrhs
= gimple_assign_rhs2 (stmt
);
2654 struct loop
*loop
= loop_containing_stmt (stmt
);
2656 if (TREE_CODE (binlhs
) == SSA_NAME
2657 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
2660 if (TREE_CODE (binrhs
) == SSA_NAME
2661 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
2664 if (TREE_CODE (lhs
) == SSA_NAME
2665 && (immusestmt
= get_single_immediate_use (lhs
))
2666 && is_gimple_assign (immusestmt
)
2667 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
2668 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
2673 /* Transform STMT from A - B into A + -B. */
2676 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
2678 tree rhs1
= gimple_assign_rhs1 (stmt
);
2679 tree rhs2
= gimple_assign_rhs2 (stmt
);
2681 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2683 fprintf (dump_file
, "Breaking up subtract ");
2684 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2687 rhs2
= negate_value (rhs2
, gsip
);
2688 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
2692 /* Determine whether STMT is a builtin call that raises an SSA name
2693 to an integer power and has only one use. If so, and this is early
2694 reassociation and unsafe math optimizations are permitted, place
2695 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
2696 If any of these conditions does not hold, return FALSE. */
2699 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
2702 REAL_VALUE_TYPE c
, cint
;
2704 if (!first_pass_instance
2705 || !flag_unsafe_math_optimizations
2706 || !is_gimple_call (stmt
)
2707 || !has_single_use (gimple_call_lhs (stmt
)))
2710 fndecl
= gimple_call_fndecl (stmt
);
2713 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
2716 switch (DECL_FUNCTION_CODE (fndecl
))
2718 CASE_FLT_FN (BUILT_IN_POW
):
2719 *base
= gimple_call_arg (stmt
, 0);
2720 arg1
= gimple_call_arg (stmt
, 1);
2722 if (TREE_CODE (arg1
) != REAL_CST
)
2725 c
= TREE_REAL_CST (arg1
);
2727 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
2730 *exponent
= real_to_integer (&c
);
2731 real_from_integer (&cint
, VOIDmode
, *exponent
,
2732 *exponent
< 0 ? -1 : 0, 0);
2733 if (!real_identical (&c
, &cint
))
2738 CASE_FLT_FN (BUILT_IN_POWI
):
2739 *base
= gimple_call_arg (stmt
, 0);
2740 arg1
= gimple_call_arg (stmt
, 1);
2742 if (!host_integerp (arg1
, 0))
2745 *exponent
= TREE_INT_CST_LOW (arg1
);
2752 /* Expanding negative exponents is generally unproductive, so we don't
2753 complicate matters with those. Exponents of zero and one should
2754 have been handled by expression folding. */
2755 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
2761 /* Recursively linearize a binary expression that is the RHS of STMT.
2762 Place the operands of the expression tree in the vector named OPS. */
2765 linearize_expr_tree (VEC(operand_entry_t
, heap
) **ops
, gimple stmt
,
2766 bool is_associative
, bool set_visited
)
2768 tree binlhs
= gimple_assign_rhs1 (stmt
);
2769 tree binrhs
= gimple_assign_rhs2 (stmt
);
2770 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
2771 bool binlhsisreassoc
= false;
2772 bool binrhsisreassoc
= false;
2773 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
2774 struct loop
*loop
= loop_containing_stmt (stmt
);
2775 tree base
= NULL_TREE
;
2776 HOST_WIDE_INT exponent
= 0;
2779 gimple_set_visited (stmt
, true);
2781 if (TREE_CODE (binlhs
) == SSA_NAME
)
2783 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
2784 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
2785 && !stmt_could_throw_p (binlhsdef
));
2788 if (TREE_CODE (binrhs
) == SSA_NAME
)
2790 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
2791 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
2792 && !stmt_could_throw_p (binrhsdef
));
2795 /* If the LHS is not reassociable, but the RHS is, we need to swap
2796 them. If neither is reassociable, there is nothing we can do, so
2797 just put them in the ops vector. If the LHS is reassociable,
2798 linearize it. If both are reassociable, then linearize the RHS
2801 if (!binlhsisreassoc
)
2805 /* If this is not a associative operation like division, give up. */
2806 if (!is_associative
)
2808 add_to_ops_vec (ops
, binrhs
);
2812 if (!binrhsisreassoc
)
2814 if (rhscode
== MULT_EXPR
2815 && TREE_CODE (binrhs
) == SSA_NAME
2816 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
2818 add_repeat_to_ops_vec (ops
, base
, exponent
);
2819 gimple_set_visited (binrhsdef
, true);
2822 add_to_ops_vec (ops
, binrhs
);
2824 if (rhscode
== MULT_EXPR
2825 && TREE_CODE (binlhs
) == SSA_NAME
2826 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
2828 add_repeat_to_ops_vec (ops
, base
, exponent
);
2829 gimple_set_visited (binlhsdef
, true);
2832 add_to_ops_vec (ops
, binlhs
);
2837 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2839 fprintf (dump_file
, "swapping operands of ");
2840 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2843 swap_tree_operands (stmt
,
2844 gimple_assign_rhs1_ptr (stmt
),
2845 gimple_assign_rhs2_ptr (stmt
));
2848 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2850 fprintf (dump_file
, " is now ");
2851 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2854 /* We want to make it so the lhs is always the reassociative op,
2860 else if (binrhsisreassoc
)
2862 linearize_expr (stmt
);
2863 binlhs
= gimple_assign_rhs1 (stmt
);
2864 binrhs
= gimple_assign_rhs2 (stmt
);
2867 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
2868 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
2870 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
2871 is_associative
, set_visited
);
2873 if (rhscode
== MULT_EXPR
2874 && TREE_CODE (binrhs
) == SSA_NAME
2875 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
2877 add_repeat_to_ops_vec (ops
, base
, exponent
);
2878 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
2881 add_to_ops_vec (ops
, binrhs
);
2884 /* Repropagate the negates back into subtracts, since no other pass
2885 currently does it. */
2888 repropagate_negates (void)
2893 FOR_EACH_VEC_ELT (tree
, plus_negates
, i
, negate
)
2895 gimple user
= get_single_immediate_use (negate
);
2897 if (!user
|| !is_gimple_assign (user
))
2900 /* The negate operand can be either operand of a PLUS_EXPR
2901 (it can be the LHS if the RHS is a constant for example).
2903 Force the negate operand to the RHS of the PLUS_EXPR, then
2904 transform the PLUS_EXPR into a MINUS_EXPR. */
2905 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
2907 /* If the negated operand appears on the LHS of the
2908 PLUS_EXPR, exchange the operands of the PLUS_EXPR
2909 to force the negated operand to the RHS of the PLUS_EXPR. */
2910 if (gimple_assign_rhs1 (user
) == negate
)
2912 swap_tree_operands (user
,
2913 gimple_assign_rhs1_ptr (user
),
2914 gimple_assign_rhs2_ptr (user
));
2917 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2918 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
2919 if (gimple_assign_rhs2 (user
) == negate
)
2921 tree rhs1
= gimple_assign_rhs1 (user
);
2922 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
2923 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
2924 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
2928 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
2930 if (gimple_assign_rhs1 (user
) == negate
)
2935 which we transform into
2938 This pushes down the negate which we possibly can merge
2939 into some other operation, hence insert it into the
2940 plus_negates vector. */
2941 gimple feed
= SSA_NAME_DEF_STMT (negate
);
2942 tree a
= gimple_assign_rhs1 (feed
);
2943 tree rhs2
= gimple_assign_rhs2 (user
);
2944 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
), gsi2
;
2945 gimple_replace_lhs (feed
, negate
);
2946 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, a
, rhs2
);
2947 update_stmt (gsi_stmt (gsi
));
2948 gsi2
= gsi_for_stmt (user
);
2949 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, negate
, NULL
);
2950 update_stmt (gsi_stmt (gsi2
));
2951 gsi_move_before (&gsi
, &gsi2
);
2952 VEC_safe_push (tree
, heap
, plus_negates
,
2953 gimple_assign_lhs (gsi_stmt (gsi2
)));
2957 /* Transform "x = -a; y = b - x" into "y = b + a", getting
2958 rid of one operation. */
2959 gimple feed
= SSA_NAME_DEF_STMT (negate
);
2960 tree a
= gimple_assign_rhs1 (feed
);
2961 tree rhs1
= gimple_assign_rhs1 (user
);
2962 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
2963 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
2964 update_stmt (gsi_stmt (gsi
));
2970 /* Returns true if OP is of a type for which we can do reassociation.
2971 That is for integral or non-saturating fixed-point types, and for
2972 floating point type when associative-math is enabled. */
2975 can_reassociate_p (tree op
)
2977 tree type
= TREE_TYPE (op
);
2978 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
2979 || NON_SAT_FIXED_POINT_TYPE_P (type
)
2980 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
2985 /* Break up subtract operations in block BB.
2987 We do this top down because we don't know whether the subtract is
2988 part of a possible chain of reassociation except at the top.
2997 we want to break up k = t - q, but we won't until we've transformed q
2998 = b - r, which won't be broken up until we transform b = c - d.
3000 En passant, clear the GIMPLE visited flag on every statement. */
3003 break_up_subtract_bb (basic_block bb
)
3005 gimple_stmt_iterator gsi
;
3008 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3010 gimple stmt
= gsi_stmt (gsi
);
3011 gimple_set_visited (stmt
, false);
3013 if (!is_gimple_assign (stmt
)
3014 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3017 /* Look for simple gimple subtract operations. */
3018 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3020 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3021 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3024 /* Check for a subtract used only in an addition. If this
3025 is the case, transform it into add of a negate for better
3026 reassociation. IE transform C = A-B into C = A + -B if C
3027 is only used in an addition. */
3028 if (should_break_up_subtract (stmt
))
3029 break_up_subtract (stmt
, &gsi
);
3031 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3032 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3033 VEC_safe_push (tree
, heap
, plus_negates
, gimple_assign_lhs (stmt
));
3035 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3037 son
= next_dom_son (CDI_DOMINATORS
, son
))
3038 break_up_subtract_bb (son
);
3041 /* Used for repeated factor analysis. */
3042 struct repeat_factor_d
3044 /* An SSA name that occurs in a multiply chain. */
3047 /* Cached rank of the factor. */
3050 /* Number of occurrences of the factor in the chain. */
3051 HOST_WIDE_INT count
;
3053 /* An SSA name representing the product of this factor and
3054 all factors appearing later in the repeated factor vector. */
3058 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
3059 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
3061 DEF_VEC_O (repeat_factor
);
3062 DEF_VEC_ALLOC_O (repeat_factor
, heap
);
3064 static VEC (repeat_factor
, heap
) *repeat_factor_vec
;
3066 /* Used for sorting the repeat factor vector. Sort primarily by
3067 ascending occurrence count, secondarily by descending rank. */
3070 compare_repeat_factors (const void *x1
, const void *x2
)
3072 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3073 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3075 if (rf1
->count
!= rf2
->count
)
3076 return rf1
->count
- rf2
->count
;
3078 return rf2
->rank
- rf1
->rank
;
3081 /* Get a new SSA name for register variable *TARGET of type TYPE.
3082 If *TARGET is null or incompatible with TYPE, create the variable
3086 get_reassoc_pow_ssa_name (tree
*target
, tree type
)
3088 if (!*target
|| !types_compatible_p (type
, TREE_TYPE (*target
)))
3090 *target
= create_tmp_reg (type
, "reassocpow");
3091 add_referenced_var (*target
);
3094 return make_ssa_name (*target
, NULL
);
3097 /* Look for repeated operands in OPS in the multiply tree rooted at
3098 STMT. Replace them with an optimal sequence of multiplies and powi
3099 builtin calls, and remove the used operands from OPS. Return an
3100 SSA name representing the value of the replacement sequence. */
3103 attempt_builtin_powi (gimple stmt
, VEC(operand_entry_t
, heap
) **ops
,
3106 unsigned i
, j
, vec_len
;
3109 repeat_factor_t rf1
, rf2
;
3110 repeat_factor rfnew
;
3111 tree result
= NULL_TREE
;
3112 tree target_ssa
, iter_result
;
3113 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3114 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
3115 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3116 gimple mul_stmt
, pow_stmt
;
3118 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3123 /* Allocate the repeated factor vector. */
3124 repeat_factor_vec
= VEC_alloc (repeat_factor
, heap
, 10);
3126 /* Scan the OPS vector for all SSA names in the product and build
3127 up a vector of occurrence counts for each factor. */
3128 FOR_EACH_VEC_ELT (operand_entry_t
, *ops
, i
, oe
)
3130 if (TREE_CODE (oe
->op
) == SSA_NAME
)
3132 FOR_EACH_VEC_ELT (repeat_factor
, repeat_factor_vec
, j
, rf1
)
3134 if (rf1
->factor
== oe
->op
)
3136 rf1
->count
+= oe
->count
;
3141 if (j
>= VEC_length (repeat_factor
, repeat_factor_vec
))
3143 rfnew
.factor
= oe
->op
;
3144 rfnew
.rank
= oe
->rank
;
3145 rfnew
.count
= oe
->count
;
3146 rfnew
.repr
= NULL_TREE
;
3147 VEC_safe_push (repeat_factor
, heap
, repeat_factor_vec
, &rfnew
);
3152 /* Sort the repeated factor vector by (a) increasing occurrence count,
3153 and (b) decreasing rank. */
3154 VEC_qsort (repeat_factor
, repeat_factor_vec
, compare_repeat_factors
);
3156 /* It is generally best to combine as many base factors as possible
3157 into a product before applying __builtin_powi to the result.
3158 However, the sort order chosen for the repeated factor vector
3159 allows us to cache partial results for the product of the base
3160 factors for subsequent use. When we already have a cached partial
3161 result from a previous iteration, it is best to make use of it
3162 before looking for another __builtin_pow opportunity.
3164 As an example, consider x * x * y * y * y * z * z * z * z.
3165 We want to first compose the product x * y * z, raise it to the
3166 second power, then multiply this by y * z, and finally multiply
3167 by z. This can be done in 5 multiplies provided we cache y * z
3168 for use in both expressions:
3176 If we instead ignored the cached y * z and first multiplied by
3177 the __builtin_pow opportunity z * z, we would get the inferior:
3186 vec_len
= VEC_length (repeat_factor
, repeat_factor_vec
);
3188 /* Repeatedly look for opportunities to create a builtin_powi call. */
3191 HOST_WIDE_INT power
;
3193 /* First look for the largest cached product of factors from
3194 preceding iterations. If found, create a builtin_powi for
3195 it if the minimum occurrence count for its factors is at
3196 least 2, or just use this cached product as our next
3197 multiplicand if the minimum occurrence count is 1. */
3198 FOR_EACH_VEC_ELT (repeat_factor
, repeat_factor_vec
, j
, rf1
)
3200 if (rf1
->repr
&& rf1
->count
> 0)
3210 iter_result
= rf1
->repr
;
3212 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3216 fputs ("Multiplying by cached product ", dump_file
);
3217 for (elt
= j
; elt
< vec_len
; elt
++)
3219 rf
= VEC_index (repeat_factor
, repeat_factor_vec
, elt
);
3220 print_generic_expr (dump_file
, rf
->factor
, 0);
3221 if (elt
< vec_len
- 1)
3222 fputs (" * ", dump_file
);
3224 fputs ("\n", dump_file
);
3229 iter_result
= get_reassoc_pow_ssa_name (target
, type
);
3230 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
3231 build_int_cst (integer_type_node
,
3233 gimple_call_set_lhs (pow_stmt
, iter_result
);
3234 gimple_set_location (pow_stmt
, gimple_location (stmt
));
3235 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
3237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3241 fputs ("Building __builtin_pow call for cached product (",
3243 for (elt
= j
; elt
< vec_len
; elt
++)
3245 rf
= VEC_index (repeat_factor
, repeat_factor_vec
, elt
);
3246 print_generic_expr (dump_file
, rf
->factor
, 0);
3247 if (elt
< vec_len
- 1)
3248 fputs (" * ", dump_file
);
3250 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
3257 /* Otherwise, find the first factor in the repeated factor
3258 vector whose occurrence count is at least 2. If no such
3259 factor exists, there are no builtin_powi opportunities
3261 FOR_EACH_VEC_ELT (repeat_factor
, repeat_factor_vec
, j
, rf1
)
3263 if (rf1
->count
>= 2)
3272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3276 fputs ("Building __builtin_pow call for (", dump_file
);
3277 for (elt
= j
; elt
< vec_len
; elt
++)
3279 rf
= VEC_index (repeat_factor
, repeat_factor_vec
, elt
);
3280 print_generic_expr (dump_file
, rf
->factor
, 0);
3281 if (elt
< vec_len
- 1)
3282 fputs (" * ", dump_file
);
3284 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
3287 reassociate_stats
.pows_created
++;
3289 /* Visit each element of the vector in reverse order (so that
3290 high-occurrence elements are visited first, and within the
3291 same occurrence count, lower-ranked elements are visited
3292 first). Form a linear product of all elements in this order
3293 whose occurrencce count is at least that of element J.
3294 Record the SSA name representing the product of each element
3295 with all subsequent elements in the vector. */
3296 if (j
== vec_len
- 1)
3297 rf1
->repr
= rf1
->factor
;
3300 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
3304 rf1
= VEC_index (repeat_factor
, repeat_factor_vec
, ii
);
3305 rf2
= VEC_index (repeat_factor
, repeat_factor_vec
, ii
+ 1);
3307 /* Init the last factor's representative to be itself. */
3309 rf2
->repr
= rf2
->factor
;
3314 target_ssa
= get_reassoc_pow_ssa_name (target
, type
);
3315 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
3318 gimple_set_location (mul_stmt
, gimple_location (stmt
));
3319 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
3320 rf1
->repr
= target_ssa
;
3322 /* Don't reprocess the multiply we just introduced. */
3323 gimple_set_visited (mul_stmt
, true);
3327 /* Form a call to __builtin_powi for the maximum product
3328 just formed, raised to the power obtained earlier. */
3329 rf1
= VEC_index (repeat_factor
, repeat_factor_vec
, j
);
3330 iter_result
= get_reassoc_pow_ssa_name (target
, type
);
3331 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
3332 build_int_cst (integer_type_node
,
3334 gimple_call_set_lhs (pow_stmt
, iter_result
);
3335 gimple_set_location (pow_stmt
, gimple_location (stmt
));
3336 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
3339 /* If we previously formed at least one other builtin_powi call,
3340 form the product of this one and those others. */
3343 tree new_result
= get_reassoc_pow_ssa_name (target
, type
);
3344 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
3345 result
, iter_result
);
3346 gimple_set_location (mul_stmt
, gimple_location (stmt
));
3347 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
3348 gimple_set_visited (mul_stmt
, true);
3349 result
= new_result
;
3352 result
= iter_result
;
3354 /* Decrement the occurrence count of each element in the product
3355 by the count found above, and remove this many copies of each
3357 for (i
= j
; i
< vec_len
; i
++)
3362 rf1
= VEC_index (repeat_factor
, repeat_factor_vec
, i
);
3363 rf1
->count
-= power
;
3365 FOR_EACH_VEC_ELT_REVERSE (operand_entry_t
, *ops
, n
, oe
)
3367 if (oe
->op
== rf1
->factor
)
3371 VEC_ordered_remove (operand_entry_t
, *ops
, n
);
3387 /* At this point all elements in the repeated factor vector have a
3388 remaining occurrence count of 0 or 1, and those with a count of 1
3389 don't have cached representatives. Re-sort the ops vector and
3391 VEC_qsort (operand_entry_t
, *ops
, sort_by_operand_rank
);
3392 VEC_free (repeat_factor
, heap
, repeat_factor_vec
);
3394 /* Return the final product computed herein. Note that there may
3395 still be some elements with single occurrence count left in OPS;
3396 those will be handled by the normal reassociation logic. */
3400 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3403 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
3407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3409 fprintf (dump_file
, "Transforming ");
3410 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3413 rhs1
= gimple_assign_rhs1 (stmt
);
3414 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
3416 remove_visited_stmt_chain (rhs1
);
3418 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3420 fprintf (dump_file
, " into ");
3421 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3425 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3428 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
3429 tree rhs1
, tree rhs2
)
3431 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3433 fprintf (dump_file
, "Transforming ");
3434 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3437 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
3438 update_stmt (gsi_stmt (*gsi
));
3439 remove_visited_stmt_chain (rhs1
);
3441 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3443 fprintf (dump_file
, " into ");
3444 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3448 /* Reassociate expressions in basic block BB and its post-dominator as
3452 reassociate_bb (basic_block bb
)
3454 gimple_stmt_iterator gsi
;
3456 tree target
= NULL_TREE
;
3458 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3460 gimple stmt
= gsi_stmt (gsi
);
3462 if (is_gimple_assign (stmt
)
3463 && !stmt_could_throw_p (stmt
))
3465 tree lhs
, rhs1
, rhs2
;
3466 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
3468 /* If this is not a gimple binary expression, there is
3469 nothing for us to do with it. */
3470 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
3473 /* If this was part of an already processed statement,
3474 we don't need to touch it again. */
3475 if (gimple_visited_p (stmt
))
3477 /* This statement might have become dead because of previous
3479 if (has_zero_uses (gimple_get_lhs (stmt
)))
3481 gsi_remove (&gsi
, true);
3482 release_defs (stmt
);
3483 /* We might end up removing the last stmt above which
3484 places the iterator to the end of the sequence.
3485 Reset it to the last stmt in this case which might
3486 be the end of the sequence as well if we removed
3487 the last statement of the sequence. In which case
3488 we need to bail out. */
3489 if (gsi_end_p (gsi
))
3491 gsi
= gsi_last_bb (bb
);
3492 if (gsi_end_p (gsi
))
3499 lhs
= gimple_assign_lhs (stmt
);
3500 rhs1
= gimple_assign_rhs1 (stmt
);
3501 rhs2
= gimple_assign_rhs2 (stmt
);
3503 /* For non-bit or min/max operations we can't associate
3504 all types. Verify that here. */
3505 if (rhs_code
!= BIT_IOR_EXPR
3506 && rhs_code
!= BIT_AND_EXPR
3507 && rhs_code
!= BIT_XOR_EXPR
3508 && rhs_code
!= MIN_EXPR
3509 && rhs_code
!= MAX_EXPR
3510 && (!can_reassociate_p (lhs
)
3511 || !can_reassociate_p (rhs1
)
3512 || !can_reassociate_p (rhs2
)))
3515 if (associative_tree_code (rhs_code
))
3517 VEC(operand_entry_t
, heap
) *ops
= NULL
;
3518 tree powi_result
= NULL_TREE
;
3520 /* There may be no immediate uses left by the time we
3521 get here because we may have eliminated them all. */
3522 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
3525 gimple_set_visited (stmt
, true);
3526 linearize_expr_tree (&ops
, stmt
, true, true);
3527 VEC_qsort (operand_entry_t
, ops
, sort_by_operand_rank
);
3528 optimize_ops_list (rhs_code
, &ops
);
3529 if (undistribute_ops_list (rhs_code
, &ops
,
3530 loop_containing_stmt (stmt
)))
3532 VEC_qsort (operand_entry_t
, ops
, sort_by_operand_rank
);
3533 optimize_ops_list (rhs_code
, &ops
);
3536 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
3537 optimize_range_tests (rhs_code
, &ops
);
3539 if (first_pass_instance
3540 && rhs_code
== MULT_EXPR
3541 && flag_unsafe_math_optimizations
)
3542 powi_result
= attempt_builtin_powi (stmt
, &ops
, &target
);
3544 /* If the operand vector is now empty, all operands were
3545 consumed by the __builtin_powi optimization. */
3546 if (VEC_length (operand_entry_t
, ops
) == 0)
3547 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
3548 else if (VEC_length (operand_entry_t
, ops
) == 1)
3550 tree last_op
= VEC_last (operand_entry_t
, ops
)->op
;
3553 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
3556 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
3560 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
3561 int ops_num
= VEC_length (operand_entry_t
, ops
);
3562 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
3564 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3566 "Width = %d was chosen for reassociation\n", width
);
3569 && VEC_length (operand_entry_t
, ops
) > 3)
3570 rewrite_expr_tree_parallel (stmt
, width
, ops
);
3572 rewrite_expr_tree (stmt
, 0, ops
, false);
3574 /* If we combined some repeated factors into a
3575 __builtin_powi call, multiply that result by the
3576 reassociated operands. */
3580 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3581 tree target_ssa
= get_reassoc_pow_ssa_name (&target
,
3583 gimple_set_lhs (stmt
, target_ssa
);
3585 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
3588 gimple_set_location (mul_stmt
, gimple_location (stmt
));
3589 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
3593 VEC_free (operand_entry_t
, heap
, ops
);
3597 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
3599 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
3600 reassociate_bb (son
);
3603 void dump_ops_vector (FILE *file
, VEC (operand_entry_t
, heap
) *ops
);
3604 void debug_ops_vector (VEC (operand_entry_t
, heap
) *ops
);
3606 /* Dump the operand entry vector OPS to FILE. */
3609 dump_ops_vector (FILE *file
, VEC (operand_entry_t
, heap
) *ops
)
3614 FOR_EACH_VEC_ELT (operand_entry_t
, ops
, i
, oe
)
3616 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
3617 print_generic_expr (file
, oe
->op
, 0);
3621 /* Dump the operand entry vector OPS to STDERR. */
3624 debug_ops_vector (VEC (operand_entry_t
, heap
) *ops
)
3626 dump_ops_vector (stderr
, ops
);
3632 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
3633 reassociate_bb (EXIT_BLOCK_PTR
);
3636 /* Initialize the reassociation pass. */
3643 int *bbs
= XNEWVEC (int, last_basic_block
+ 1);
3645 /* Find the loops, so that we can prevent moving calculations in
3647 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
3649 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
3651 operand_entry_pool
= create_alloc_pool ("operand entry pool",
3652 sizeof (struct operand_entry
), 30);
3653 next_operand_entry_id
= 0;
3655 /* Reverse RPO (Reverse Post Order) will give us something where
3656 deeper loops come later. */
3657 pre_and_rev_post_order_compute (NULL
, bbs
, false);
3658 bb_rank
= XCNEWVEC (long, last_basic_block
+ 1);
3659 operand_rank
= pointer_map_create ();
3661 /* Give each default definition a distinct rank. This includes
3662 parameters and the static chain. Walk backwards over all
3663 SSA names so that we get proper rank ordering according
3664 to tree_swap_operands_p. */
3665 for (i
= num_ssa_names
- 1; i
> 0; --i
)
3667 tree name
= ssa_name (i
);
3668 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
3669 insert_operand_rank (name
, ++rank
);
3672 /* Set up rank for each BB */
3673 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
3674 bb_rank
[bbs
[i
]] = ++rank
<< 16;
3677 calculate_dominance_info (CDI_POST_DOMINATORS
);
3678 plus_negates
= NULL
;
3681 /* Cleanup after the reassociation pass, and print stats if
3687 statistics_counter_event (cfun
, "Linearized",
3688 reassociate_stats
.linearized
);
3689 statistics_counter_event (cfun
, "Constants eliminated",
3690 reassociate_stats
.constants_eliminated
);
3691 statistics_counter_event (cfun
, "Ops eliminated",
3692 reassociate_stats
.ops_eliminated
);
3693 statistics_counter_event (cfun
, "Statements rewritten",
3694 reassociate_stats
.rewritten
);
3695 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
3696 reassociate_stats
.pows_encountered
);
3697 statistics_counter_event (cfun
, "Built-in powi calls created",
3698 reassociate_stats
.pows_created
);
3700 pointer_map_destroy (operand_rank
);
3701 free_alloc_pool (operand_entry_pool
);
3703 VEC_free (tree
, heap
, plus_negates
);
3704 free_dominance_info (CDI_POST_DOMINATORS
);
3705 loop_optimizer_finalize ();
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}
3723 gate_tree_ssa_reassoc (void)
3725 return flag_tree_reassoc
!= 0;
3728 struct gimple_opt_pass pass_reassoc
=
3732 "reassoc", /* name */
3733 gate_tree_ssa_reassoc
, /* gate */
3734 execute_reassoc
, /* execute */
3737 0, /* static_pass_number */
3738 TV_TREE_REASSOC
, /* tv_id */
3739 PROP_cfg
| PROP_ssa
, /* properties_required */
3740 0, /* properties_provided */
3741 0, /* properties_destroyed */
3742 0, /* todo_flags_start */
3745 | TODO_ggc_collect
/* todo_flags_finish */