1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
42 #include "diagnostic-core.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
68 5. Repropagate negates, as nothing else will clean it up ATM.
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
86 IE if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
89 a (1), b (1), c (1), d (2), e (2)
92 We start with our merge worklist empty, and the ops list with all of
95 You want to first merge all leaves of the same rank, as much as
98 So first build a binary op of
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
110 Then build a binary op of d + e
113 and put mergetmp2 on the merge worklist.
115 so merge worklist = {mergetmp, c, mergetmp2}
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
123 mergetmp3 = mergetmp + c
125 worklist = {mergetmp2, mergetmp3}
127 mergetmp4 = mergetmp2 + mergetmp3
129 worklist = {mergetmp4}
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
140 So why don't we do this?
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or less ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
149 newstmt = mergetmp + op3
153 newstmt = mergetmp + op1
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
170 int constants_eliminated
;
173 int pows_encountered
;
177 /* Operator, rank pair. */
178 typedef struct operand_entry
186 static alloc_pool operand_entry_pool
;
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id
;
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
195 static long *bb_rank
;
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t
*operand_rank
;
201 static long get_rank (tree
);
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
217 phi_rank (gimple stmt
)
219 basic_block bb
= gimple_bb (stmt
);
220 struct loop
*father
= bb
->loop_father
;
226 /* We only care about real loops (those with a latch). */
228 return bb_rank
[bb
->index
];
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb
!= father
->header
233 return bb_rank
[bb
->index
];
235 /* Ignore virtual SSA_NAMEs. */
236 res
= gimple_phi_result (stmt
);
237 if (virtual_operand_p (res
))
238 return bb_rank
[bb
->index
];
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res
, &use
, &use_stmt
)
243 || gimple_bb (use_stmt
)->loop_father
!= father
)
244 return bb_rank
[bb
->index
];
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
249 tree arg
= gimple_phi_arg_def (stmt
, i
);
250 if (TREE_CODE (arg
) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
253 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
254 if (gimple_bb (def_stmt
)->loop_father
== father
)
255 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
259 /* Must be an uninteresting phi. */
260 return bb_rank
[bb
->index
];
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
267 loop_carried_phi (tree exp
)
272 if (TREE_CODE (exp
) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp
))
276 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
278 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
286 if (phi_rank (phi_stmt
) != block_rank
)
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
297 propagate_rank (long rank
, tree op
)
301 if (loop_carried_phi (op
))
304 op_rank
= get_rank (op
);
306 return MAX (rank
, op_rank
);
309 /* Look up the operand rank structure for expression E. */
312 find_operand_rank (tree e
)
314 void **slot
= pointer_map_contains (operand_rank
, e
);
315 return slot
? (long) (intptr_t) *slot
: -1;
318 /* Insert {E,RANK} into the operand rank hashtable. */
321 insert_operand_rank (tree e
, long rank
)
324 gcc_assert (rank
> 0);
325 slot
= pointer_map_insert (operand_rank
, e
);
327 *slot
= (void *) (intptr_t) rank
;
330 /* Given an expression E, return the rank of the expression. */
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e
))
339 /* SSA_NAME's have the rank of the expression they are the result
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
345 For simple operations, the rank is the maximum rank of any of
346 its operands, or the bb_rank, whichever is less.
347 I make no claims that this is optimal, however, it gives good
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
376 if (TREE_CODE (e
) == SSA_NAME
)
383 if (SSA_NAME_IS_DEFAULT_DEF (e
))
384 return find_operand_rank (e
);
386 stmt
= SSA_NAME_DEF_STMT (e
);
387 if (gimple_code (stmt
) == GIMPLE_PHI
)
388 return phi_rank (stmt
);
390 if (!is_gimple_assign (stmt
)
391 || gimple_vdef (stmt
))
392 return bb_rank
[gimple_bb (stmt
)->index
];
394 /* If we already have a rank for this expression, use that. */
395 rank
= find_operand_rank (e
);
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
403 if (gimple_assign_single_p (stmt
))
405 tree rhs
= gimple_assign_rhs1 (stmt
);
406 n
= TREE_OPERAND_LENGTH (rhs
);
408 rank
= propagate_rank (rank
, rhs
);
411 for (i
= 0; i
< n
; i
++)
413 op
= TREE_OPERAND (rhs
, i
);
416 rank
= propagate_rank (rank
, op
);
422 n
= gimple_num_ops (stmt
);
423 for (i
= 1; i
< n
; i
++)
425 op
= gimple_op (stmt
, i
);
427 rank
= propagate_rank (rank
, op
);
431 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
433 fprintf (dump_file
, "Rank for ");
434 print_generic_expr (dump_file
, e
, 0);
435 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e
, (rank
+ 1));
443 /* Globals, etc, are rank 0 */
448 /* We want integer ones to end up last no matter what, since they are
449 the ones we can do the most with. */
450 #define INTEGER_CONST_TYPE 1 << 3
451 #define FLOAT_CONST_TYPE 1 << 2
452 #define OTHER_CONST_TYPE 1 << 1
454 /* Classify an invariant tree into integer, float, or other, so that
455 we can sort them to be near other constants of the same type. */
457 constant_type (tree t
)
459 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
460 return INTEGER_CONST_TYPE
;
461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
462 return FLOAT_CONST_TYPE
;
464 return OTHER_CONST_TYPE
;
467 /* qsort comparison function to sort operand entries PA and PB by rank
468 so that the sorted array is ordered by rank in decreasing order. */
470 sort_by_operand_rank (const void *pa
, const void *pb
)
472 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
473 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
475 /* It's nicer for optimize_expression if constants that are likely
476 to fold when added/multiplied//whatever are put next to each
477 other. Since all constants have rank 0, order them by type. */
478 if (oeb
->rank
== 0 && oea
->rank
== 0)
480 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
481 return constant_type (oeb
->op
) - constant_type (oea
->op
);
483 /* To make sorting result stable, we use unique IDs to determine
485 return oeb
->id
- oea
->id
;
488 /* Lastly, make sure the versions that are the same go next to each
489 other. We use SSA_NAME_VERSION because it's stable. */
490 if ((oeb
->rank
- oea
->rank
== 0)
491 && TREE_CODE (oea
->op
) == SSA_NAME
492 && TREE_CODE (oeb
->op
) == SSA_NAME
)
494 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
495 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
497 return oeb
->id
- oea
->id
;
500 if (oeb
->rank
!= oea
->rank
)
501 return oeb
->rank
- oea
->rank
;
503 return oeb
->id
- oea
->id
;
506 /* Add an operand entry to *OPS for the tree operand OP. */
509 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
511 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
514 oe
->rank
= get_rank (op
);
515 oe
->id
= next_operand_entry_id
++;
520 /* Add an operand entry to *OPS for the tree operand OP with repeat
524 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
525 HOST_WIDE_INT repeat
)
527 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
530 oe
->rank
= get_rank (op
);
531 oe
->id
= next_operand_entry_id
++;
535 reassociate_stats
.pows_encountered
++;
538 /* Return true if STMT is reassociable operation containing a binary
539 operation with tree code CODE, and is inside LOOP. */
542 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
544 basic_block bb
= gimple_bb (stmt
);
546 if (gimple_bb (stmt
) == NULL
)
549 if (!flow_bb_inside_loop_p (loop
, bb
))
552 if (is_gimple_assign (stmt
)
553 && gimple_assign_rhs_code (stmt
) == code
554 && has_single_use (gimple_assign_lhs (stmt
)))
561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
562 operand of the negate operation. Otherwise, return NULL. */
565 get_unary_op (tree name
, enum tree_code opcode
)
567 gimple stmt
= SSA_NAME_DEF_STMT (name
);
569 if (!is_gimple_assign (stmt
))
572 if (gimple_assign_rhs_code (stmt
) == opcode
)
573 return gimple_assign_rhs1 (stmt
);
577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
578 eliminate through equivalences, do so, remove them from OPS, and
579 return true. Otherwise, return false. */
582 eliminate_duplicate_pair (enum tree_code opcode
,
583 vec
<operand_entry_t
> *ops
,
586 operand_entry_t curr
,
587 operand_entry_t last
)
590 /* If we have two of the same op, and the opcode is & |, min, or max,
591 we can eliminate one of them.
592 If we have two of the same op, and the opcode is ^, we can
593 eliminate both of them. */
595 if (last
&& last
->op
== curr
->op
)
603 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
605 fprintf (dump_file
, "Equivalence: ");
606 print_generic_expr (dump_file
, curr
->op
, 0);
607 fprintf (dump_file
, " [&|minmax] ");
608 print_generic_expr (dump_file
, last
->op
, 0);
609 fprintf (dump_file
, " -> ");
610 print_generic_stmt (dump_file
, last
->op
, 0);
613 ops
->ordered_remove (i
);
614 reassociate_stats
.ops_eliminated
++;
619 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
621 fprintf (dump_file
, "Equivalence: ");
622 print_generic_expr (dump_file
, curr
->op
, 0);
623 fprintf (dump_file
, " ^ ");
624 print_generic_expr (dump_file
, last
->op
, 0);
625 fprintf (dump_file
, " -> nothing\n");
628 reassociate_stats
.ops_eliminated
+= 2;
630 if (ops
->length () == 2)
633 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
638 ops
->ordered_remove (i
-1);
639 ops
->ordered_remove (i
-1);
651 static vec
<tree
> plus_negates
;
653 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
654 expression, look in OPS for a corresponding positive operation to cancel
655 it out. If we find one, remove the other from OPS, replace
656 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
660 eliminate_plus_minus_pair (enum tree_code opcode
,
661 vec
<operand_entry_t
> *ops
,
662 unsigned int currindex
,
663 operand_entry_t curr
)
670 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
673 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
674 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
675 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
678 /* Any non-negated version will have a rank that is one less than
679 the current rank. So once we hit those ranks, if we don't find
682 for (i
= currindex
+ 1;
683 ops
->iterate (i
, &oe
)
684 && oe
->rank
>= curr
->rank
- 1 ;
687 if (oe
->op
== negateop
)
690 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
692 fprintf (dump_file
, "Equivalence: ");
693 print_generic_expr (dump_file
, negateop
, 0);
694 fprintf (dump_file
, " + -");
695 print_generic_expr (dump_file
, oe
->op
, 0);
696 fprintf (dump_file
, " -> 0\n");
699 ops
->ordered_remove (i
);
700 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
701 ops
->ordered_remove (currindex
);
702 reassociate_stats
.ops_eliminated
++;
706 else if (oe
->op
== notop
)
708 tree op_type
= TREE_TYPE (oe
->op
);
710 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
712 fprintf (dump_file
, "Equivalence: ");
713 print_generic_expr (dump_file
, notop
, 0);
714 fprintf (dump_file
, " + ~");
715 print_generic_expr (dump_file
, oe
->op
, 0);
716 fprintf (dump_file
, " -> -1\n");
719 ops
->ordered_remove (i
);
720 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
721 ops
->ordered_remove (currindex
);
722 reassociate_stats
.ops_eliminated
++;
728 /* CURR->OP is a negate expr in a plus expr: save it for later
729 inspection in repropagate_negates(). */
730 if (negateop
!= NULL_TREE
)
731 plus_negates
.safe_push (curr
->op
);
736 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
737 bitwise not expression, look in OPS for a corresponding operand to
738 cancel it out. If we find one, remove the other from OPS, replace
739 OPS[CURRINDEX] with 0, and return true. Otherwise, return
743 eliminate_not_pairs (enum tree_code opcode
,
744 vec
<operand_entry_t
> *ops
,
745 unsigned int currindex
,
746 operand_entry_t curr
)
752 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
753 || TREE_CODE (curr
->op
) != SSA_NAME
)
756 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
757 if (notop
== NULL_TREE
)
760 /* Any non-not version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
764 for (i
= currindex
+ 1;
765 ops
->iterate (i
, &oe
)
766 && oe
->rank
>= curr
->rank
- 1;
771 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
773 fprintf (dump_file
, "Equivalence: ");
774 print_generic_expr (dump_file
, notop
, 0);
775 if (opcode
== BIT_AND_EXPR
)
776 fprintf (dump_file
, " & ~");
777 else if (opcode
== BIT_IOR_EXPR
)
778 fprintf (dump_file
, " | ~");
779 print_generic_expr (dump_file
, oe
->op
, 0);
780 if (opcode
== BIT_AND_EXPR
)
781 fprintf (dump_file
, " -> 0\n");
782 else if (opcode
== BIT_IOR_EXPR
)
783 fprintf (dump_file
, " -> -1\n");
786 if (opcode
== BIT_AND_EXPR
)
787 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
788 else if (opcode
== BIT_IOR_EXPR
)
789 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
790 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
792 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
794 ops
->quick_push (oe
);
802 /* Use constant value that may be present in OPS to try to eliminate
803 operands. Note that this function is only really used when we've
804 eliminated ops for other reasons, or merged constants. Across
805 single statements, fold already does all of this, plus more. There
806 is little point in duplicating logic, so I've only included the
807 identities that I could ever construct testcases to trigger. */
810 eliminate_using_constants (enum tree_code opcode
,
811 vec
<operand_entry_t
> *ops
)
813 operand_entry_t oelast
= ops
->last ();
814 tree type
= TREE_TYPE (oelast
->op
);
816 if (oelast
->rank
== 0
817 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
822 if (integer_zerop (oelast
->op
))
824 if (ops
->length () != 1)
826 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
827 fprintf (dump_file
, "Found & 0, removing all other ops\n");
829 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
832 ops
->quick_push (oelast
);
836 else if (integer_all_onesp (oelast
->op
))
838 if (ops
->length () != 1)
840 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
841 fprintf (dump_file
, "Found & -1, removing\n");
843 reassociate_stats
.ops_eliminated
++;
848 if (integer_all_onesp (oelast
->op
))
850 if (ops
->length () != 1)
852 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
853 fprintf (dump_file
, "Found | -1, removing all other ops\n");
855 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
858 ops
->quick_push (oelast
);
862 else if (integer_zerop (oelast
->op
))
864 if (ops
->length () != 1)
866 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
867 fprintf (dump_file
, "Found | 0, removing\n");
869 reassociate_stats
.ops_eliminated
++;
874 if (integer_zerop (oelast
->op
)
875 || (FLOAT_TYPE_P (type
)
876 && !HONOR_NANS (TYPE_MODE (type
))
877 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
878 && real_zerop (oelast
->op
)))
880 if (ops
->length () != 1)
882 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
883 fprintf (dump_file
, "Found * 0, removing all other ops\n");
885 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
887 ops
->quick_push (oelast
);
891 else if (integer_onep (oelast
->op
)
892 || (FLOAT_TYPE_P (type
)
893 && !HONOR_SNANS (TYPE_MODE (type
))
894 && real_onep (oelast
->op
)))
896 if (ops
->length () != 1)
898 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
899 fprintf (dump_file
, "Found * 1, removing\n");
901 reassociate_stats
.ops_eliminated
++;
909 if (integer_zerop (oelast
->op
)
910 || (FLOAT_TYPE_P (type
)
911 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
912 && fold_real_zero_addition_p (type
, oelast
->op
,
913 opcode
== MINUS_EXPR
)))
915 if (ops
->length () != 1)
917 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
918 fprintf (dump_file
, "Found [|^+] 0, removing\n");
920 reassociate_stats
.ops_eliminated
++;
932 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
935 /* Structure for tracking and counting operands. */
936 typedef struct oecount_s
{
939 enum tree_code oecode
;
944 /* The heap for the oecount hashtable and the sorted list of operands. */
945 static vec
<oecount
> cvec
;
947 /* Hash function for oecount. */
950 oecount_hash (const void *p
)
952 const oecount
*c
= &cvec
[(size_t)p
- 42];
953 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
956 /* Comparison function for oecount. */
959 oecount_eq (const void *p1
, const void *p2
)
961 const oecount
*c1
= &cvec
[(size_t)p1
- 42];
962 const oecount
*c2
= &cvec
[(size_t)p2
- 42];
963 return (c1
->oecode
== c2
->oecode
964 && c1
->op
== c2
->op
);
967 /* Comparison function for qsort sorting oecount elements by count. */
970 oecount_cmp (const void *p1
, const void *p2
)
972 const oecount
*c1
= (const oecount
*)p1
;
973 const oecount
*c2
= (const oecount
*)p2
;
974 if (c1
->cnt
!= c2
->cnt
)
975 return c1
->cnt
- c2
->cnt
;
977 /* If counts are identical, use unique IDs to stabilize qsort. */
978 return c1
->id
- c2
->id
;
981 /* Return TRUE iff STMT represents a builtin call that raises OP
985 stmt_is_power_of_op (gimple stmt
, tree op
)
989 if (!is_gimple_call (stmt
))
992 fndecl
= gimple_call_fndecl (stmt
);
995 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
998 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1000 CASE_FLT_FN (BUILT_IN_POW
):
1001 CASE_FLT_FN (BUILT_IN_POWI
):
1002 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1009 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1010 in place and return the result. Assumes that stmt_is_power_of_op
1011 was previously called for STMT and returned TRUE. */
1013 static HOST_WIDE_INT
1014 decrement_power (gimple stmt
)
1016 REAL_VALUE_TYPE c
, cint
;
1017 HOST_WIDE_INT power
;
1020 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1022 CASE_FLT_FN (BUILT_IN_POW
):
1023 arg1
= gimple_call_arg (stmt
, 1);
1024 c
= TREE_REAL_CST (arg1
);
1025 power
= real_to_integer (&c
) - 1;
1026 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1027 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1030 CASE_FLT_FN (BUILT_IN_POWI
):
1031 arg1
= gimple_call_arg (stmt
, 1);
1032 power
= TREE_INT_CST_LOW (arg1
) - 1;
1033 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1041 /* Find the single immediate use of STMT's LHS, and replace it
1042 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1043 replace *DEF with OP as well. */
1046 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1051 gimple_stmt_iterator gsi
;
1053 if (is_gimple_call (stmt
))
1054 lhs
= gimple_call_lhs (stmt
);
1056 lhs
= gimple_assign_lhs (stmt
);
1058 gcc_assert (has_single_use (lhs
));
1059 single_imm_use (lhs
, &use
, &use_stmt
);
1063 if (TREE_CODE (op
) != SSA_NAME
)
1064 update_stmt (use_stmt
);
1065 gsi
= gsi_for_stmt (stmt
);
1066 gsi_remove (&gsi
, true);
1067 release_defs (stmt
);
1069 if (is_gimple_call (stmt
))
1070 unlink_stmt_vdef (stmt
);
1073 /* Walks the linear chain with result *DEF searching for an operation
1074 with operand OP and code OPCODE removing that from the chain. *DEF
1075 is updated if there is only one operand but no operation left. */
1078 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1080 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1086 if (opcode
== MULT_EXPR
1087 && stmt_is_power_of_op (stmt
, op
))
1089 if (decrement_power (stmt
) == 1)
1090 propagate_op_to_single_use (op
, stmt
, def
);
1094 name
= gimple_assign_rhs1 (stmt
);
1096 /* If this is the operation we look for and one of the operands
1097 is ours simply propagate the other operand into the stmts
1099 if (gimple_assign_rhs_code (stmt
) == opcode
1101 || gimple_assign_rhs2 (stmt
) == op
))
1104 name
= gimple_assign_rhs2 (stmt
);
1105 propagate_op_to_single_use (name
, stmt
, def
);
1109 /* We might have a multiply of two __builtin_pow* calls, and
1110 the operand might be hiding in the rightmost one. */
1111 if (opcode
== MULT_EXPR
1112 && gimple_assign_rhs_code (stmt
) == opcode
1113 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
1115 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1116 if (stmt_is_power_of_op (stmt2
, op
))
1118 if (decrement_power (stmt2
) == 1)
1119 propagate_op_to_single_use (op
, stmt2
, def
);
1124 /* Continue walking the chain. */
1125 gcc_assert (name
!= op
1126 && TREE_CODE (name
) == SSA_NAME
);
1127 stmt
= SSA_NAME_DEF_STMT (name
);
1132 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1133 the result. Places the statement after the definition of either
1134 OP1 or OP2. Returns the new statement. */
1137 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1139 gimple op1def
= NULL
, op2def
= NULL
;
1140 gimple_stmt_iterator gsi
;
1144 /* Create the addition statement. */
1145 op
= make_ssa_name (type
, NULL
);
1146 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1148 /* Find an insertion place and insert. */
1149 if (TREE_CODE (op1
) == SSA_NAME
)
1150 op1def
= SSA_NAME_DEF_STMT (op1
);
1151 if (TREE_CODE (op2
) == SSA_NAME
)
1152 op2def
= SSA_NAME_DEF_STMT (op2
);
1153 if ((!op1def
|| gimple_nop_p (op1def
))
1154 && (!op2def
|| gimple_nop_p (op2def
)))
1156 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
1157 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1159 else if ((!op1def
|| gimple_nop_p (op1def
))
1160 || (op2def
&& !gimple_nop_p (op2def
)
1161 && stmt_dominates_stmt_p (op1def
, op2def
)))
1163 if (gimple_code (op2def
) == GIMPLE_PHI
)
1165 gsi
= gsi_after_labels (gimple_bb (op2def
));
1166 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1170 if (!stmt_ends_bb_p (op2def
))
1172 gsi
= gsi_for_stmt (op2def
);
1173 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
1180 FOR_EACH_EDGE (e
, ei
, gimple_bb (op2def
)->succs
)
1181 if (e
->flags
& EDGE_FALLTHRU
)
1182 gsi_insert_on_edge_immediate (e
, sum
);
1188 if (gimple_code (op1def
) == GIMPLE_PHI
)
1190 gsi
= gsi_after_labels (gimple_bb (op1def
));
1191 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1195 if (!stmt_ends_bb_p (op1def
))
1197 gsi
= gsi_for_stmt (op1def
);
1198 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
1205 FOR_EACH_EDGE (e
, ei
, gimple_bb (op1def
)->succs
)
1206 if (e
->flags
& EDGE_FALLTHRU
)
1207 gsi_insert_on_edge_immediate (e
, sum
);
1216 /* Perform un-distribution of divisions and multiplications.
1217 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1218 to (A + B) / X for real X.
1220 The algorithm is organized as follows.
1222 - First we walk the addition chain *OPS looking for summands that
1223 are defined by a multiplication or a real division. This results
1224 in the candidates bitmap with relevant indices into *OPS.
1226 - Second we build the chains of multiplications or divisions for
1227 these candidates, counting the number of occurrences of (operand, code)
1228 pairs in all of the candidates chains.
1230 - Third we sort the (operand, code) pairs by number of occurrence and
1231 process them starting with the pair with the most uses.
1233 * For each such pair we walk the candidates again to build a
1234 second candidate bitmap noting all multiplication/division chains
1235 that have at least one occurrence of (operand, code).
1237 * We build an alternate addition chain only covering these
1238 candidates with one (operand, code) operation removed from their
1239 multiplication/division chain.
1241 * The first candidate gets replaced by the alternate addition chain
1242 multiplied/divided by the operand.
1244 * All candidate chains get disabled for further processing and
1245 processing of (operand, code) pairs continues.
1247 The alternate addition chains built are re-processed by the main
1248 reassociation algorithm which allows optimizing a * x * y + b * y * x
1249 to (a + b ) * x * y in one invocation of the reassociation pass. */
1252 undistribute_ops_list (enum tree_code opcode
,
1253 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1255 unsigned int length
= ops
->length ();
1256 operand_entry_t oe1
;
1258 sbitmap candidates
, candidates2
;
1259 unsigned nr_candidates
, nr_candidates2
;
1260 sbitmap_iterator sbi0
;
1261 vec
<operand_entry_t
> *subops
;
1263 bool changed
= false;
1264 int next_oecount_id
= 0;
1267 || opcode
!= PLUS_EXPR
)
1270 /* Build a list of candidates to process. */
1271 candidates
= sbitmap_alloc (length
);
1272 bitmap_clear (candidates
);
1274 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1276 enum tree_code dcode
;
1279 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1281 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1282 if (!is_gimple_assign (oe1def
))
1284 dcode
= gimple_assign_rhs_code (oe1def
);
1285 if ((dcode
!= MULT_EXPR
1286 && dcode
!= RDIV_EXPR
)
1287 || !is_reassociable_op (oe1def
, dcode
, loop
))
1290 bitmap_set_bit (candidates
, i
);
1294 if (nr_candidates
< 2)
1296 sbitmap_free (candidates
);
1300 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1302 fprintf (dump_file
, "searching for un-distribute opportunities ");
1303 print_generic_expr (dump_file
,
1304 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1305 fprintf (dump_file
, " %d\n", nr_candidates
);
1308 /* Build linearized sub-operand lists and the counting table. */
1310 ctable
= htab_create (15, oecount_hash
, oecount_eq
, NULL
);
1311 /* ??? Macro arguments cannot have multi-argument template types in
1312 them. This typedef is needed to workaround that limitation. */
1313 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1314 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1315 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1318 enum tree_code oecode
;
1321 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1322 oecode
= gimple_assign_rhs_code (oedef
);
1323 linearize_expr_tree (&subops
[i
], oedef
,
1324 associative_tree_code (oecode
), false);
1326 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1333 c
.id
= next_oecount_id
++;
1336 idx
= cvec
.length () + 41;
1337 slot
= htab_find_slot (ctable
, (void *)idx
, INSERT
);
1340 *slot
= (void *)idx
;
1345 cvec
[(size_t)*slot
- 42].cnt
++;
1349 htab_delete (ctable
);
1351 /* Sort the counting table. */
1352 cvec
.qsort (oecount_cmp
);
1354 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1357 fprintf (dump_file
, "Candidates:\n");
1358 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1360 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1361 c
->oecode
== MULT_EXPR
1362 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1363 print_generic_expr (dump_file
, c
->op
, 0);
1364 fprintf (dump_file
, "\n");
1368 /* Process the (operand, code) pairs in order of most occurence. */
1369 candidates2
= sbitmap_alloc (length
);
1370 while (!cvec
.is_empty ())
1372 oecount
*c
= &cvec
.last ();
1376 /* Now collect the operands in the outer chain that contain
1377 the common operand in their inner chain. */
1378 bitmap_clear (candidates2
);
1380 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1383 enum tree_code oecode
;
1385 tree op
= (*ops
)[i
]->op
;
1387 /* If we undistributed in this chain already this may be
1389 if (TREE_CODE (op
) != SSA_NAME
)
1392 oedef
= SSA_NAME_DEF_STMT (op
);
1393 oecode
= gimple_assign_rhs_code (oedef
);
1394 if (oecode
!= c
->oecode
)
1397 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1399 if (oe1
->op
== c
->op
)
1401 bitmap_set_bit (candidates2
, i
);
1408 if (nr_candidates2
>= 2)
1410 operand_entry_t oe1
, oe2
;
1412 int first
= bitmap_first_set_bit (candidates2
);
1414 /* Build the new addition chain. */
1415 oe1
= (*ops
)[first
];
1416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1418 fprintf (dump_file
, "Building (");
1419 print_generic_expr (dump_file
, oe1
->op
, 0);
1421 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1422 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1428 fprintf (dump_file
, " + ");
1429 print_generic_expr (dump_file
, oe2
->op
, 0);
1431 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1432 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1433 oe1
->op
, oe2
->op
, opcode
);
1434 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1436 oe1
->op
= gimple_get_lhs (sum
);
1439 /* Apply the multiplication/division. */
1440 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1441 oe1
->op
, c
->op
, c
->oecode
);
1442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1444 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1445 print_generic_expr (dump_file
, c
->op
, 0);
1446 fprintf (dump_file
, "\n");
1449 /* Record it in the addition chain and disable further
1450 undistribution with this op. */
1451 oe1
->op
= gimple_assign_lhs (prod
);
1452 oe1
->rank
= get_rank (oe1
->op
);
1453 subops
[first
].release ();
1461 for (i
= 0; i
< ops
->length (); ++i
)
1462 subops
[i
].release ();
1465 sbitmap_free (candidates
);
1466 sbitmap_free (candidates2
);
1471 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1472 expression, examine the other OPS to see if any of them are comparisons
1473 of the same values, which we may be able to combine or eliminate.
1474 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1477 eliminate_redundant_comparison (enum tree_code opcode
,
1478 vec
<operand_entry_t
> *ops
,
1479 unsigned int currindex
,
1480 operand_entry_t curr
)
1483 enum tree_code lcode
, rcode
;
1488 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1491 /* Check that CURR is a comparison. */
1492 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1494 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1495 if (!is_gimple_assign (def1
))
1497 lcode
= gimple_assign_rhs_code (def1
);
1498 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1500 op1
= gimple_assign_rhs1 (def1
);
1501 op2
= gimple_assign_rhs2 (def1
);
1503 /* Now look for a similar comparison in the remaining OPS. */
1504 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1508 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1510 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1511 if (!is_gimple_assign (def2
))
1513 rcode
= gimple_assign_rhs_code (def2
);
1514 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1517 /* If we got here, we have a match. See if we can combine the
1519 if (opcode
== BIT_IOR_EXPR
)
1520 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1521 rcode
, gimple_assign_rhs1 (def2
),
1522 gimple_assign_rhs2 (def2
));
1524 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1525 rcode
, gimple_assign_rhs1 (def2
),
1526 gimple_assign_rhs2 (def2
));
1530 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1531 always give us a boolean_type_node value back. If the original
1532 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1533 we need to convert. */
1534 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1535 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1537 if (TREE_CODE (t
) != INTEGER_CST
1538 && !operand_equal_p (t
, curr
->op
, 0))
1540 enum tree_code subcode
;
1541 tree newop1
, newop2
;
1542 if (!COMPARISON_CLASS_P (t
))
1544 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1545 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1546 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1547 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1551 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1553 fprintf (dump_file
, "Equivalence: ");
1554 print_generic_expr (dump_file
, curr
->op
, 0);
1555 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1556 print_generic_expr (dump_file
, oe
->op
, 0);
1557 fprintf (dump_file
, " -> ");
1558 print_generic_expr (dump_file
, t
, 0);
1559 fprintf (dump_file
, "\n");
1562 /* Now we can delete oe, as it has been subsumed by the new combined
1564 ops
->ordered_remove (i
);
1565 reassociate_stats
.ops_eliminated
++;
1567 /* If t is the same as curr->op, we're done. Otherwise we must
1568 replace curr->op with t. Special case is if we got a constant
1569 back, in which case we add it to the end instead of in place of
1570 the current entry. */
1571 if (TREE_CODE (t
) == INTEGER_CST
)
1573 ops
->ordered_remove (currindex
);
1574 add_to_ops_vec (ops
, t
);
1576 else if (!operand_equal_p (t
, curr
->op
, 0))
1579 enum tree_code subcode
;
1582 gcc_assert (COMPARISON_CLASS_P (t
));
1583 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1584 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1585 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1586 gcc_checking_assert (is_gimple_val (newop1
)
1587 && is_gimple_val (newop2
));
1588 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1589 curr
->op
= gimple_get_lhs (sum
);
1597 /* Perform various identities and other optimizations on the list of
1598 operand entries, stored in OPS. The tree code for the binary
1599 operation between all the operands is OPCODE. */
1602 optimize_ops_list (enum tree_code opcode
,
1603 vec
<operand_entry_t
> *ops
)
1605 unsigned int length
= ops
->length ();
1608 operand_entry_t oelast
= NULL
;
1609 bool iterate
= false;
1614 oelast
= ops
->last ();
1616 /* If the last two are constants, pop the constants off, merge them
1617 and try the next two. */
1618 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1620 operand_entry_t oelm1
= (*ops
)[length
- 2];
1622 if (oelm1
->rank
== 0
1623 && is_gimple_min_invariant (oelm1
->op
)
1624 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1625 TREE_TYPE (oelast
->op
)))
1627 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1628 oelm1
->op
, oelast
->op
);
1630 if (folded
&& is_gimple_min_invariant (folded
))
1632 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1633 fprintf (dump_file
, "Merging constants\n");
1638 add_to_ops_vec (ops
, folded
);
1639 reassociate_stats
.constants_eliminated
++;
1641 optimize_ops_list (opcode
, ops
);
1647 eliminate_using_constants (opcode
, ops
);
1650 for (i
= 0; ops
->iterate (i
, &oe
);)
1654 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1656 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1657 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1658 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1670 length
= ops
->length ();
1671 oelast
= ops
->last ();
1674 optimize_ops_list (opcode
, ops
);
1677 /* The following functions are subroutines to optimize_range_tests and allow
1678 it to try to change a logical combination of comparisons into a range
1682 X == 2 || X == 5 || X == 3 || X == 4
1686 (unsigned) (X - 2) <= 3
1688 For more information see comments above fold_test_range in fold-const.c,
1689 this implementation is for GIMPLE. */
1697 bool strict_overflow_p
;
1698 unsigned int idx
, next
;
1701 /* This is similar to make_range in fold-const.c, but on top of
1702 GIMPLE instead of trees. If EXP is non-NULL, it should be
1703 an SSA_NAME and STMT argument is ignored, otherwise STMT
1704 argument should be a GIMPLE_COND. */
1707 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1711 bool is_bool
, strict_overflow_p
;
1715 r
->strict_overflow_p
= false;
1717 r
->high
= NULL_TREE
;
1718 if (exp
!= NULL_TREE
1719 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1722 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1723 and see if we can refine the range. Some of the cases below may not
1724 happen, but it doesn't seem worth worrying about this. We "continue"
1725 the outer loop when we've changed something; otherwise we "break"
1726 the switch, which will "break" the while. */
1727 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1730 strict_overflow_p
= false;
1732 if (exp
== NULL_TREE
)
1734 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1736 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1741 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1746 enum tree_code code
;
1747 tree arg0
, arg1
, exp_type
;
1751 if (exp
!= NULL_TREE
)
1753 if (TREE_CODE (exp
) != SSA_NAME
)
1756 stmt
= SSA_NAME_DEF_STMT (exp
);
1757 if (!is_gimple_assign (stmt
))
1760 code
= gimple_assign_rhs_code (stmt
);
1761 arg0
= gimple_assign_rhs1 (stmt
);
1762 arg1
= gimple_assign_rhs2 (stmt
);
1763 exp_type
= TREE_TYPE (exp
);
1767 code
= gimple_cond_code (stmt
);
1768 arg0
= gimple_cond_lhs (stmt
);
1769 arg1
= gimple_cond_rhs (stmt
);
1770 exp_type
= boolean_type_node
;
1773 if (TREE_CODE (arg0
) != SSA_NAME
)
1775 loc
= gimple_location (stmt
);
1779 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1792 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1794 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1799 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1814 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1816 &strict_overflow_p
);
1817 if (nexp
!= NULL_TREE
)
1820 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1833 r
->strict_overflow_p
= strict_overflow_p
;
1837 /* Comparison function for qsort. Sort entries
1838 without SSA_NAME exp first, then with SSA_NAMEs sorted
1839 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1840 by increasing ->low and if ->low is the same, by increasing
1841 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1845 range_entry_cmp (const void *a
, const void *b
)
1847 const struct range_entry
*p
= (const struct range_entry
*) a
;
1848 const struct range_entry
*q
= (const struct range_entry
*) b
;
1850 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1852 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1854 /* Group range_entries for the same SSA_NAME together. */
1855 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1857 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1859 /* If ->low is different, NULL low goes first, then by
1861 if (p
->low
!= NULL_TREE
)
1863 if (q
->low
!= NULL_TREE
)
1865 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1867 if (tem
&& integer_onep (tem
))
1869 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1871 if (tem
&& integer_onep (tem
))
1877 else if (q
->low
!= NULL_TREE
)
1879 /* If ->high is different, NULL high goes last, before that by
1881 if (p
->high
!= NULL_TREE
)
1883 if (q
->high
!= NULL_TREE
)
1885 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1887 if (tem
&& integer_onep (tem
))
1889 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1891 if (tem
&& integer_onep (tem
))
1897 else if (p
->high
!= NULL_TREE
)
1899 /* If both ranges are the same, sort below by ascending idx. */
1904 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1907 if (p
->idx
< q
->idx
)
1911 gcc_checking_assert (p
->idx
> q
->idx
);
1916 /* Helper routine of optimize_range_test.
1917 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1918 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1919 OPCODE and OPS are arguments of optimize_range_tests. Return
1920 true if the range merge has been successful.
1921 If OPCODE is ERROR_MARK, this is called from within
1922 maybe_optimize_range_tests and is performing inter-bb range optimization.
1923 Changes should be then performed right away, and whether an op is
1924 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1927 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
1928 unsigned int count
, enum tree_code opcode
,
1929 vec
<operand_entry_t
> *ops
, tree exp
, bool in_p
,
1930 tree low
, tree high
, bool strict_overflow_p
)
1932 operand_entry_t oe
= (*ops
)[range
->idx
];
1934 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) : last_stmt (BASIC_BLOCK (oe
->id
));
1935 location_t loc
= gimple_location (stmt
);
1936 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
1937 tree tem
= build_range_check (loc
, optype
, exp
, in_p
, low
, high
);
1938 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
1939 gimple_stmt_iterator gsi
;
1941 if (tem
== NULL_TREE
)
1944 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
1945 warning_at (loc
, OPT_Wstrict_overflow
,
1946 "assuming signed overflow does not occur "
1947 "when simplifying range test");
1949 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1951 struct range_entry
*r
;
1952 fprintf (dump_file
, "Optimizing range tests ");
1953 print_generic_expr (dump_file
, range
->exp
, 0);
1954 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
1955 print_generic_expr (dump_file
, range
->low
, 0);
1956 fprintf (dump_file
, ", ");
1957 print_generic_expr (dump_file
, range
->high
, 0);
1958 fprintf (dump_file
, "]");
1959 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
1961 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
1962 print_generic_expr (dump_file
, r
->low
, 0);
1963 fprintf (dump_file
, ", ");
1964 print_generic_expr (dump_file
, r
->high
, 0);
1965 fprintf (dump_file
, "]");
1967 fprintf (dump_file
, "\n into ");
1968 print_generic_expr (dump_file
, tem
, 0);
1969 fprintf (dump_file
, "\n");
1972 if (opcode
== BIT_IOR_EXPR
1973 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
1974 tem
= invert_truthvalue_loc (loc
, tem
);
1976 tem
= fold_convert_loc (loc
, optype
, tem
);
1977 gsi
= gsi_for_stmt (stmt
);
1978 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
1981 /* If doing inter-bb range test optimization, update the
1982 stmts immediately. Start with changing the first range test
1983 immediate use to the new value (TEM), or, if the first range
1984 test is a GIMPLE_COND stmt, change that condition. */
1985 if (opcode
== ERROR_MARK
)
1989 imm_use_iterator iter
;
1990 use_operand_p use_p
;
1993 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, op
)
1995 if (is_gimple_debug (use_stmt
))
1997 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
1998 SET_USE (use_p
, tem
);
1999 update_stmt (use_stmt
);
2004 gimple_cond_set_code (stmt
, NE_EXPR
);
2005 gimple_cond_set_lhs (stmt
, tem
);
2006 gimple_cond_set_rhs (stmt
, boolean_false_node
);
2015 range
->strict_overflow_p
= false;
2017 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
2019 oe
= (*ops
)[range
->idx
];
2020 /* Now change all the other range test immediate uses, so that
2021 those tests will be optimized away. */
2022 if (opcode
== ERROR_MARK
)
2026 imm_use_iterator iter
;
2027 use_operand_p use_p
;
2030 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, oe
->op
)
2032 if (is_gimple_debug (use_stmt
))
2034 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2035 adjust it into _7 = _9;. */
2036 if (is_gimple_assign (use_stmt
)
2037 && gimple_assign_rhs_code (use_stmt
) == oe
->rank
)
2039 tree expr
= NULL_TREE
;
2040 if (oe
->op
== gimple_assign_rhs1 (use_stmt
))
2041 expr
= gimple_assign_rhs2 (use_stmt
);
2042 else if (oe
->op
== gimple_assign_rhs2 (use_stmt
))
2043 expr
= gimple_assign_rhs1 (use_stmt
);
2046 && TREE_CODE (expr
) == SSA_NAME
)
2048 gimple_stmt_iterator gsi2
= gsi_for_stmt (use_stmt
);
2049 gimple_assign_set_rhs_with_ops (&gsi2
, SSA_NAME
,
2051 update_stmt (use_stmt
);
2055 /* If imm use of _8 is a statement like _7 = (int) _8;,
2056 adjust it into _7 = 0; or _7 = 1;. */
2057 if (gimple_assign_cast_p (use_stmt
)
2058 && oe
->op
== gimple_assign_rhs1 (use_stmt
))
2060 tree lhs
= gimple_assign_lhs (use_stmt
);
2061 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))
2063 gimple_stmt_iterator gsi2
2064 = gsi_for_stmt (use_stmt
);
2065 tree expr
= build_int_cst (TREE_TYPE (lhs
),
2066 oe
->rank
== BIT_IOR_EXPR
2068 gimple_assign_set_rhs_with_ops (&gsi2
,
2071 update_stmt (use_stmt
);
2075 /* Otherwise replace the use with 0 or 1. */
2076 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2078 build_int_cst (TREE_TYPE (oe
->op
),
2079 oe
->rank
== BIT_IOR_EXPR
2081 update_stmt (use_stmt
);
2086 /* If range test was a GIMPLE_COND, simply change it
2087 into an always false or always true condition. */
2088 stmt
= last_stmt (BASIC_BLOCK (oe
->id
));
2089 if (oe
->rank
== BIT_IOR_EXPR
)
2090 gimple_cond_make_false (stmt
);
2092 gimple_cond_make_true (stmt
);
2096 oe
->op
= error_mark_node
;
2097 range
->exp
= NULL_TREE
;
2102 /* Optimize range tests, similarly how fold_range_test optimizes
2103 it on trees. The tree code for the binary
2104 operation between all the operands is OPCODE.
2105 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2106 maybe_optimize_range_tests for inter-bb range optimization.
2107 In that case if oe->op is NULL, oe->id is bb->index whose
2108 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2109 the actual opcode. */
2112 optimize_range_tests (enum tree_code opcode
,
2113 vec
<operand_entry_t
> *ops
)
2115 unsigned int length
= ops
->length (), i
, j
, first
;
2117 struct range_entry
*ranges
;
2118 bool any_changes
= false;
2123 ranges
= XNEWVEC (struct range_entry
, length
);
2124 for (i
= 0; i
< length
; i
++)
2128 init_range_entry (ranges
+ i
, oe
->op
,
2129 oe
->op
? NULL
: last_stmt (BASIC_BLOCK (oe
->id
)));
2130 /* For | invert it now, we will invert it again before emitting
2131 the optimized expression. */
2132 if (opcode
== BIT_IOR_EXPR
2133 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2134 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2137 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2138 for (i
= 0; i
< length
; i
++)
2139 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2142 /* Try to merge ranges. */
2143 for (first
= i
; i
< length
; i
++)
2145 tree low
= ranges
[i
].low
;
2146 tree high
= ranges
[i
].high
;
2147 int in_p
= ranges
[i
].in_p
;
2148 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2149 int update_fail_count
= 0;
2151 for (j
= i
+ 1; j
< length
; j
++)
2153 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2155 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2156 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2158 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2164 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2165 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2171 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2172 while update_range_test would fail. */
2173 else if (update_fail_count
== 64)
2176 ++update_fail_count
;
2179 /* Optimize X == CST1 || X == CST2
2180 if popcount (CST1 ^ CST2) == 1 into
2181 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2182 Similarly for ranges. E.g.
2183 X != 2 && X != 3 && X != 10 && X != 11
2184 will be transformed by the above loop into
2185 (X - 2U) <= 1U && (X - 10U) <= 1U
2186 and this loop can transform that into
2187 ((X & ~8) - 2U) <= 1U. */
2188 for (i
= first
; i
< length
; i
++)
2190 tree lowi
, highi
, lowj
, highj
, type
, lowxor
, highxor
, tem
, exp
;
2192 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2194 type
= TREE_TYPE (ranges
[i
].exp
);
2195 if (!INTEGRAL_TYPE_P (type
))
2197 lowi
= ranges
[i
].low
;
2198 if (lowi
== NULL_TREE
)
2199 lowi
= TYPE_MIN_VALUE (type
);
2200 highi
= ranges
[i
].high
;
2201 if (highi
== NULL_TREE
)
2203 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2205 if (ranges
[j
].exp
== NULL_TREE
)
2207 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2211 lowj
= ranges
[j
].low
;
2212 if (lowj
== NULL_TREE
)
2214 highj
= ranges
[j
].high
;
2215 if (highj
== NULL_TREE
)
2216 highj
= TYPE_MAX_VALUE (type
);
2217 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2219 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2221 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2222 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2224 gcc_checking_assert (!integer_zerop (lowxor
));
2225 tem
= fold_binary (MINUS_EXPR
, type
, lowxor
,
2226 build_int_cst (type
, 1));
2227 if (tem
== NULL_TREE
)
2229 tem
= fold_binary (BIT_AND_EXPR
, type
, lowxor
, tem
);
2230 if (tem
== NULL_TREE
|| !integer_zerop (tem
))
2232 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2233 if (!tree_int_cst_equal (lowxor
, highxor
))
2235 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2236 exp
= fold_build2 (BIT_AND_EXPR
, type
, ranges
[i
].exp
, tem
);
2237 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2238 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2239 if (update_range_test (ranges
+ i
, ranges
+ j
, 1, opcode
, ops
, exp
,
2240 ranges
[i
].in_p
, lowj
, highj
,
2241 ranges
[i
].strict_overflow_p
2242 || ranges
[j
].strict_overflow_p
))
2250 if (any_changes
&& opcode
!= ERROR_MARK
)
2253 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2255 if (oe
->op
== error_mark_node
)
2264 XDELETEVEC (ranges
);
2267 /* Return true if STMT is a cast like:
2273 # _345 = PHI <_123(N), 1(...), 1(...)>
2274 where _234 has bool type, _123 has single use and
2275 bb N has a single successor M. This is commonly used in
2276 the last block of a range test. */
2279 final_range_test_p (gimple stmt
)
2281 basic_block bb
, rhs_bb
;
2284 use_operand_p use_p
;
2287 if (!gimple_assign_cast_p (stmt
))
2289 bb
= gimple_bb (stmt
);
2290 if (!single_succ_p (bb
))
2292 e
= single_succ_edge (bb
);
2293 if (e
->flags
& EDGE_COMPLEX
)
2296 lhs
= gimple_assign_lhs (stmt
);
2297 rhs
= gimple_assign_rhs1 (stmt
);
2298 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2299 || TREE_CODE (rhs
) != SSA_NAME
2300 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2303 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2304 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2307 if (gimple_code (use_stmt
) != GIMPLE_PHI
2308 || gimple_bb (use_stmt
) != e
->dest
)
2311 /* And that the rhs is defined in the same loop. */
2312 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2314 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2320 /* Return true if BB is suitable basic block for inter-bb range test
2321 optimization. If BACKWARD is true, BB should be the only predecessor
2322 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2323 or compared with to find a common basic block to which all conditions
2324 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2325 be the only predecessor of BB. */
2328 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2331 edge_iterator ei
, ei2
;
2334 gimple_stmt_iterator gsi
;
2335 bool other_edge_seen
= false;
2340 /* Check last stmt first. */
2341 stmt
= last_stmt (bb
);
2343 || (gimple_code (stmt
) != GIMPLE_COND
2344 && (backward
|| !final_range_test_p (stmt
)))
2345 || gimple_visited_p (stmt
)
2346 || stmt_could_throw_p (stmt
)
2349 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2352 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2353 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2354 to *OTHER_BB (if not set yet, try to find it out). */
2355 if (EDGE_COUNT (bb
->succs
) != 2)
2357 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2359 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2361 if (e
->dest
== test_bb
)
2370 if (*other_bb
== NULL
)
2372 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2373 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2375 else if (e
->dest
== e2
->dest
)
2376 *other_bb
= e
->dest
;
2377 if (*other_bb
== NULL
)
2380 if (e
->dest
== *other_bb
)
2381 other_edge_seen
= true;
2385 if (*other_bb
== NULL
|| !other_edge_seen
)
2388 else if (single_succ (bb
) != *other_bb
)
2391 /* Now check all PHIs of *OTHER_BB. */
2392 e
= find_edge (bb
, *other_bb
);
2393 e2
= find_edge (test_bb
, *other_bb
);
2394 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2396 gimple phi
= gsi_stmt (gsi
);
2397 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2398 corresponding to BB and TEST_BB predecessor must be the same. */
2399 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2400 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2402 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2403 one of the PHIs should have the lhs of the last stmt in
2404 that block as PHI arg and that PHI should have 0 or 1
2405 corresponding to it in all other range test basic blocks
2409 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2410 == gimple_assign_lhs (stmt
)
2411 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2412 || integer_onep (gimple_phi_arg_def (phi
,
2418 gimple test_last
= last_stmt (test_bb
);
2419 if (gimple_code (test_last
) != GIMPLE_COND
2420 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2421 == gimple_assign_lhs (test_last
)
2422 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2423 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2433 /* Return true if BB doesn't have side-effects that would disallow
2434 range test optimization, all SSA_NAMEs set in the bb are consumed
2435 in the bb and there are no PHIs. */
2438 no_side_effect_bb (basic_block bb
)
2440 gimple_stmt_iterator gsi
;
2443 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2445 last
= last_stmt (bb
);
2446 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2448 gimple stmt
= gsi_stmt (gsi
);
2450 imm_use_iterator imm_iter
;
2451 use_operand_p use_p
;
2453 if (is_gimple_debug (stmt
))
2455 if (gimple_has_side_effects (stmt
))
2459 if (!is_gimple_assign (stmt
))
2461 lhs
= gimple_assign_lhs (stmt
);
2462 if (TREE_CODE (lhs
) != SSA_NAME
)
2464 if (gimple_assign_rhs_could_trap_p (stmt
))
2466 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2468 gimple use_stmt
= USE_STMT (use_p
);
2469 if (is_gimple_debug (use_stmt
))
2471 if (gimple_bb (use_stmt
) != bb
)
2478 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2479 return true and fill in *OPS recursively. */
2482 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2485 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2489 if (!is_reassociable_op (stmt
, code
, loop
))
2492 rhs
[0] = gimple_assign_rhs1 (stmt
);
2493 rhs
[1] = gimple_assign_rhs2 (stmt
);
2494 gimple_set_visited (stmt
, true);
2495 for (i
= 0; i
< 2; i
++)
2496 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2497 && !get_ops (rhs
[i
], code
, ops
, loop
)
2498 && has_single_use (rhs
[i
]))
2500 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2506 ops
->safe_push (oe
);
2511 /* Inter-bb range test optimization. */
2514 maybe_optimize_range_tests (gimple stmt
)
2516 basic_block first_bb
= gimple_bb (stmt
);
2517 basic_block last_bb
= first_bb
;
2518 basic_block other_bb
= NULL
;
2522 vec
<operand_entry_t
> ops
= vNULL
;
2524 /* Consider only basic blocks that end with GIMPLE_COND or
2525 a cast statement satisfying final_range_test_p. All
2526 but the last bb in the first_bb .. last_bb range
2527 should end with GIMPLE_COND. */
2528 if (gimple_code (stmt
) == GIMPLE_COND
)
2530 if (EDGE_COUNT (first_bb
->succs
) != 2)
2533 else if (final_range_test_p (stmt
))
2534 other_bb
= single_succ (first_bb
);
2538 if (stmt_could_throw_p (stmt
))
2541 /* As relative ordering of post-dominator sons isn't fixed,
2542 maybe_optimize_range_tests can be called first on any
2543 bb in the range we want to optimize. So, start searching
2544 backwards, if first_bb can be set to a predecessor. */
2545 while (single_pred_p (first_bb
))
2547 basic_block pred_bb
= single_pred (first_bb
);
2548 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
2550 if (!no_side_effect_bb (first_bb
))
2554 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2555 Before starting forward search in last_bb successors, find
2556 out the other_bb. */
2557 if (first_bb
== last_bb
)
2560 /* As non-GIMPLE_COND last stmt always terminates the range,
2561 if forward search didn't discover anything, just give up. */
2562 if (gimple_code (stmt
) != GIMPLE_COND
)
2564 /* Look at both successors. Either it ends with a GIMPLE_COND
2565 and satisfies suitable_cond_bb, or ends with a cast and
2566 other_bb is that cast's successor. */
2567 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
2568 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
2569 || e
->dest
== first_bb
)
2571 else if (single_pred_p (e
->dest
))
2573 stmt
= last_stmt (e
->dest
);
2575 && gimple_code (stmt
) == GIMPLE_COND
2576 && EDGE_COUNT (e
->dest
->succs
) == 2)
2578 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
2584 && final_range_test_p (stmt
)
2585 && find_edge (first_bb
, single_succ (e
->dest
)))
2587 other_bb
= single_succ (e
->dest
);
2588 if (other_bb
== first_bb
)
2592 if (other_bb
== NULL
)
2595 /* Now do the forward search, moving last_bb to successor bbs
2596 that aren't other_bb. */
2597 while (EDGE_COUNT (last_bb
->succs
) == 2)
2599 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
2600 if (e
->dest
!= other_bb
)
2604 if (!single_pred_p (e
->dest
))
2606 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
2608 if (!no_side_effect_bb (e
->dest
))
2612 if (first_bb
== last_bb
)
2614 /* Here basic blocks first_bb through last_bb's predecessor
2615 end with GIMPLE_COND, all of them have one of the edges to
2616 other_bb and another to another block in the range,
2617 all blocks except first_bb don't have side-effects and
2618 last_bb ends with either GIMPLE_COND, or cast satisfying
2619 final_range_test_p. */
2620 for (bb
= last_bb
; ; bb
= single_pred (bb
))
2622 enum tree_code code
;
2625 e
= find_edge (bb
, other_bb
);
2626 stmt
= last_stmt (bb
);
2627 gimple_set_visited (stmt
, true);
2628 if (gimple_code (stmt
) != GIMPLE_COND
)
2630 use_operand_p use_p
;
2635 lhs
= gimple_assign_lhs (stmt
);
2636 rhs
= gimple_assign_rhs1 (stmt
);
2637 gcc_assert (bb
== last_bb
);
2644 # _345 = PHI <_123(N), 1(...), 1(...)>
2646 or 0 instead of 1. If it is 0, the _234
2647 range test is anded together with all the
2648 other range tests, if it is 1, it is ored with
2650 single_imm_use (lhs
, &use_p
, &phi
);
2651 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
2652 e2
= find_edge (first_bb
, other_bb
);
2654 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
2655 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
2656 code
= BIT_AND_EXPR
;
2659 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
2660 code
= BIT_IOR_EXPR
;
2663 /* If _234 SSA_NAME_DEF_STMT is
2665 (or &, corresponding to 1/0 in the phi arguments,
2666 push into ops the individual range test arguments
2667 of the bitwise or resp. and, recursively. */
2668 if (!get_ops (rhs
, code
, &ops
,
2669 loop_containing_stmt (stmt
))
2670 && has_single_use (rhs
))
2672 /* Otherwise, push the _234 range test itself. */
2674 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2684 /* Otherwise stmt is GIMPLE_COND. */
2685 code
= gimple_cond_code (stmt
);
2686 lhs
= gimple_cond_lhs (stmt
);
2687 rhs
= gimple_cond_rhs (stmt
);
2688 if (TREE_CODE (lhs
) == SSA_NAME
2689 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2690 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
2691 || rhs
!= boolean_false_node
2692 /* Either push into ops the individual bitwise
2693 or resp. and operands, depending on which
2694 edge is other_bb. */
2695 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
2696 ^ (code
== EQ_EXPR
))
2697 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
2698 loop_containing_stmt (stmt
))))
2700 /* Or push the GIMPLE_COND stmt itself. */
2702 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2705 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
2706 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
2707 /* oe->op = NULL signs that there is no SSA_NAME
2708 for the range test, and oe->id instead is the
2709 basic block number, at which's end the GIMPLE_COND
2718 if (ops
.length () > 1)
2719 optimize_range_tests (ERROR_MARK
, &ops
);
2723 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2724 of STMT in it's operands. This is also known as a "destructive
2725 update" operation. */
2728 is_phi_for_stmt (gimple stmt
, tree operand
)
2732 use_operand_p arg_p
;
2735 if (TREE_CODE (operand
) != SSA_NAME
)
2738 lhs
= gimple_assign_lhs (stmt
);
2740 def_stmt
= SSA_NAME_DEF_STMT (operand
);
2741 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
2744 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
2745 if (lhs
== USE_FROM_PTR (arg_p
))
2750 /* Remove def stmt of VAR if VAR has zero uses and recurse
2751 on rhs1 operand if so. */
2754 remove_visited_stmt_chain (tree var
)
2757 gimple_stmt_iterator gsi
;
2761 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
2763 stmt
= SSA_NAME_DEF_STMT (var
);
2764 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
2766 var
= gimple_assign_rhs1 (stmt
);
2767 gsi
= gsi_for_stmt (stmt
);
2768 gsi_remove (&gsi
, true);
2769 release_defs (stmt
);
2776 /* This function checks three consequtive operands in
2777 passed operands vector OPS starting from OPINDEX and
2778 swaps two operands if it is profitable for binary operation
2779 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2781 We pair ops with the same rank if possible.
2783 The alternative we try is to see if STMT is a destructive
2784 update style statement, which is like:
2787 In that case, we want to use the destructive update form to
2788 expose the possible vectorizer sum reduction opportunity.
2789 In that case, the third operand will be the phi node. This
2790 check is not performed if STMT is null.
2792 We could, of course, try to be better as noted above, and do a
2793 lot of work to try to find these opportunities in >3 operand
2794 cases, but it is unlikely to be worth it. */
2797 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
2798 unsigned int opindex
, gimple stmt
)
2800 operand_entry_t oe1
, oe2
, oe3
;
2803 oe2
= ops
[opindex
+ 1];
2804 oe3
= ops
[opindex
+ 2];
2806 if ((oe1
->rank
== oe2
->rank
2807 && oe2
->rank
!= oe3
->rank
)
2808 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
2809 && !is_phi_for_stmt (stmt
, oe1
->op
)
2810 && !is_phi_for_stmt (stmt
, oe2
->op
)))
2812 struct operand_entry temp
= *oe3
;
2814 oe3
->rank
= oe1
->rank
;
2816 oe1
->rank
= temp
.rank
;
2818 else if ((oe1
->rank
== oe3
->rank
2819 && oe2
->rank
!= oe3
->rank
)
2820 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
2821 && !is_phi_for_stmt (stmt
, oe1
->op
)
2822 && !is_phi_for_stmt (stmt
, oe3
->op
)))
2824 struct operand_entry temp
= *oe2
;
2826 oe2
->rank
= oe1
->rank
;
2828 oe1
->rank
= temp
.rank
;
2832 /* Recursively rewrite our linearized statements so that the operators
2833 match those in OPS[OPINDEX], putting the computation in rank
2837 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
2838 vec
<operand_entry_t
> ops
, bool moved
)
2840 tree rhs1
= gimple_assign_rhs1 (stmt
);
2841 tree rhs2
= gimple_assign_rhs2 (stmt
);
2844 /* If we have three operands left, then we want to make sure the ones
2845 that get the double binary op are chosen wisely. */
2846 if (opindex
+ 3 == ops
.length ())
2847 swap_ops_for_binary_stmt (ops
, opindex
, stmt
);
2849 /* The final recursion case for this function is that you have
2850 exactly two operations left.
2851 If we had one exactly one op in the entire list to start with, we
2852 would have never called this function, and the tail recursion
2853 rewrites them one at a time. */
2854 if (opindex
+ 2 == ops
.length ())
2856 operand_entry_t oe1
, oe2
;
2859 oe2
= ops
[opindex
+ 1];
2861 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
2863 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2865 fprintf (dump_file
, "Transforming ");
2866 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2869 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
2870 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
2872 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
2873 remove_visited_stmt_chain (rhs1
);
2875 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2877 fprintf (dump_file
, " into ");
2878 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2884 /* If we hit here, we should have 3 or more ops left. */
2885 gcc_assert (opindex
+ 2 < ops
.length ());
2887 /* Rewrite the next operator. */
2894 gimple_stmt_iterator gsinow
, gsirhs1
;
2895 gimple stmt1
= stmt
, stmt2
;
2898 gsinow
= gsi_for_stmt (stmt
);
2899 count
= ops
.length () - opindex
- 2;
2900 while (count
-- != 0)
2902 stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1
));
2903 gsirhs1
= gsi_for_stmt (stmt2
);
2904 gsi_move_before (&gsirhs1
, &gsinow
);
2911 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2913 fprintf (dump_file
, "Transforming ");
2914 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2917 gimple_assign_set_rhs2 (stmt
, oe
->op
);
2920 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2922 fprintf (dump_file
, " into ");
2923 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2926 /* Recurse on the LHS of the binary operator, which is guaranteed to
2927 be the non-leaf side. */
2928 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
, moved
);
2931 /* Find out how many cycles we need to compute statements chain.
2932 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
2933 maximum number of independent statements we may execute per cycle. */
2936 get_required_cycles (int ops_num
, int cpu_width
)
2942 /* While we have more than 2 * cpu_width operands
2943 we may reduce number of operands by cpu_width
2945 res
= ops_num
/ (2 * cpu_width
);
2947 /* Remained operands count may be reduced twice per cycle
2948 until we have only one operand. */
2949 rest
= (unsigned)(ops_num
- res
* cpu_width
);
2950 elog
= exact_log2 (rest
);
2954 res
+= floor_log2 (rest
) + 1;
2959 /* Returns an optimal number of registers to use for computation of
2960 given statements. */
2963 get_reassociation_width (int ops_num
, enum tree_code opc
,
2964 enum machine_mode mode
)
2966 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
2971 if (param_width
> 0)
2972 width
= param_width
;
2974 width
= targetm
.sched
.reassociation_width (opc
, mode
);
2979 /* Get the minimal time required for sequence computation. */
2980 cycles_best
= get_required_cycles (ops_num
, width
);
2982 /* Check if we may use less width and still compute sequence for
2983 the same time. It will allow us to reduce registers usage.
2984 get_required_cycles is monotonically increasing with lower width
2985 so we can perform a binary search for the minimal width that still
2986 results in the optimal cycle count. */
2988 while (width
> width_min
)
2990 int width_mid
= (width
+ width_min
) / 2;
2992 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
2994 else if (width_min
< width_mid
)
2995 width_min
= width_mid
;
3003 /* Recursively rewrite our linearized statements so that the operators
3004 match those in OPS[OPINDEX], putting the computation in rank
3005 order and trying to allow operations to be executed in
3009 rewrite_expr_tree_parallel (gimple stmt
, int width
,
3010 vec
<operand_entry_t
> ops
)
3012 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3013 int op_num
= ops
.length ();
3014 int stmt_num
= op_num
- 1;
3015 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3016 int op_index
= op_num
- 1;
3018 int ready_stmts_end
= 0;
3020 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3022 /* We start expression rewriting from the top statements.
3023 So, in this loop we create a full list of statements
3024 we will work with. */
3025 stmts
[stmt_num
- 1] = stmt
;
3026 for (i
= stmt_num
- 2; i
>= 0; i
--)
3027 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3029 for (i
= 0; i
< stmt_num
; i
++)
3033 /* Determine whether we should use results of
3034 already handled statements or not. */
3035 if (ready_stmts_end
== 0
3036 && (i
- stmt_index
>= width
|| op_index
< 1))
3037 ready_stmts_end
= i
;
3039 /* Now we choose operands for the next statement. Non zero
3040 value in ready_stmts_end means here that we should use
3041 the result of already generated statements as new operand. */
3042 if (ready_stmts_end
> 0)
3044 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3045 if (ready_stmts_end
> stmt_index
)
3046 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3047 else if (op_index
>= 0)
3048 op2
= ops
[op_index
--]->op
;
3051 gcc_assert (stmt_index
< i
);
3052 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3055 if (stmt_index
>= ready_stmts_end
)
3056 ready_stmts_end
= 0;
3061 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3062 op2
= ops
[op_index
--]->op
;
3063 op1
= ops
[op_index
--]->op
;
3066 /* If we emit the last statement then we should put
3067 operands into the last statement. It will also
3069 if (op_index
< 0 && stmt_index
== i
)
3072 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3074 fprintf (dump_file
, "Transforming ");
3075 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3078 /* We keep original statement only for the last one. All
3079 others are recreated. */
3080 if (i
== stmt_num
- 1)
3082 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3083 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3084 update_stmt (stmts
[i
]);
3087 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3089 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3091 fprintf (dump_file
, " into ");
3092 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3096 remove_visited_stmt_chain (last_rhs1
);
3099 /* Transform STMT, which is really (A +B) + (C + D) into the left
3100 linear form, ((A+B)+C)+D.
3101 Recurse on D if necessary. */
3104 linearize_expr (gimple stmt
)
3106 gimple_stmt_iterator gsinow
, gsirhs
;
3107 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3108 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3109 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3110 gimple newbinrhs
= NULL
;
3111 struct loop
*loop
= loop_containing_stmt (stmt
);
3113 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3114 && is_reassociable_op (binrhs
, rhscode
, loop
));
3116 gsinow
= gsi_for_stmt (stmt
);
3117 gsirhs
= gsi_for_stmt (binrhs
);
3118 gsi_move_before (&gsirhs
, &gsinow
);
3120 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3121 gimple_assign_set_rhs1 (binrhs
, gimple_assign_lhs (binlhs
));
3122 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3124 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3125 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3127 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3129 fprintf (dump_file
, "Linearized: ");
3130 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3133 reassociate_stats
.linearized
++;
3134 update_stmt (binrhs
);
3135 update_stmt (binlhs
);
3138 gimple_set_visited (stmt
, true);
3139 gimple_set_visited (binlhs
, true);
3140 gimple_set_visited (binrhs
, true);
3142 /* Tail recurse on the new rhs if it still needs reassociation. */
3143 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3144 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3145 want to change the algorithm while converting to tuples. */
3146 linearize_expr (stmt
);
3149 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3150 it. Otherwise, return NULL. */
3153 get_single_immediate_use (tree lhs
)
3155 use_operand_p immuse
;
3158 if (TREE_CODE (lhs
) == SSA_NAME
3159 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3160 && is_gimple_assign (immusestmt
))
3166 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3167 representing the negated value. Insertions of any necessary
3168 instructions go before GSI.
3169 This function is recursive in that, if you hand it "a_5" as the
3170 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3171 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3174 negate_value (tree tonegate
, gimple_stmt_iterator
*gsi
)
3176 gimple negatedefstmt
= NULL
;
3177 tree resultofnegate
;
3179 /* If we are trying to negate a name, defined by an add, negate the
3180 add operands instead. */
3181 if (TREE_CODE (tonegate
) == SSA_NAME
)
3182 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3183 if (TREE_CODE (tonegate
) == SSA_NAME
3184 && is_gimple_assign (negatedefstmt
)
3185 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3186 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3187 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3189 gimple_stmt_iterator gsi
;
3190 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3191 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3193 gsi
= gsi_for_stmt (negatedefstmt
);
3194 rhs1
= negate_value (rhs1
, &gsi
);
3195 gimple_assign_set_rhs1 (negatedefstmt
, rhs1
);
3197 gsi
= gsi_for_stmt (negatedefstmt
);
3198 rhs2
= negate_value (rhs2
, &gsi
);
3199 gimple_assign_set_rhs2 (negatedefstmt
, rhs2
);
3201 update_stmt (negatedefstmt
);
3202 return gimple_assign_lhs (negatedefstmt
);
3205 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3206 resultofnegate
= force_gimple_operand_gsi (gsi
, tonegate
, true,
3207 NULL_TREE
, true, GSI_SAME_STMT
);
3208 return resultofnegate
;
3211 /* Return true if we should break up the subtract in STMT into an add
3212 with negate. This is true when we the subtract operands are really
3213 adds, or the subtract itself is used in an add expression. In
3214 either case, breaking up the subtract into an add with negate
3215 exposes the adds to reassociation. */
3218 should_break_up_subtract (gimple stmt
)
3220 tree lhs
= gimple_assign_lhs (stmt
);
3221 tree binlhs
= gimple_assign_rhs1 (stmt
);
3222 tree binrhs
= gimple_assign_rhs2 (stmt
);
3224 struct loop
*loop
= loop_containing_stmt (stmt
);
3226 if (TREE_CODE (binlhs
) == SSA_NAME
3227 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3230 if (TREE_CODE (binrhs
) == SSA_NAME
3231 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3234 if (TREE_CODE (lhs
) == SSA_NAME
3235 && (immusestmt
= get_single_immediate_use (lhs
))
3236 && is_gimple_assign (immusestmt
)
3237 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3238 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3243 /* Transform STMT from A - B into A + -B. */
3246 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3248 tree rhs1
= gimple_assign_rhs1 (stmt
);
3249 tree rhs2
= gimple_assign_rhs2 (stmt
);
3251 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3253 fprintf (dump_file
, "Breaking up subtract ");
3254 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3257 rhs2
= negate_value (rhs2
, gsip
);
3258 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3262 /* Determine whether STMT is a builtin call that raises an SSA name
3263 to an integer power and has only one use. If so, and this is early
3264 reassociation and unsafe math optimizations are permitted, place
3265 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3266 If any of these conditions does not hold, return FALSE. */
3269 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3272 REAL_VALUE_TYPE c
, cint
;
3274 if (!first_pass_instance
3275 || !flag_unsafe_math_optimizations
3276 || !is_gimple_call (stmt
)
3277 || !has_single_use (gimple_call_lhs (stmt
)))
3280 fndecl
= gimple_call_fndecl (stmt
);
3283 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3286 switch (DECL_FUNCTION_CODE (fndecl
))
3288 CASE_FLT_FN (BUILT_IN_POW
):
3289 *base
= gimple_call_arg (stmt
, 0);
3290 arg1
= gimple_call_arg (stmt
, 1);
3292 if (TREE_CODE (arg1
) != REAL_CST
)
3295 c
= TREE_REAL_CST (arg1
);
3297 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3300 *exponent
= real_to_integer (&c
);
3301 real_from_integer (&cint
, VOIDmode
, *exponent
,
3302 *exponent
< 0 ? -1 : 0, 0);
3303 if (!real_identical (&c
, &cint
))
3308 CASE_FLT_FN (BUILT_IN_POWI
):
3309 *base
= gimple_call_arg (stmt
, 0);
3310 arg1
= gimple_call_arg (stmt
, 1);
3312 if (!host_integerp (arg1
, 0))
3315 *exponent
= TREE_INT_CST_LOW (arg1
);
3322 /* Expanding negative exponents is generally unproductive, so we don't
3323 complicate matters with those. Exponents of zero and one should
3324 have been handled by expression folding. */
3325 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
3331 /* Recursively linearize a binary expression that is the RHS of STMT.
3332 Place the operands of the expression tree in the vector named OPS. */
3335 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
3336 bool is_associative
, bool set_visited
)
3338 tree binlhs
= gimple_assign_rhs1 (stmt
);
3339 tree binrhs
= gimple_assign_rhs2 (stmt
);
3340 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
3341 bool binlhsisreassoc
= false;
3342 bool binrhsisreassoc
= false;
3343 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3344 struct loop
*loop
= loop_containing_stmt (stmt
);
3345 tree base
= NULL_TREE
;
3346 HOST_WIDE_INT exponent
= 0;
3349 gimple_set_visited (stmt
, true);
3351 if (TREE_CODE (binlhs
) == SSA_NAME
)
3353 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
3354 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
3355 && !stmt_could_throw_p (binlhsdef
));
3358 if (TREE_CODE (binrhs
) == SSA_NAME
)
3360 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
3361 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
3362 && !stmt_could_throw_p (binrhsdef
));
3365 /* If the LHS is not reassociable, but the RHS is, we need to swap
3366 them. If neither is reassociable, there is nothing we can do, so
3367 just put them in the ops vector. If the LHS is reassociable,
3368 linearize it. If both are reassociable, then linearize the RHS
3371 if (!binlhsisreassoc
)
3375 /* If this is not a associative operation like division, give up. */
3376 if (!is_associative
)
3378 add_to_ops_vec (ops
, binrhs
);
3382 if (!binrhsisreassoc
)
3384 if (rhscode
== MULT_EXPR
3385 && TREE_CODE (binrhs
) == SSA_NAME
3386 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
3388 add_repeat_to_ops_vec (ops
, base
, exponent
);
3389 gimple_set_visited (binrhsdef
, true);
3392 add_to_ops_vec (ops
, binrhs
);
3394 if (rhscode
== MULT_EXPR
3395 && TREE_CODE (binlhs
) == SSA_NAME
3396 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
3398 add_repeat_to_ops_vec (ops
, base
, exponent
);
3399 gimple_set_visited (binlhsdef
, true);
3402 add_to_ops_vec (ops
, binlhs
);
3407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3409 fprintf (dump_file
, "swapping operands of ");
3410 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3413 swap_tree_operands (stmt
,
3414 gimple_assign_rhs1_ptr (stmt
),
3415 gimple_assign_rhs2_ptr (stmt
));
3418 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3420 fprintf (dump_file
, " is now ");
3421 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3424 /* We want to make it so the lhs is always the reassociative op,
3430 else if (binrhsisreassoc
)
3432 linearize_expr (stmt
);
3433 binlhs
= gimple_assign_rhs1 (stmt
);
3434 binrhs
= gimple_assign_rhs2 (stmt
);
3437 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
3438 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
3440 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
3441 is_associative
, set_visited
);
3443 if (rhscode
== MULT_EXPR
3444 && TREE_CODE (binrhs
) == SSA_NAME
3445 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
3447 add_repeat_to_ops_vec (ops
, base
, exponent
);
3448 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
3451 add_to_ops_vec (ops
, binrhs
);
3454 /* Repropagate the negates back into subtracts, since no other pass
3455 currently does it. */
3458 repropagate_negates (void)
3463 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
3465 gimple user
= get_single_immediate_use (negate
);
3467 if (!user
|| !is_gimple_assign (user
))
3470 /* The negate operand can be either operand of a PLUS_EXPR
3471 (it can be the LHS if the RHS is a constant for example).
3473 Force the negate operand to the RHS of the PLUS_EXPR, then
3474 transform the PLUS_EXPR into a MINUS_EXPR. */
3475 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
3477 /* If the negated operand appears on the LHS of the
3478 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3479 to force the negated operand to the RHS of the PLUS_EXPR. */
3480 if (gimple_assign_rhs1 (user
) == negate
)
3482 swap_tree_operands (user
,
3483 gimple_assign_rhs1_ptr (user
),
3484 gimple_assign_rhs2_ptr (user
));
3487 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3488 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3489 if (gimple_assign_rhs2 (user
) == negate
)
3491 tree rhs1
= gimple_assign_rhs1 (user
);
3492 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
3493 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3494 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
3498 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
3500 if (gimple_assign_rhs1 (user
) == negate
)
3505 which we transform into
3508 This pushes down the negate which we possibly can merge
3509 into some other operation, hence insert it into the
3510 plus_negates vector. */
3511 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3512 tree a
= gimple_assign_rhs1 (feed
);
3513 tree rhs2
= gimple_assign_rhs2 (user
);
3514 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
), gsi2
;
3515 gimple_replace_lhs (feed
, negate
);
3516 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, a
, rhs2
);
3517 update_stmt (gsi_stmt (gsi
));
3518 gsi2
= gsi_for_stmt (user
);
3519 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, negate
, NULL
);
3520 update_stmt (gsi_stmt (gsi2
));
3521 gsi_move_before (&gsi
, &gsi2
);
3522 plus_negates
.safe_push (gimple_assign_lhs (gsi_stmt (gsi2
)));
3526 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3527 rid of one operation. */
3528 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3529 tree a
= gimple_assign_rhs1 (feed
);
3530 tree rhs1
= gimple_assign_rhs1 (user
);
3531 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3532 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
3533 update_stmt (gsi_stmt (gsi
));
3539 /* Returns true if OP is of a type for which we can do reassociation.
3540 That is for integral or non-saturating fixed-point types, and for
3541 floating point type when associative-math is enabled. */
3544 can_reassociate_p (tree op
)
3546 tree type
= TREE_TYPE (op
);
3547 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
3548 || NON_SAT_FIXED_POINT_TYPE_P (type
)
3549 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
3554 /* Break up subtract operations in block BB.
3556 We do this top down because we don't know whether the subtract is
3557 part of a possible chain of reassociation except at the top.
3566 we want to break up k = t - q, but we won't until we've transformed q
3567 = b - r, which won't be broken up until we transform b = c - d.
3569 En passant, clear the GIMPLE visited flag on every statement. */
3572 break_up_subtract_bb (basic_block bb
)
3574 gimple_stmt_iterator gsi
;
3577 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3579 gimple stmt
= gsi_stmt (gsi
);
3580 gimple_set_visited (stmt
, false);
3582 if (!is_gimple_assign (stmt
)
3583 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3586 /* Look for simple gimple subtract operations. */
3587 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3589 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3590 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3593 /* Check for a subtract used only in an addition. If this
3594 is the case, transform it into add of a negate for better
3595 reassociation. IE transform C = A-B into C = A + -B if C
3596 is only used in an addition. */
3597 if (should_break_up_subtract (stmt
))
3598 break_up_subtract (stmt
, &gsi
);
3600 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3601 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3602 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
3604 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3606 son
= next_dom_son (CDI_DOMINATORS
, son
))
3607 break_up_subtract_bb (son
);
3610 /* Used for repeated factor analysis. */
3611 struct repeat_factor_d
3613 /* An SSA name that occurs in a multiply chain. */
3616 /* Cached rank of the factor. */
3619 /* Number of occurrences of the factor in the chain. */
3620 HOST_WIDE_INT count
;
3622 /* An SSA name representing the product of this factor and
3623 all factors appearing later in the repeated factor vector. */
3627 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
3628 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
3631 static vec
<repeat_factor
> repeat_factor_vec
;
3633 /* Used for sorting the repeat factor vector. Sort primarily by
3634 ascending occurrence count, secondarily by descending rank. */
3637 compare_repeat_factors (const void *x1
, const void *x2
)
3639 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3640 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3642 if (rf1
->count
!= rf2
->count
)
3643 return rf1
->count
- rf2
->count
;
3645 return rf2
->rank
- rf1
->rank
;
3648 /* Look for repeated operands in OPS in the multiply tree rooted at
3649 STMT. Replace them with an optimal sequence of multiplies and powi
3650 builtin calls, and remove the used operands from OPS. Return an
3651 SSA name representing the value of the replacement sequence. */
3654 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
3656 unsigned i
, j
, vec_len
;
3659 repeat_factor_t rf1
, rf2
;
3660 repeat_factor rfnew
;
3661 tree result
= NULL_TREE
;
3662 tree target_ssa
, iter_result
;
3663 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3664 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
3665 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3666 gimple mul_stmt
, pow_stmt
;
3668 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3673 /* Allocate the repeated factor vector. */
3674 repeat_factor_vec
.create (10);
3676 /* Scan the OPS vector for all SSA names in the product and build
3677 up a vector of occurrence counts for each factor. */
3678 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3680 if (TREE_CODE (oe
->op
) == SSA_NAME
)
3682 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
3684 if (rf1
->factor
== oe
->op
)
3686 rf1
->count
+= oe
->count
;
3691 if (j
>= repeat_factor_vec
.length ())
3693 rfnew
.factor
= oe
->op
;
3694 rfnew
.rank
= oe
->rank
;
3695 rfnew
.count
= oe
->count
;
3696 rfnew
.repr
= NULL_TREE
;
3697 repeat_factor_vec
.safe_push (rfnew
);
3702 /* Sort the repeated factor vector by (a) increasing occurrence count,
3703 and (b) decreasing rank. */
3704 repeat_factor_vec
.qsort (compare_repeat_factors
);
3706 /* It is generally best to combine as many base factors as possible
3707 into a product before applying __builtin_powi to the result.
3708 However, the sort order chosen for the repeated factor vector
3709 allows us to cache partial results for the product of the base
3710 factors for subsequent use. When we already have a cached partial
3711 result from a previous iteration, it is best to make use of it
3712 before looking for another __builtin_pow opportunity.
3714 As an example, consider x * x * y * y * y * z * z * z * z.
3715 We want to first compose the product x * y * z, raise it to the
3716 second power, then multiply this by y * z, and finally multiply
3717 by z. This can be done in 5 multiplies provided we cache y * z
3718 for use in both expressions:
3726 If we instead ignored the cached y * z and first multiplied by
3727 the __builtin_pow opportunity z * z, we would get the inferior:
3736 vec_len
= repeat_factor_vec
.length ();
3738 /* Repeatedly look for opportunities to create a builtin_powi call. */
3741 HOST_WIDE_INT power
;
3743 /* First look for the largest cached product of factors from
3744 preceding iterations. If found, create a builtin_powi for
3745 it if the minimum occurrence count for its factors is at
3746 least 2, or just use this cached product as our next
3747 multiplicand if the minimum occurrence count is 1. */
3748 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
3750 if (rf1
->repr
&& rf1
->count
> 0)
3760 iter_result
= rf1
->repr
;
3762 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3766 fputs ("Multiplying by cached product ", dump_file
);
3767 for (elt
= j
; elt
< vec_len
; elt
++)
3769 rf
= &repeat_factor_vec
[elt
];
3770 print_generic_expr (dump_file
, rf
->factor
, 0);
3771 if (elt
< vec_len
- 1)
3772 fputs (" * ", dump_file
);
3774 fputs ("\n", dump_file
);
3779 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
3780 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
3781 build_int_cst (integer_type_node
,
3783 gimple_call_set_lhs (pow_stmt
, iter_result
);
3784 gimple_set_location (pow_stmt
, gimple_location (stmt
));
3785 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
3787 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3791 fputs ("Building __builtin_pow call for cached product (",
3793 for (elt
= j
; elt
< vec_len
; elt
++)
3795 rf
= &repeat_factor_vec
[elt
];
3796 print_generic_expr (dump_file
, rf
->factor
, 0);
3797 if (elt
< vec_len
- 1)
3798 fputs (" * ", dump_file
);
3800 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
3807 /* Otherwise, find the first factor in the repeated factor
3808 vector whose occurrence count is at least 2. If no such
3809 factor exists, there are no builtin_powi opportunities
3811 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
3813 if (rf1
->count
>= 2)
3822 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3826 fputs ("Building __builtin_pow call for (", dump_file
);
3827 for (elt
= j
; elt
< vec_len
; elt
++)
3829 rf
= &repeat_factor_vec
[elt
];
3830 print_generic_expr (dump_file
, rf
->factor
, 0);
3831 if (elt
< vec_len
- 1)
3832 fputs (" * ", dump_file
);
3834 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
3837 reassociate_stats
.pows_created
++;
3839 /* Visit each element of the vector in reverse order (so that
3840 high-occurrence elements are visited first, and within the
3841 same occurrence count, lower-ranked elements are visited
3842 first). Form a linear product of all elements in this order
3843 whose occurrencce count is at least that of element J.
3844 Record the SSA name representing the product of each element
3845 with all subsequent elements in the vector. */
3846 if (j
== vec_len
- 1)
3847 rf1
->repr
= rf1
->factor
;
3850 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
3854 rf1
= &repeat_factor_vec
[ii
];
3855 rf2
= &repeat_factor_vec
[ii
+ 1];
3857 /* Init the last factor's representative to be itself. */
3859 rf2
->repr
= rf2
->factor
;
3864 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
3865 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
3868 gimple_set_location (mul_stmt
, gimple_location (stmt
));
3869 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
3870 rf1
->repr
= target_ssa
;
3872 /* Don't reprocess the multiply we just introduced. */
3873 gimple_set_visited (mul_stmt
, true);
3877 /* Form a call to __builtin_powi for the maximum product
3878 just formed, raised to the power obtained earlier. */
3879 rf1
= &repeat_factor_vec
[j
];
3880 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
3881 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
3882 build_int_cst (integer_type_node
,
3884 gimple_call_set_lhs (pow_stmt
, iter_result
);
3885 gimple_set_location (pow_stmt
, gimple_location (stmt
));
3886 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
3889 /* If we previously formed at least one other builtin_powi call,
3890 form the product of this one and those others. */
3893 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
3894 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
3895 result
, iter_result
);
3896 gimple_set_location (mul_stmt
, gimple_location (stmt
));
3897 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
3898 gimple_set_visited (mul_stmt
, true);
3899 result
= new_result
;
3902 result
= iter_result
;
3904 /* Decrement the occurrence count of each element in the product
3905 by the count found above, and remove this many copies of each
3907 for (i
= j
; i
< vec_len
; i
++)
3912 rf1
= &repeat_factor_vec
[i
];
3913 rf1
->count
-= power
;
3915 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
3917 if (oe
->op
== rf1
->factor
)
3921 ops
->ordered_remove (n
);
3937 /* At this point all elements in the repeated factor vector have a
3938 remaining occurrence count of 0 or 1, and those with a count of 1
3939 don't have cached representatives. Re-sort the ops vector and
3941 ops
->qsort (sort_by_operand_rank
);
3942 repeat_factor_vec
.release ();
3944 /* Return the final product computed herein. Note that there may
3945 still be some elements with single occurrence count left in OPS;
3946 those will be handled by the normal reassociation logic. */
3950 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3953 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
3957 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3959 fprintf (dump_file
, "Transforming ");
3960 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3963 rhs1
= gimple_assign_rhs1 (stmt
);
3964 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
3966 remove_visited_stmt_chain (rhs1
);
3968 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3970 fprintf (dump_file
, " into ");
3971 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3975 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3978 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
3979 tree rhs1
, tree rhs2
)
3981 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3983 fprintf (dump_file
, "Transforming ");
3984 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3987 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
3988 update_stmt (gsi_stmt (*gsi
));
3989 remove_visited_stmt_chain (rhs1
);
3991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3993 fprintf (dump_file
, " into ");
3994 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3998 /* Reassociate expressions in basic block BB and its post-dominator as
4002 reassociate_bb (basic_block bb
)
4004 gimple_stmt_iterator gsi
;
4006 gimple stmt
= last_stmt (bb
);
4008 if (stmt
&& !gimple_visited_p (stmt
))
4009 maybe_optimize_range_tests (stmt
);
4011 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4013 stmt
= gsi_stmt (gsi
);
4015 if (is_gimple_assign (stmt
)
4016 && !stmt_could_throw_p (stmt
))
4018 tree lhs
, rhs1
, rhs2
;
4019 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4021 /* If this is not a gimple binary expression, there is
4022 nothing for us to do with it. */
4023 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4026 /* If this was part of an already processed statement,
4027 we don't need to touch it again. */
4028 if (gimple_visited_p (stmt
))
4030 /* This statement might have become dead because of previous
4032 if (has_zero_uses (gimple_get_lhs (stmt
)))
4034 gsi_remove (&gsi
, true);
4035 release_defs (stmt
);
4036 /* We might end up removing the last stmt above which
4037 places the iterator to the end of the sequence.
4038 Reset it to the last stmt in this case which might
4039 be the end of the sequence as well if we removed
4040 the last statement of the sequence. In which case
4041 we need to bail out. */
4042 if (gsi_end_p (gsi
))
4044 gsi
= gsi_last_bb (bb
);
4045 if (gsi_end_p (gsi
))
4052 lhs
= gimple_assign_lhs (stmt
);
4053 rhs1
= gimple_assign_rhs1 (stmt
);
4054 rhs2
= gimple_assign_rhs2 (stmt
);
4056 /* For non-bit or min/max operations we can't associate
4057 all types. Verify that here. */
4058 if (rhs_code
!= BIT_IOR_EXPR
4059 && rhs_code
!= BIT_AND_EXPR
4060 && rhs_code
!= BIT_XOR_EXPR
4061 && rhs_code
!= MIN_EXPR
4062 && rhs_code
!= MAX_EXPR
4063 && (!can_reassociate_p (lhs
)
4064 || !can_reassociate_p (rhs1
)
4065 || !can_reassociate_p (rhs2
)))
4068 if (associative_tree_code (rhs_code
))
4070 vec
<operand_entry_t
> ops
= vNULL
;
4071 tree powi_result
= NULL_TREE
;
4073 /* There may be no immediate uses left by the time we
4074 get here because we may have eliminated them all. */
4075 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4078 gimple_set_visited (stmt
, true);
4079 linearize_expr_tree (&ops
, stmt
, true, true);
4080 ops
.qsort (sort_by_operand_rank
);
4081 optimize_ops_list (rhs_code
, &ops
);
4082 if (undistribute_ops_list (rhs_code
, &ops
,
4083 loop_containing_stmt (stmt
)))
4085 ops
.qsort (sort_by_operand_rank
);
4086 optimize_ops_list (rhs_code
, &ops
);
4089 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4090 optimize_range_tests (rhs_code
, &ops
);
4092 if (first_pass_instance
4093 && rhs_code
== MULT_EXPR
4094 && flag_unsafe_math_optimizations
)
4095 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4097 /* If the operand vector is now empty, all operands were
4098 consumed by the __builtin_powi optimization. */
4099 if (ops
.length () == 0)
4100 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4101 else if (ops
.length () == 1)
4103 tree last_op
= ops
.last ()->op
;
4106 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4109 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4113 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4114 int ops_num
= ops
.length ();
4115 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4119 "Width = %d was chosen for reassociation\n", width
);
4122 && ops
.length () > 3)
4123 rewrite_expr_tree_parallel (stmt
, width
, ops
);
4125 rewrite_expr_tree (stmt
, 0, ops
, false);
4127 /* If we combined some repeated factors into a
4128 __builtin_powi call, multiply that result by the
4129 reassociated operands. */
4133 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4134 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4136 gimple_set_lhs (stmt
, target_ssa
);
4138 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
4141 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4142 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4150 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4152 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4153 reassociate_bb (son
);
4156 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4157 void debug_ops_vector (vec
<operand_entry_t
> ops
);
4159 /* Dump the operand entry vector OPS to FILE. */
4162 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
4167 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4169 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4170 print_generic_expr (file
, oe
->op
, 0);
4174 /* Dump the operand entry vector OPS to STDERR. */
4177 debug_ops_vector (vec
<operand_entry_t
> ops
)
4179 dump_ops_vector (stderr
, ops
);
4185 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
4186 reassociate_bb (EXIT_BLOCK_PTR
);
4189 /* Initialize the reassociation pass. */
4196 int *bbs
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
4198 /* Find the loops, so that we can prevent moving calculations in
4200 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
4202 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
4204 operand_entry_pool
= create_alloc_pool ("operand entry pool",
4205 sizeof (struct operand_entry
), 30);
4206 next_operand_entry_id
= 0;
4208 /* Reverse RPO (Reverse Post Order) will give us something where
4209 deeper loops come later. */
4210 pre_and_rev_post_order_compute (NULL
, bbs
, false);
4211 bb_rank
= XCNEWVEC (long, last_basic_block
);
4212 operand_rank
= pointer_map_create ();
4214 /* Give each default definition a distinct rank. This includes
4215 parameters and the static chain. Walk backwards over all
4216 SSA names so that we get proper rank ordering according
4217 to tree_swap_operands_p. */
4218 for (i
= num_ssa_names
- 1; i
> 0; --i
)
4220 tree name
= ssa_name (i
);
4221 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
4222 insert_operand_rank (name
, ++rank
);
4225 /* Set up rank for each BB */
4226 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
4227 bb_rank
[bbs
[i
]] = ++rank
<< 16;
4230 calculate_dominance_info (CDI_POST_DOMINATORS
);
4231 plus_negates
= vNULL
;
4234 /* Cleanup after the reassociation pass, and print stats if
4240 statistics_counter_event (cfun
, "Linearized",
4241 reassociate_stats
.linearized
);
4242 statistics_counter_event (cfun
, "Constants eliminated",
4243 reassociate_stats
.constants_eliminated
);
4244 statistics_counter_event (cfun
, "Ops eliminated",
4245 reassociate_stats
.ops_eliminated
);
4246 statistics_counter_event (cfun
, "Statements rewritten",
4247 reassociate_stats
.rewritten
);
4248 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
4249 reassociate_stats
.pows_encountered
);
4250 statistics_counter_event (cfun
, "Built-in powi calls created",
4251 reassociate_stats
.pows_created
);
4253 pointer_map_destroy (operand_rank
);
4254 free_alloc_pool (operand_entry_pool
);
4256 plus_negates
.release ();
4257 free_dominance_info (CDI_POST_DOMINATORS
);
4258 loop_optimizer_finalize ();
4261 /* Gate and execute functions for Reassociation. */
4264 execute_reassoc (void)
4269 repropagate_negates ();
4276 gate_tree_ssa_reassoc (void)
4278 return flag_tree_reassoc
!= 0;
4281 struct gimple_opt_pass pass_reassoc
=
4285 "reassoc", /* name */
4286 OPTGROUP_NONE
, /* optinfo_flags */
4287 gate_tree_ssa_reassoc
, /* gate */
4288 execute_reassoc
, /* execute */
4291 0, /* static_pass_number */
4292 TV_TREE_REASSOC
, /* tv_id */
4293 PROP_cfg
| PROP_ssa
, /* properties_required */
4294 0, /* properties_provided */
4295 0, /* properties_destroyed */
4296 0, /* todo_flags_start */
4299 | TODO_ggc_collect
/* todo_flags_finish */