1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
33 #include "gimple-iterator.h"
34 #include "gimplify-me.h"
35 #include "gimple-ssa.h"
37 #include "tree-phinodes.h"
38 #include "ssa-iterators.h"
39 #include "tree-ssanames.h"
40 #include "tree-ssa-loop-niter.h"
41 #include "tree-ssa-loop.h"
44 #include "tree-iterator.h"
45 #include "tree-pass.h"
46 #include "alloc-pool.h"
48 #include "langhooks.h"
49 #include "pointer-set.h"
54 #include "diagnostic-core.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_t
70 3. Optimization of the operand lists, eliminating things like a +
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
107 You want to first merge all leaves of the same rank, as much as
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
161 newstmt = mergetmp + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
182 int constants_eliminated
;
185 int pows_encountered
;
189 /* Operator, rank pair. */
190 typedef struct operand_entry
198 static alloc_pool operand_entry_pool
;
200 /* This is used to assign a unique ID to each struct operand_entry
201 so that qsort results are identical on different hosts. */
202 static int next_operand_entry_id
;
204 /* Starting rank number for a given basic block, so that we can rank
205 operations using unmovable instructions in that BB based on the bb
207 static long *bb_rank
;
209 /* Operand->rank hashtable. */
210 static struct pointer_map_t
*operand_rank
;
213 static long get_rank (tree
);
216 /* Bias amount for loop-carried phis. We want this to be larger than
217 the depth of any reassociation tree we can see, but not larger than
218 the rank difference between two blocks. */
219 #define PHI_LOOP_BIAS (1 << 15)
221 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
222 an innermost loop, and the phi has only a single use which is inside
223 the loop, then the rank is the block rank of the loop latch plus an
224 extra bias for the loop-carried dependence. This causes expressions
225 calculated into an accumulator variable to be independent for each
226 iteration of the loop. If STMT is some other phi, the rank is the
227 block rank of its containing block. */
229 phi_rank (gimple stmt
)
231 basic_block bb
= gimple_bb (stmt
);
232 struct loop
*father
= bb
->loop_father
;
238 /* We only care about real loops (those with a latch). */
240 return bb_rank
[bb
->index
];
242 /* Interesting phis must be in headers of innermost loops. */
243 if (bb
!= father
->header
245 return bb_rank
[bb
->index
];
247 /* Ignore virtual SSA_NAMEs. */
248 res
= gimple_phi_result (stmt
);
249 if (virtual_operand_p (res
))
250 return bb_rank
[bb
->index
];
252 /* The phi definition must have a single use, and that use must be
253 within the loop. Otherwise this isn't an accumulator pattern. */
254 if (!single_imm_use (res
, &use
, &use_stmt
)
255 || gimple_bb (use_stmt
)->loop_father
!= father
)
256 return bb_rank
[bb
->index
];
258 /* Look for phi arguments from within the loop. If found, bias this phi. */
259 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
261 tree arg
= gimple_phi_arg_def (stmt
, i
);
262 if (TREE_CODE (arg
) == SSA_NAME
263 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
265 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
266 if (gimple_bb (def_stmt
)->loop_father
== father
)
267 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
271 /* Must be an uninteresting phi. */
272 return bb_rank
[bb
->index
];
275 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
276 loop-carried dependence of an innermost loop, return TRUE; else
279 loop_carried_phi (tree exp
)
284 if (TREE_CODE (exp
) != SSA_NAME
285 || SSA_NAME_IS_DEFAULT_DEF (exp
))
288 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
290 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
293 /* Non-loop-carried phis have block rank. Loop-carried phis have
294 an additional bias added in. If this phi doesn't have block rank,
295 it's biased and should not be propagated. */
296 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
298 if (phi_rank (phi_stmt
) != block_rank
)
304 /* Return the maximum of RANK and the rank that should be propagated
305 from expression OP. For most operands, this is just the rank of OP.
306 For loop-carried phis, the value is zero to avoid undoing the bias
307 in favor of the phi. */
309 propagate_rank (long rank
, tree op
)
313 if (loop_carried_phi (op
))
316 op_rank
= get_rank (op
);
318 return MAX (rank
, op_rank
);
321 /* Look up the operand rank structure for expression E. */
324 find_operand_rank (tree e
)
326 void **slot
= pointer_map_contains (operand_rank
, e
);
327 return slot
? (long) (intptr_t) *slot
: -1;
330 /* Insert {E,RANK} into the operand rank hashtable. */
333 insert_operand_rank (tree e
, long rank
)
336 gcc_assert (rank
> 0);
337 slot
= pointer_map_insert (operand_rank
, e
);
339 *slot
= (void *) (intptr_t) rank
;
342 /* Given an expression E, return the rank of the expression. */
347 /* Constants have rank 0. */
348 if (is_gimple_min_invariant (e
))
351 /* SSA_NAME's have the rank of the expression they are the result
353 For globals and uninitialized values, the rank is 0.
354 For function arguments, use the pre-setup rank.
355 For PHI nodes, stores, asm statements, etc, we use the rank of
357 For simple operations, the rank is the maximum rank of any of
358 its operands, or the bb_rank, whichever is less.
359 I make no claims that this is optimal, however, it gives good
362 /* We make an exception to the normal ranking system to break
363 dependences of accumulator variables in loops. Suppose we
364 have a simple one-block loop containing:
371 As shown, each iteration of the calculation into x is fully
372 dependent upon the iteration before it. We would prefer to
373 see this in the form:
380 If the loop is unrolled, the calculations of b and c from
381 different iterations can be interleaved.
383 To obtain this result during reassociation, we bias the rank
384 of the phi definition x_1 upward, when it is recognized as an
385 accumulator pattern. The artificial rank causes it to be
386 added last, providing the desired independence. */
388 if (TREE_CODE (e
) == SSA_NAME
)
395 if (SSA_NAME_IS_DEFAULT_DEF (e
))
396 return find_operand_rank (e
);
398 stmt
= SSA_NAME_DEF_STMT (e
);
399 if (gimple_code (stmt
) == GIMPLE_PHI
)
400 return phi_rank (stmt
);
402 if (!is_gimple_assign (stmt
)
403 || gimple_vdef (stmt
))
404 return bb_rank
[gimple_bb (stmt
)->index
];
406 /* If we already have a rank for this expression, use that. */
407 rank
= find_operand_rank (e
);
411 /* Otherwise, find the maximum rank for the operands. As an
412 exception, remove the bias from loop-carried phis when propagating
413 the rank so that dependent operations are not also biased. */
415 if (gimple_assign_single_p (stmt
))
417 tree rhs
= gimple_assign_rhs1 (stmt
);
418 n
= TREE_OPERAND_LENGTH (rhs
);
420 rank
= propagate_rank (rank
, rhs
);
423 for (i
= 0; i
< n
; i
++)
425 op
= TREE_OPERAND (rhs
, i
);
428 rank
= propagate_rank (rank
, op
);
434 n
= gimple_num_ops (stmt
);
435 for (i
= 1; i
< n
; i
++)
437 op
= gimple_op (stmt
, i
);
439 rank
= propagate_rank (rank
, op
);
443 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
445 fprintf (dump_file
, "Rank for ");
446 print_generic_expr (dump_file
, e
, 0);
447 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
450 /* Note the rank in the hashtable so we don't recompute it. */
451 insert_operand_rank (e
, (rank
+ 1));
455 /* Globals, etc, are rank 0 */
460 /* We want integer ones to end up last no matter what, since they are
461 the ones we can do the most with. */
462 #define INTEGER_CONST_TYPE 1 << 3
463 #define FLOAT_CONST_TYPE 1 << 2
464 #define OTHER_CONST_TYPE 1 << 1
466 /* Classify an invariant tree into integer, float, or other, so that
467 we can sort them to be near other constants of the same type. */
469 constant_type (tree t
)
471 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
472 return INTEGER_CONST_TYPE
;
473 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
474 return FLOAT_CONST_TYPE
;
476 return OTHER_CONST_TYPE
;
479 /* qsort comparison function to sort operand entries PA and PB by rank
480 so that the sorted array is ordered by rank in decreasing order. */
482 sort_by_operand_rank (const void *pa
, const void *pb
)
484 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
485 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
487 /* It's nicer for optimize_expression if constants that are likely
488 to fold when added/multiplied//whatever are put next to each
489 other. Since all constants have rank 0, order them by type. */
490 if (oeb
->rank
== 0 && oea
->rank
== 0)
492 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
493 return constant_type (oeb
->op
) - constant_type (oea
->op
);
495 /* To make sorting result stable, we use unique IDs to determine
497 return oeb
->id
- oea
->id
;
500 /* Lastly, make sure the versions that are the same go next to each
501 other. We use SSA_NAME_VERSION because it's stable. */
502 if ((oeb
->rank
- oea
->rank
== 0)
503 && TREE_CODE (oea
->op
) == SSA_NAME
504 && TREE_CODE (oeb
->op
) == SSA_NAME
)
506 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
507 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
509 return oeb
->id
- oea
->id
;
512 if (oeb
->rank
!= oea
->rank
)
513 return oeb
->rank
- oea
->rank
;
515 return oeb
->id
- oea
->id
;
518 /* Add an operand entry to *OPS for the tree operand OP. */
521 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
523 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
526 oe
->rank
= get_rank (op
);
527 oe
->id
= next_operand_entry_id
++;
532 /* Add an operand entry to *OPS for the tree operand OP with repeat
536 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
537 HOST_WIDE_INT repeat
)
539 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
542 oe
->rank
= get_rank (op
);
543 oe
->id
= next_operand_entry_id
++;
547 reassociate_stats
.pows_encountered
++;
550 /* Return true if STMT is reassociable operation containing a binary
551 operation with tree code CODE, and is inside LOOP. */
554 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
556 basic_block bb
= gimple_bb (stmt
);
558 if (gimple_bb (stmt
) == NULL
)
561 if (!flow_bb_inside_loop_p (loop
, bb
))
564 if (is_gimple_assign (stmt
)
565 && gimple_assign_rhs_code (stmt
) == code
566 && has_single_use (gimple_assign_lhs (stmt
)))
573 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
574 operand of the negate operation. Otherwise, return NULL. */
577 get_unary_op (tree name
, enum tree_code opcode
)
579 gimple stmt
= SSA_NAME_DEF_STMT (name
);
581 if (!is_gimple_assign (stmt
))
584 if (gimple_assign_rhs_code (stmt
) == opcode
)
585 return gimple_assign_rhs1 (stmt
);
589 /* If CURR and LAST are a pair of ops that OPCODE allows us to
590 eliminate through equivalences, do so, remove them from OPS, and
591 return true. Otherwise, return false. */
594 eliminate_duplicate_pair (enum tree_code opcode
,
595 vec
<operand_entry_t
> *ops
,
598 operand_entry_t curr
,
599 operand_entry_t last
)
602 /* If we have two of the same op, and the opcode is & |, min, or max,
603 we can eliminate one of them.
604 If we have two of the same op, and the opcode is ^, we can
605 eliminate both of them. */
607 if (last
&& last
->op
== curr
->op
)
615 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
617 fprintf (dump_file
, "Equivalence: ");
618 print_generic_expr (dump_file
, curr
->op
, 0);
619 fprintf (dump_file
, " [&|minmax] ");
620 print_generic_expr (dump_file
, last
->op
, 0);
621 fprintf (dump_file
, " -> ");
622 print_generic_stmt (dump_file
, last
->op
, 0);
625 ops
->ordered_remove (i
);
626 reassociate_stats
.ops_eliminated
++;
631 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
633 fprintf (dump_file
, "Equivalence: ");
634 print_generic_expr (dump_file
, curr
->op
, 0);
635 fprintf (dump_file
, " ^ ");
636 print_generic_expr (dump_file
, last
->op
, 0);
637 fprintf (dump_file
, " -> nothing\n");
640 reassociate_stats
.ops_eliminated
+= 2;
642 if (ops
->length () == 2)
645 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
650 ops
->ordered_remove (i
-1);
651 ops
->ordered_remove (i
-1);
663 static vec
<tree
> plus_negates
;
665 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
666 expression, look in OPS for a corresponding positive operation to cancel
667 it out. If we find one, remove the other from OPS, replace
668 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
672 eliminate_plus_minus_pair (enum tree_code opcode
,
673 vec
<operand_entry_t
> *ops
,
674 unsigned int currindex
,
675 operand_entry_t curr
)
682 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
685 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
686 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
687 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
690 /* Any non-negated version will have a rank that is one less than
691 the current rank. So once we hit those ranks, if we don't find
694 for (i
= currindex
+ 1;
695 ops
->iterate (i
, &oe
)
696 && oe
->rank
>= curr
->rank
- 1 ;
699 if (oe
->op
== negateop
)
702 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
704 fprintf (dump_file
, "Equivalence: ");
705 print_generic_expr (dump_file
, negateop
, 0);
706 fprintf (dump_file
, " + -");
707 print_generic_expr (dump_file
, oe
->op
, 0);
708 fprintf (dump_file
, " -> 0\n");
711 ops
->ordered_remove (i
);
712 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
713 ops
->ordered_remove (currindex
);
714 reassociate_stats
.ops_eliminated
++;
718 else if (oe
->op
== notop
)
720 tree op_type
= TREE_TYPE (oe
->op
);
722 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
724 fprintf (dump_file
, "Equivalence: ");
725 print_generic_expr (dump_file
, notop
, 0);
726 fprintf (dump_file
, " + ~");
727 print_generic_expr (dump_file
, oe
->op
, 0);
728 fprintf (dump_file
, " -> -1\n");
731 ops
->ordered_remove (i
);
732 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
733 ops
->ordered_remove (currindex
);
734 reassociate_stats
.ops_eliminated
++;
740 /* CURR->OP is a negate expr in a plus expr: save it for later
741 inspection in repropagate_negates(). */
742 if (negateop
!= NULL_TREE
)
743 plus_negates
.safe_push (curr
->op
);
748 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
749 bitwise not expression, look in OPS for a corresponding operand to
750 cancel it out. If we find one, remove the other from OPS, replace
751 OPS[CURRINDEX] with 0, and return true. Otherwise, return
755 eliminate_not_pairs (enum tree_code opcode
,
756 vec
<operand_entry_t
> *ops
,
757 unsigned int currindex
,
758 operand_entry_t curr
)
764 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
765 || TREE_CODE (curr
->op
) != SSA_NAME
)
768 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
769 if (notop
== NULL_TREE
)
772 /* Any non-not version will have a rank that is one less than
773 the current rank. So once we hit those ranks, if we don't find
776 for (i
= currindex
+ 1;
777 ops
->iterate (i
, &oe
)
778 && oe
->rank
>= curr
->rank
- 1;
783 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
785 fprintf (dump_file
, "Equivalence: ");
786 print_generic_expr (dump_file
, notop
, 0);
787 if (opcode
== BIT_AND_EXPR
)
788 fprintf (dump_file
, " & ~");
789 else if (opcode
== BIT_IOR_EXPR
)
790 fprintf (dump_file
, " | ~");
791 print_generic_expr (dump_file
, oe
->op
, 0);
792 if (opcode
== BIT_AND_EXPR
)
793 fprintf (dump_file
, " -> 0\n");
794 else if (opcode
== BIT_IOR_EXPR
)
795 fprintf (dump_file
, " -> -1\n");
798 if (opcode
== BIT_AND_EXPR
)
799 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
800 else if (opcode
== BIT_IOR_EXPR
)
801 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
802 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
804 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
806 ops
->quick_push (oe
);
814 /* Use constant value that may be present in OPS to try to eliminate
815 operands. Note that this function is only really used when we've
816 eliminated ops for other reasons, or merged constants. Across
817 single statements, fold already does all of this, plus more. There
818 is little point in duplicating logic, so I've only included the
819 identities that I could ever construct testcases to trigger. */
822 eliminate_using_constants (enum tree_code opcode
,
823 vec
<operand_entry_t
> *ops
)
825 operand_entry_t oelast
= ops
->last ();
826 tree type
= TREE_TYPE (oelast
->op
);
828 if (oelast
->rank
== 0
829 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
834 if (integer_zerop (oelast
->op
))
836 if (ops
->length () != 1)
838 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
839 fprintf (dump_file
, "Found & 0, removing all other ops\n");
841 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
844 ops
->quick_push (oelast
);
848 else if (integer_all_onesp (oelast
->op
))
850 if (ops
->length () != 1)
852 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
853 fprintf (dump_file
, "Found & -1, removing\n");
855 reassociate_stats
.ops_eliminated
++;
860 if (integer_all_onesp (oelast
->op
))
862 if (ops
->length () != 1)
864 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
865 fprintf (dump_file
, "Found | -1, removing all other ops\n");
867 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
870 ops
->quick_push (oelast
);
874 else if (integer_zerop (oelast
->op
))
876 if (ops
->length () != 1)
878 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
879 fprintf (dump_file
, "Found | 0, removing\n");
881 reassociate_stats
.ops_eliminated
++;
886 if (integer_zerop (oelast
->op
)
887 || (FLOAT_TYPE_P (type
)
888 && !HONOR_NANS (TYPE_MODE (type
))
889 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
890 && real_zerop (oelast
->op
)))
892 if (ops
->length () != 1)
894 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
895 fprintf (dump_file
, "Found * 0, removing all other ops\n");
897 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
899 ops
->quick_push (oelast
);
903 else if (integer_onep (oelast
->op
)
904 || (FLOAT_TYPE_P (type
)
905 && !HONOR_SNANS (TYPE_MODE (type
))
906 && real_onep (oelast
->op
)))
908 if (ops
->length () != 1)
910 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
911 fprintf (dump_file
, "Found * 1, removing\n");
913 reassociate_stats
.ops_eliminated
++;
921 if (integer_zerop (oelast
->op
)
922 || (FLOAT_TYPE_P (type
)
923 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
924 && fold_real_zero_addition_p (type
, oelast
->op
,
925 opcode
== MINUS_EXPR
)))
927 if (ops
->length () != 1)
929 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
930 fprintf (dump_file
, "Found [|^+] 0, removing\n");
932 reassociate_stats
.ops_eliminated
++;
944 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
947 /* Structure for tracking and counting operands. */
948 typedef struct oecount_s
{
951 enum tree_code oecode
;
956 /* The heap for the oecount hashtable and the sorted list of operands. */
957 static vec
<oecount
> cvec
;
960 /* Oecount hashtable helpers. */
962 struct oecount_hasher
: typed_noop_remove
<void>
964 /* Note that this hash table stores integers, not pointers.
965 So, observe the casting in the member functions. */
966 typedef void value_type
;
967 typedef void compare_type
;
968 static inline hashval_t
hash (const value_type
*);
969 static inline bool equal (const value_type
*, const compare_type
*);
972 /* Hash function for oecount. */
975 oecount_hasher::hash (const value_type
*p
)
977 const oecount
*c
= &cvec
[(size_t)p
- 42];
978 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
981 /* Comparison function for oecount. */
984 oecount_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
986 const oecount
*c1
= &cvec
[(size_t)p1
- 42];
987 const oecount
*c2
= &cvec
[(size_t)p2
- 42];
988 return (c1
->oecode
== c2
->oecode
989 && c1
->op
== c2
->op
);
992 /* Comparison function for qsort sorting oecount elements by count. */
995 oecount_cmp (const void *p1
, const void *p2
)
997 const oecount
*c1
= (const oecount
*)p1
;
998 const oecount
*c2
= (const oecount
*)p2
;
999 if (c1
->cnt
!= c2
->cnt
)
1000 return c1
->cnt
- c2
->cnt
;
1002 /* If counts are identical, use unique IDs to stabilize qsort. */
1003 return c1
->id
- c2
->id
;
1006 /* Return TRUE iff STMT represents a builtin call that raises OP
1007 to some exponent. */
1010 stmt_is_power_of_op (gimple stmt
, tree op
)
1014 if (!is_gimple_call (stmt
))
1017 fndecl
= gimple_call_fndecl (stmt
);
1020 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1023 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1025 CASE_FLT_FN (BUILT_IN_POW
):
1026 CASE_FLT_FN (BUILT_IN_POWI
):
1027 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1034 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1035 in place and return the result. Assumes that stmt_is_power_of_op
1036 was previously called for STMT and returned TRUE. */
1038 static HOST_WIDE_INT
1039 decrement_power (gimple stmt
)
1041 REAL_VALUE_TYPE c
, cint
;
1042 HOST_WIDE_INT power
;
1045 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1047 CASE_FLT_FN (BUILT_IN_POW
):
1048 arg1
= gimple_call_arg (stmt
, 1);
1049 c
= TREE_REAL_CST (arg1
);
1050 power
= real_to_integer (&c
) - 1;
1051 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1052 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1055 CASE_FLT_FN (BUILT_IN_POWI
):
1056 arg1
= gimple_call_arg (stmt
, 1);
1057 power
= TREE_INT_CST_LOW (arg1
) - 1;
1058 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1066 /* Find the single immediate use of STMT's LHS, and replace it
1067 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1068 replace *DEF with OP as well. */
1071 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1076 gimple_stmt_iterator gsi
;
1078 if (is_gimple_call (stmt
))
1079 lhs
= gimple_call_lhs (stmt
);
1081 lhs
= gimple_assign_lhs (stmt
);
1083 gcc_assert (has_single_use (lhs
));
1084 single_imm_use (lhs
, &use
, &use_stmt
);
1088 if (TREE_CODE (op
) != SSA_NAME
)
1089 update_stmt (use_stmt
);
1090 gsi
= gsi_for_stmt (stmt
);
1091 unlink_stmt_vdef (stmt
);
1092 gsi_remove (&gsi
, true);
1093 release_defs (stmt
);
1096 /* Walks the linear chain with result *DEF searching for an operation
1097 with operand OP and code OPCODE removing that from the chain. *DEF
1098 is updated if there is only one operand but no operation left. */
1101 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1103 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1109 if (opcode
== MULT_EXPR
1110 && stmt_is_power_of_op (stmt
, op
))
1112 if (decrement_power (stmt
) == 1)
1113 propagate_op_to_single_use (op
, stmt
, def
);
1117 name
= gimple_assign_rhs1 (stmt
);
1119 /* If this is the operation we look for and one of the operands
1120 is ours simply propagate the other operand into the stmts
1122 if (gimple_assign_rhs_code (stmt
) == opcode
1124 || gimple_assign_rhs2 (stmt
) == op
))
1127 name
= gimple_assign_rhs2 (stmt
);
1128 propagate_op_to_single_use (name
, stmt
, def
);
1132 /* We might have a multiply of two __builtin_pow* calls, and
1133 the operand might be hiding in the rightmost one. */
1134 if (opcode
== MULT_EXPR
1135 && gimple_assign_rhs_code (stmt
) == opcode
1136 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1137 && has_single_use (gimple_assign_rhs2 (stmt
)))
1139 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1140 if (stmt_is_power_of_op (stmt2
, op
))
1142 if (decrement_power (stmt2
) == 1)
1143 propagate_op_to_single_use (op
, stmt2
, def
);
1148 /* Continue walking the chain. */
1149 gcc_assert (name
!= op
1150 && TREE_CODE (name
) == SSA_NAME
);
1151 stmt
= SSA_NAME_DEF_STMT (name
);
1156 /* Returns true if statement S1 dominates statement S2. Like
1157 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1160 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1162 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1164 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1165 SSA_NAME. Assume it lives at the beginning of function and
1166 thus dominates everything. */
1167 if (!bb1
|| s1
== s2
)
1170 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1176 /* PHIs in the same basic block are assumed to be
1177 executed all in parallel, if only one stmt is a PHI,
1178 it dominates the other stmt in the same basic block. */
1179 if (gimple_code (s1
) == GIMPLE_PHI
)
1182 if (gimple_code (s2
) == GIMPLE_PHI
)
1185 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1187 if (gimple_uid (s1
) < gimple_uid (s2
))
1190 if (gimple_uid (s1
) > gimple_uid (s2
))
1193 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1194 unsigned int uid
= gimple_uid (s1
);
1195 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1197 gimple s
= gsi_stmt (gsi
);
1198 if (gimple_uid (s
) != uid
)
1207 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1210 /* Insert STMT after INSERT_POINT. */
1213 insert_stmt_after (gimple stmt
, gimple insert_point
)
1215 gimple_stmt_iterator gsi
;
1218 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1219 bb
= gimple_bb (insert_point
);
1220 else if (!stmt_ends_bb_p (insert_point
))
1222 gsi
= gsi_for_stmt (insert_point
);
1223 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1224 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1228 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1229 thus if it must end a basic block, it should be a call that can
1230 throw, or some assignment that can throw. If it throws, the LHS
1231 of it will not be initialized though, so only valid places using
1232 the SSA_NAME should be dominated by the fallthru edge. */
1233 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1234 gsi
= gsi_after_labels (bb
);
1235 if (gsi_end_p (gsi
))
1237 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1238 gimple_set_uid (stmt
,
1239 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1242 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1243 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1246 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1247 the result. Places the statement after the definition of either
1248 OP1 or OP2. Returns the new statement. */
/* NOTE(review): this region is a lossy line-split extraction of GCC's
   tree-ssa-reassoc.c; statements are broken across lines and some
   original lines are absent (visible from gaps in the embedded line
   numbers).  Code is left byte-identical; only comments were added.  */
1251 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
/* Defining statements of OP1/OP2 when they are SSA names; NULL otherwise.  */
1253 gimple op1def
= NULL
, op2def
= NULL
;
1254 gimple_stmt_iterator gsi
;
1258 /* Create the addition statement. */
1259 op
= make_ssa_name (type
, NULL
);
1260 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1262 /* Find an insertion place and insert. */
1263 if (TREE_CODE (op1
) == SSA_NAME
)
1264 op1def
= SSA_NAME_DEF_STMT (op1
);
1265 if (TREE_CODE (op2
) == SSA_NAME
)
1266 op2def
= SSA_NAME_DEF_STMT (op2
);
/* Neither operand has a real defining statement: insert at the start of
   the function, after any labels in the entry block's single successor.  */
1267 if ((!op1def
|| gimple_nop_p (op1def
))
1268 && (!op2def
|| gimple_nop_p (op2def
)))
1270 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
1271 if (gsi_end_p (gsi
))
1273 gimple_stmt_iterator gsi2
1274 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR
));
1275 gimple_set_uid (sum
,
1276 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1279 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1280 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
/* Otherwise pick the later of the two defining statements (the one
   dominated by the other) and insert SUM right after it.  */
1284 gimple insert_point
;
1285 if ((!op1def
|| gimple_nop_p (op1def
))
1286 || (op2def
&& !gimple_nop_p (op2def
)
1287 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1288 insert_point
= op2def
;
1290 insert_point
= op1def
;
1291 insert_stmt_after (sum
, insert_point
);
1298 /* Perform un-distribution of divisions and multiplications.
1299 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1300 to (A + B) / X for real X.
1302 The algorithm is organized as follows.
1304 - First we walk the addition chain *OPS looking for summands that
1305 are defined by a multiplication or a real division. This results
1306 in the candidates bitmap with relevant indices into *OPS.
1308 - Second we build the chains of multiplications or divisions for
1309 these candidates, counting the number of occurrences of (operand, code)
1310 pairs in all of the candidates chains.
1312 - Third we sort the (operand, code) pairs by number of occurrence and
1313 process them starting with the pair with the most uses.
1315 * For each such pair we walk the candidates again to build a
1316 second candidate bitmap noting all multiplication/division chains
1317 that have at least one occurrence of (operand, code).
1319 * We build an alternate addition chain only covering these
1320 candidates with one (operand, code) operation removed from their
1321 multiplication/division chain.
1323 * The first candidate gets replaced by the alternate addition chain
1324 multiplied/divided by the operand.
1326 * All candidate chains get disabled for further processing and
1327 processing of (operand, code) pairs continues.
1329 The alternate addition chains built are re-processed by the main
1330 reassociation algorithm which allows optimizing a * x * y + b * y * x
1331 to (a + b ) * x * y in one invocation of the reassociation pass. */
/* NOTE(review): lossy line-split extraction -- many original lines are
   missing; code left byte-identical, comments only added.  */
1334 undistribute_ops_list (enum tree_code opcode
,
1335 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1337 unsigned int length
= ops
->length ();
1338 operand_entry_t oe1
;
1340 sbitmap candidates
, candidates2
;
1341 unsigned nr_candidates
, nr_candidates2
;
1342 sbitmap_iterator sbi0
;
1343 vec
<operand_entry_t
> *subops
;
1344 hash_table
<oecount_hasher
> ctable
;
1345 bool changed
= false;
1346 int next_oecount_id
= 0;
1349 || opcode
!= PLUS_EXPR
)
1352 /* Build a list of candidates to process. */
1353 candidates
= sbitmap_alloc (length
);
1354 bitmap_clear (candidates
);
/* Phase 1: mark every summand defined by a reassociable MULT/RDIV.  */
1356 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1358 enum tree_code dcode
;
1361 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1363 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1364 if (!is_gimple_assign (oe1def
))
1366 dcode
= gimple_assign_rhs_code (oe1def
);
1367 if ((dcode
!= MULT_EXPR
1368 && dcode
!= RDIV_EXPR
)
1369 || !is_reassociable_op (oe1def
, dcode
, loop
))
1372 bitmap_set_bit (candidates
, i
);
1376 if (nr_candidates
< 2)
1378 sbitmap_free (candidates
);
1382 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1384 fprintf (dump_file
, "searching for un-distribute opportunities ");
1385 print_generic_expr (dump_file
,
1386 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1387 fprintf (dump_file
, " %d\n", nr_candidates
);
1390 /* Build linearized sub-operand lists and the counting table. */
1393 /* ??? Macro arguments cannot have multi-argument template types in
1394 them. This typedef is needed to workaround that limitation. */
1395 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1396 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1397 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1400 enum tree_code oecode
;
1403 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1404 oecode
= gimple_assign_rhs_code (oedef
);
1405 linearize_expr_tree (&subops
[i
], oedef
,
1406 associative_tree_code (oecode
), false);
1408 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1415 c
.id
= next_oecount_id
++;
/* NOTE(review): the +41 / -42 offsets below bias vector indices so they
   can be stored in the hash-table slot as non-null void* values.  */
1418 idx
= cvec
.length () + 41;
1419 slot
= ctable
.find_slot ((void *)idx
, INSERT
);
1422 *slot
= (void *)idx
;
1427 cvec
[(size_t)*slot
- 42].cnt
++;
1433 /* Sort the counting table. */
1434 cvec
.qsort (oecount_cmp
);
1436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1439 fprintf (dump_file
, "Candidates:\n");
1440 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1442 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1443 c
->oecode
== MULT_EXPR
1444 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1445 print_generic_expr (dump_file
, c
->op
, 0);
1446 fprintf (dump_file
, "\n");
1450 /* Process the (operand, code) pairs in order of most occurrence. */
1451 candidates2
= sbitmap_alloc (length
);
1452 while (!cvec
.is_empty ())
1454 oecount
*c
= &cvec
.last ();
1458 /* Now collect the operands in the outer chain that contain
1459 the common operand in their inner chain. */
1460 bitmap_clear (candidates2
);
1462 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1465 enum tree_code oecode
;
1467 tree op
= (*ops
)[i
]->op
;
1469 /* If we undistributed in this chain already this may be
1471 if (TREE_CODE (op
) != SSA_NAME
)
1474 oedef
= SSA_NAME_DEF_STMT (op
);
1475 oecode
= gimple_assign_rhs_code (oedef
);
1476 if (oecode
!= c
->oecode
)
1479 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1481 if (oe1
->op
== c
->op
)
1483 bitmap_set_bit (candidates2
, i
);
1490 if (nr_candidates2
>= 2)
1492 operand_entry_t oe1
, oe2
;
1494 int first
= bitmap_first_set_bit (candidates2
);
1496 /* Build the new addition chain. */
1497 oe1
= (*ops
)[first
];
1498 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1500 fprintf (dump_file
, "Building (");
1501 print_generic_expr (dump_file
, oe1
->op
, 0);
1503 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1504 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1508 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1510 fprintf (dump_file
, " + ");
1511 print_generic_expr (dump_file
, oe2
->op
, 0);
1513 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1514 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1515 oe1
->op
, oe2
->op
, opcode
);
1516 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1518 oe1
->op
= gimple_get_lhs (sum
);
1521 /* Apply the multiplication/division. */
1522 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1523 oe1
->op
, c
->op
, c
->oecode
);
1524 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1526 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1527 print_generic_expr (dump_file
, c
->op
, 0);
1528 fprintf (dump_file
, "\n");
1531 /* Record it in the addition chain and disable further
1532 undistribution with this op. */
1533 oe1
->op
= gimple_assign_lhs (prod
);
1534 oe1
->rank
= get_rank (oe1
->op
);
1535 subops
[first
].release ();
/* Cleanup: release all per-candidate sub-operand vectors and bitmaps.  */
1543 for (i
= 0; i
< ops
->length (); ++i
)
1544 subops
[i
].release ();
1547 sbitmap_free (candidates
);
1548 sbitmap_free (candidates2
);
1553 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1554 expression, examine the other OPS to see if any of them are comparisons
1555 of the same values, which we may be able to combine or eliminate.
1556 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
1559 eliminate_redundant_comparison (enum tree_code opcode
,
1560 vec
<operand_entry_t
> *ops
,
1561 unsigned int currindex
,
1562 operand_entry_t curr
)
1565 enum tree_code lcode
, rcode
;
/* Only bitwise AND/OR chains can combine comparisons.  */
1570 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1573 /* Check that CURR is a comparison. */
1574 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1576 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1577 if (!is_gimple_assign (def1
))
1579 lcode
= gimple_assign_rhs_code (def1
);
1580 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1582 op1
= gimple_assign_rhs1 (def1
);
1583 op2
= gimple_assign_rhs2 (def1
);
1585 /* Now look for a similar comparison in the remaining OPS. */
1586 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1590 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1592 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1593 if (!is_gimple_assign (def2
))
1595 rcode
= gimple_assign_rhs_code (def2
);
1596 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1599 /* If we got here, we have a match. See if we can combine the
1601 if (opcode
== BIT_IOR_EXPR
)
1602 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1603 rcode
, gimple_assign_rhs1 (def2
),
1604 gimple_assign_rhs2 (def2
));
1606 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1607 rcode
, gimple_assign_rhs1 (def2
),
1608 gimple_assign_rhs2 (def2
));
1612 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1613 always give us a boolean_type_node value back. If the original
1614 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1615 we need to convert. */
1616 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1617 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
/* The combined form must be a GIMPLE-valid comparison of simple values;
   otherwise give up on this pair.  */
1619 if (TREE_CODE (t
) != INTEGER_CST
1620 && !operand_equal_p (t
, curr
->op
, 0))
1622 enum tree_code subcode
;
1623 tree newop1
, newop2
;
1624 if (!COMPARISON_CLASS_P (t
))
1626 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1627 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1628 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1629 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1633 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1635 fprintf (dump_file
, "Equivalence: ");
1636 print_generic_expr (dump_file
, curr
->op
, 0);
1637 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1638 print_generic_expr (dump_file
, oe
->op
, 0);
1639 fprintf (dump_file
, " -> ");
1640 print_generic_expr (dump_file
, t
, 0);
1641 fprintf (dump_file
, "\n");
1644 /* Now we can delete oe, as it has been subsumed by the new combined
1646 ops
->ordered_remove (i
);
1647 reassociate_stats
.ops_eliminated
++;
1649 /* If t is the same as curr->op, we're done. Otherwise we must
1650 replace curr->op with t. Special case is if we got a constant
1651 back, in which case we add it to the end instead of in place of
1652 the current entry. */
1653 if (TREE_CODE (t
) == INTEGER_CST
)
1655 ops
->ordered_remove (currindex
);
1656 add_to_ops_vec (ops
, t
);
1658 else if (!operand_equal_p (t
, curr
->op
, 0))
1661 enum tree_code subcode
;
1664 gcc_assert (COMPARISON_CLASS_P (t
));
1665 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1666 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1667 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1668 gcc_checking_assert (is_gimple_val (newop1
)
1669 && is_gimple_val (newop2
));
1670 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1671 curr
->op
= gimple_get_lhs (sum
);
1679 /* Perform various identities and other optimizations on the list of
1680 operand entries, stored in OPS. The tree code for the binary
1681 operation between all the operands is OPCODE. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
1684 optimize_ops_list (enum tree_code opcode
,
1685 vec
<operand_entry_t
> *ops
)
1687 unsigned int length
= ops
->length ();
1690 operand_entry_t oelast
= NULL
;
1691 bool iterate
= false;
1696 oelast
= ops
->last ();
1698 /* If the last two are constants, pop the constants off, merge them
1699 and try the next two. */
1700 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1702 operand_entry_t oelm1
= (*ops
)[length
- 2];
1704 if (oelm1
->rank
== 0
1705 && is_gimple_min_invariant (oelm1
->op
)
1706 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1707 TREE_TYPE (oelast
->op
)))
1709 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1710 oelm1
->op
, oelast
->op
);
1712 if (folded
&& is_gimple_min_invariant (folded
))
1714 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1715 fprintf (dump_file
, "Merging constants\n");
1720 add_to_ops_vec (ops
, folded
);
1721 reassociate_stats
.constants_eliminated
++;
/* Recurse: the merged constant may itself combine with the next one.  */
1723 optimize_ops_list (opcode
, ops
);
1729 eliminate_using_constants (opcode
, ops
);
/* Walk the operand list applying the pairwise elimination helpers;
   each may shrink OPS in place.  */
1732 for (i
= 0; ops
->iterate (i
, &oe
);)
1736 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1738 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1739 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1740 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1752 length
= ops
->length ();
1753 oelast
= ops
->last ();
/* If anything changed, run the whole optimization list again.  */
1756 optimize_ops_list (opcode
, ops
);
1759 /* The following functions are subroutines to optimize_range_tests and allow
1760 it to try to change a logical combination of comparisons into a range
1764 X == 2 || X == 5 || X == 3 || X == 4
1768 (unsigned) (X - 2) <= 3
1770 For more information see comments above fold_test_range in fold-const.c,
1771 this implementation is for GIMPLE. */
1779 bool strict_overflow_p
;
1780 unsigned int idx
, next
;
1783 /* This is similar to make_range in fold-const.c, but on top of
1784 GIMPLE instead of trees. If EXP is non-NULL, it should be
1785 an SSA_NAME and STMT argument is ignored, otherwise STMT
1786 argument should be a GIMPLE_COND. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
1789 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1793 bool is_bool
, strict_overflow_p
;
/* Initialize R to a conservative default before refinement.  */
1797 r
->strict_overflow_p
= false;
1799 r
->high
= NULL_TREE
;
1800 if (exp
!= NULL_TREE
1801 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1804 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1805 and see if we can refine the range. Some of the cases below may not
1806 happen, but it doesn't seem worth worrying about this. We "continue"
1807 the outer loop when we've changed something; otherwise we "break"
1808 the switch, which will "break" the while. */
1809 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1812 strict_overflow_p
= false;
1814 if (exp
== NULL_TREE
)
1816 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1818 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1823 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1828 enum tree_code code
;
1829 tree arg0
, arg1
, exp_type
;
/* Fetch the defining comparison either from EXP's def stmt or from
   the GIMPLE_COND passed in as STMT.  */
1833 if (exp
!= NULL_TREE
)
1835 if (TREE_CODE (exp
) != SSA_NAME
)
1838 stmt
= SSA_NAME_DEF_STMT (exp
);
1839 if (!is_gimple_assign (stmt
))
1842 code
= gimple_assign_rhs_code (stmt
);
1843 arg0
= gimple_assign_rhs1 (stmt
);
1844 arg1
= gimple_assign_rhs2 (stmt
);
1845 exp_type
= TREE_TYPE (exp
);
1849 code
= gimple_cond_code (stmt
);
1850 arg0
= gimple_cond_lhs (stmt
);
1851 arg1
= gimple_cond_rhs (stmt
);
1852 exp_type
= boolean_type_node
;
1855 if (TREE_CODE (arg0
) != SSA_NAME
)
1857 loc
= gimple_location (stmt
);
1861 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1862 /* Ensure the range is either +[-,0], +[0,0],
1863 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1864 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1865 or similar expression of unconditional true or
1866 false, it should not be negated. */
1867 && ((high
&& integer_zerop (high
))
1868 || (low
&& integer_onep (low
))))
1881 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1883 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1888 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
/* Delegate one refinement step to the fold-const helper.  */
1903 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1905 &strict_overflow_p
);
1906 if (nexp
!= NULL_TREE
)
1909 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1922 r
->strict_overflow_p
= strict_overflow_p
;
1926 /* Comparison function for qsort. Sort entries
1927 without SSA_NAME exp first, then with SSA_NAMEs sorted
1928 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1929 by increasing ->low and if ->low is the same, by increasing
1930 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
/* NOTE(review): lossy line-split extraction -- some original lines
   (including return statements) are missing; code left byte-identical.  */
1934 range_entry_cmp (const void *a
, const void *b
)
1936 const struct range_entry
*p
= (const struct range_entry
*) a
;
1937 const struct range_entry
*q
= (const struct range_entry
*) b
;
1939 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1941 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1943 /* Group range_entries for the same SSA_NAME together. */
1944 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1946 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1948 /* If ->low is different, NULL low goes first, then by
1950 if (p
->low
!= NULL_TREE
)
1952 if (q
->low
!= NULL_TREE
)
1954 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1956 if (tem
&& integer_onep (tem
))
1958 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1960 if (tem
&& integer_onep (tem
))
1966 else if (q
->low
!= NULL_TREE
)
1968 /* If ->high is different, NULL high goes last, before that by
1970 if (p
->high
!= NULL_TREE
)
1972 if (q
->high
!= NULL_TREE
)
1974 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1976 if (tem
&& integer_onep (tem
))
1978 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1980 if (tem
&& integer_onep (tem
))
1986 else if (p
->high
!= NULL_TREE
)
1988 /* If both ranges are the same, sort below by ascending idx. */
1993 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
/* Final tie-break: ascending ->idx keeps the sort stable.  */
1996 if (p
->idx
< q
->idx
)
2000 gcc_checking_assert (p
->idx
> q
->idx
);
2005 /* Helper routine of optimize_range_test.
2006 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2007 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2008 OPCODE and OPS are arguments of optimize_range_tests. Return
2009 true if the range merge has been successful.
2010 If OPCODE is ERROR_MARK, this is called from within
2011 maybe_optimize_range_tests and is performing inter-bb range optimization.
2012 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2016 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2017 unsigned int count
, enum tree_code opcode
,
2018 vec
<operand_entry_t
> *ops
, tree exp
, bool in_p
,
2019 tree low
, tree high
, bool strict_overflow_p
)
2021 operand_entry_t oe
= (*ops
)[range
->idx
];
/* When OP is NULL the entry stands for a whole basic block's
   GIMPLE_COND (inter-bb mode); otherwise it is an SSA_NAME.  */
2023 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) : last_stmt (BASIC_BLOCK (oe
->id
));
2024 location_t loc
= gimple_location (stmt
);
2025 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2026 tree tem
= build_range_check (loc
, optype
, exp
, in_p
, low
, high
);
2027 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2028 gimple_stmt_iterator gsi
;
/* build_range_check failing means the merge cannot be expressed.  */
2030 if (tem
== NULL_TREE
)
2033 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2034 warning_at (loc
, OPT_Wstrict_overflow
,
2035 "assuming signed overflow does not occur "
2036 "when simplifying range test");
2038 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2040 struct range_entry
*r
;
2041 fprintf (dump_file
, "Optimizing range tests ");
2042 print_generic_expr (dump_file
, range
->exp
, 0);
2043 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2044 print_generic_expr (dump_file
, range
->low
, 0);
2045 fprintf (dump_file
, ", ");
2046 print_generic_expr (dump_file
, range
->high
, 0);
2047 fprintf (dump_file
, "]");
2048 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
2050 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2051 print_generic_expr (dump_file
, r
->low
, 0);
2052 fprintf (dump_file
, ", ");
2053 print_generic_expr (dump_file
, r
->high
, 0);
2054 fprintf (dump_file
, "]");
2056 fprintf (dump_file
, "\n into ");
2057 print_generic_expr (dump_file
, tem
, 0);
2058 fprintf (dump_file
, "\n");
/* Ranges were inverted for | on entry; invert the result back.  */
2061 if (opcode
== BIT_IOR_EXPR
2062 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2063 tem
= invert_truthvalue_loc (loc
, tem
);
2065 tem
= fold_convert_loc (loc
, optype
, tem
);
2066 gsi
= gsi_for_stmt (stmt
);
2067 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
/* Propagate STMT's uid backwards over the freshly forced statements
   so the reassociation uid ordering stays consistent.  */
2069 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2070 if (gimple_uid (gsi_stmt (gsi
)))
2073 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2080 range
->strict_overflow_p
= false;
/* Neutralize the subsumed entries so later passes drop them.  */
2082 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
2084 oe
= (*ops
)[range
->idx
];
2085 /* Now change all the other range test immediate uses, so that
2086 those tests will be optimized away. */
2087 if (opcode
== ERROR_MARK
)
2090 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2091 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2093 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2094 ? boolean_false_node
: boolean_true_node
);
2097 oe
->op
= error_mark_node
;
2098 range
->exp
= NULL_TREE
;
2103 /* Optimize X == CST1 || X == CST2
2104 if popcount (CST1 ^ CST2) == 1 into
2105 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2106 Similarly for ranges. E.g.
2107 X != 2 && X != 3 && X != 10 && X != 11
2108 will be transformed by the previous optimization into
2109 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2110 and this loop can transform that into
2111 !(((X & ~8) - 2U) <= 1U). */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2114 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2115 tree lowi
, tree lowj
, tree highi
, tree highj
,
2116 vec
<operand_entry_t
> *ops
,
2117 struct range_entry
*rangei
,
2118 struct range_entry
*rangej
)
2120 tree lowxor
, highxor
, tem
, exp
;
2121 /* Check highi ^ lowi == highj ^ lowj and
2122 popcount (highi ^ lowi) == 1. */
2123 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2124 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
/* tree_log2 < 0 means LOWXOR is not a power of two (popcount != 1).  */
2126 if (tree_log2 (lowxor
) < 0)
2128 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2129 if (!tree_int_cst_equal (lowxor
, highxor
))
/* Mask off the single differing bit and retest with merged bounds.  */
2132 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2133 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2134 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2135 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2136 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, exp
,
2137 rangei
->in_p
, lowj
, highj
,
2138 rangei
->strict_overflow_p
2139 || rangej
->strict_overflow_p
))
2144 /* Optimize X == CST1 || X == CST2
2145 if popcount (CST2 - CST1) == 1 into
2146 ((X - CST1) & ~(CST2 - CST1)) == 0.
2147 Similarly for ranges. E.g.
2148 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2149 || X == 75 || X == 45
2150 will be transformed by the previous optimization into
2151 (X - 43U) <= 3U || (X - 75U) <= 3U
2152 and this loop can transform that into
2153 ((X - 43U) & ~(75U - 43U)) <= 3U. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2155 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2156 tree lowi
, tree lowj
, tree highi
, tree highj
,
2157 vec
<operand_entry_t
> *ops
,
2158 struct range_entry
*rangei
,
2159 struct range_entry
*rangej
)
2161 tree tem1
, tem2
, mask
;
2162 /* Check highi - lowi == highj - lowj. */
2163 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2164 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2166 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2167 if (!tree_int_cst_equal (tem1
, tem2
))
2169 /* Check popcount (lowj - lowi) == 1. */
2170 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2171 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
/* tree_log2 < 0 means the difference is not a power of two.  */
2173 if (tree_log2 (tem1
) < 0)
/* Fold both ranges into ((X - LOWI) & MASK) against [0, TEM2].  */
2176 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2177 tem1
= fold_binary (MINUS_EXPR
, type
, rangei
->exp
, lowi
);
2178 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2179 lowj
= build_int_cst (type
, 0);
2180 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, tem1
,
2181 rangei
->in_p
, lowj
, tem2
,
2182 rangei
->strict_overflow_p
2183 || rangej
->strict_overflow_p
))
2188 /* It does some common checks for function optimize_range_tests_xor and
2189 optimize_range_tests_diff.
2190 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2191 Else it calls optimize_range_tests_diff. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2194 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2195 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2196 struct range_entry
*ranges
)
2199 bool any_changes
= false;
2200 for (i
= first
; i
< length
; i
++)
2202 tree lowi
, highi
, lowj
, highj
, type
, tem
;
/* Only live, non-inverted integral ranges are candidates.  */
2204 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2206 type
= TREE_TYPE (ranges
[i
].exp
);
2207 if (!INTEGRAL_TYPE_P (type
))
2209 lowi
= ranges
[i
].low
;
2210 if (lowi
== NULL_TREE
)
2211 lowi
= TYPE_MIN_VALUE (type
);
2212 highi
= ranges
[i
].high
;
2213 if (highi
== NULL_TREE
)
/* Pair I with up to 63 following ranges on the same expression;
   the +64 cap bounds the quadratic pairing work.  */
2215 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2218 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2220 lowj
= ranges
[j
].low
;
2221 if (lowj
== NULL_TREE
)
2223 highj
= ranges
[j
].high
;
2224 if (highj
== NULL_TREE
)
2225 highj
= TYPE_MAX_VALUE (type
);
2226 /* Check lowj > highi. */
2227 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2229 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2232 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2234 ranges
+ i
, ranges
+ j
);
2236 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2238 ranges
+ i
, ranges
+ j
);
2249 /* Optimize range tests, similarly how fold_range_test optimizes
2250 it on trees. The tree code for the binary
2251 operation between all the operands is OPCODE.
2252 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2253 maybe_optimize_range_tests for inter-bb range optimization.
2254 In that case if oe->op is NULL, oe->id is bb->index whose
2255 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2256 the actual opcode. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2259 optimize_range_tests (enum tree_code opcode
,
2260 vec
<operand_entry_t
> *ops
)
2262 unsigned int length
= ops
->length (), i
, j
, first
;
2264 struct range_entry
*ranges
;
2265 bool any_changes
= false;
2270 ranges
= XNEWVEC (struct range_entry
, length
);
2271 for (i
= 0; i
< length
; i
++)
2275 init_range_entry (ranges
+ i
, oe
->op
,
2276 oe
->op
? NULL
: last_stmt (BASIC_BLOCK (oe
->id
)));
2277 /* For | invert it now, we will invert it again before emitting
2278 the optimized expression. */
2279 if (opcode
== BIT_IOR_EXPR
2280 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2281 ranges
[i
].in_p
= !ranges
[i
].in_p
;
/* Sort so ranges on the same SSA_NAME are adjacent (range_entry_cmp).  */
2284 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2285 for (i
= 0; i
< length
; i
++)
2286 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2289 /* Try to merge ranges. */
2290 for (first
= i
; i
< length
; i
++)
2292 tree low
= ranges
[i
].low
;
2293 tree high
= ranges
[i
].high
;
2294 int in_p
= ranges
[i
].in_p
;
2295 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2296 int update_fail_count
= 0;
2298 for (j
= i
+ 1; j
< length
; j
++)
2300 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2302 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2303 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2305 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2311 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2312 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2318 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2319 while update_range_test would fail. */
2320 else if (update_fail_count
== 64)
2323 ++update_fail_count
;
/* Second phase: try the xor and diff single-bit transformations.  */
2326 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2329 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2330 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2333 if (any_changes
&& opcode
!= ERROR_MARK
)
2336 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2338 if (oe
->op
== error_mark_node
)
2347 XDELETEVEC (ranges
);
2351 /* Return true if STMT is a cast like:
2357 # _345 = PHI <_123(N), 1(...), 1(...)>
2358 where _234 has bool type, _123 has single use and
2359 bb N has a single successor M. This is commonly used in
2360 the last block of a range test. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2363 final_range_test_p (gimple stmt
)
2365 basic_block bb
, rhs_bb
;
2368 use_operand_p use_p
;
/* Must be a cast assignment in a block with one non-complex successor.  */
2371 if (!gimple_assign_cast_p (stmt
))
2373 bb
= gimple_bb (stmt
);
2374 if (!single_succ_p (bb
))
2376 e
= single_succ_edge (bb
);
2377 if (e
->flags
& EDGE_COMPLEX
)
2380 lhs
= gimple_assign_lhs (stmt
);
2381 rhs
= gimple_assign_rhs1 (stmt
);
2382 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2383 || TREE_CODE (rhs
) != SSA_NAME
2384 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2387 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2388 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2391 if (gimple_code (use_stmt
) != GIMPLE_PHI
2392 || gimple_bb (use_stmt
) != e
->dest
)
2395 /* And that the rhs is defined in the same loop. */
2396 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2398 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2404 /* Return true if BB is suitable basic block for inter-bb range test
2405 optimization. If BACKWARD is true, BB should be the only predecessor
2406 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2407 or compared with to find a common basic block to which all conditions
2408 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2409 be the only predecessor of BB. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2412 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2415 edge_iterator ei
, ei2
;
2418 gimple_stmt_iterator gsi
;
2419 bool other_edge_seen
= false;
2424 /* Check last stmt first. */
2425 stmt
= last_stmt (bb
);
2427 || (gimple_code (stmt
) != GIMPLE_COND
2428 && (backward
|| !final_range_test_p (stmt
)))
2429 || gimple_visited_p (stmt
)
2430 || stmt_could_throw_p (stmt
)
2433 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2436 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2437 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2438 to *OTHER_BB (if not set yet, try to find it out). */
2439 if (EDGE_COUNT (bb
->succs
) != 2)
2441 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2443 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2445 if (e
->dest
== test_bb
)
2454 if (*other_bb
== NULL
)
2456 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2457 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2459 else if (e
->dest
== e2
->dest
)
2460 *other_bb
= e
->dest
;
2461 if (*other_bb
== NULL
)
2464 if (e
->dest
== *other_bb
)
2465 other_edge_seen
= true;
2469 if (*other_bb
== NULL
|| !other_edge_seen
)
2472 else if (single_succ (bb
) != *other_bb
)
2475 /* Now check all PHIs of *OTHER_BB. */
2476 e
= find_edge (bb
, *other_bb
);
2477 e2
= find_edge (test_bb
, *other_bb
);
2478 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2480 gimple phi
= gsi_stmt (gsi
);
2481 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2482 corresponding to BB and TEST_BB predecessor must be the same. */
2483 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2484 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2486 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2487 one of the PHIs should have the lhs of the last stmt in
2488 that block as PHI arg and that PHI should have 0 or 1
2489 corresponding to it in all other range test basic blocks
2493 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2494 == gimple_assign_lhs (stmt
)
2495 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2496 || integer_onep (gimple_phi_arg_def (phi
,
2502 gimple test_last
= last_stmt (test_bb
);
2503 if (gimple_code (test_last
) != GIMPLE_COND
2504 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2505 == gimple_assign_lhs (test_last
)
2506 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2507 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2517 /* Return true if BB doesn't have side-effects that would disallow
2518 range test optimization, all SSA_NAMEs set in the bb are consumed
2519 in the bb and there are no PHIs. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2522 no_side_effect_bb (basic_block bb
)
2524 gimple_stmt_iterator gsi
;
/* Any PHI disqualifies the block immediately.  */
2527 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2529 last
= last_stmt (bb
);
2530 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2532 gimple stmt
= gsi_stmt (gsi
);
2534 imm_use_iterator imm_iter
;
2535 use_operand_p use_p
;
2537 if (is_gimple_debug (stmt
))
2539 if (gimple_has_side_effects (stmt
))
2543 if (!is_gimple_assign (stmt
))
2545 lhs
= gimple_assign_lhs (stmt
);
2546 if (TREE_CODE (lhs
) != SSA_NAME
)
2548 if (gimple_assign_rhs_could_trap_p (stmt
))
/* Every non-debug use of LHS must stay within this block.  */
2550 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2552 gimple use_stmt
= USE_STMT (use_p
);
2553 if (is_gimple_debug (use_stmt
))
2555 if (gimple_bb (use_stmt
) != bb
)
2562 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2563 return true and fill in *OPS recursively. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2566 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2569 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2573 if (!is_reassociable_op (stmt
, code
, loop
))
2576 rhs
[0] = gimple_assign_rhs1 (stmt
);
2577 rhs
[1] = gimple_assign_rhs2 (stmt
);
/* Mark the statement visited so the chain is not walked twice.  */
2578 gimple_set_visited (stmt
, true);
/* Recurse into each operand; leaves become operand entries in *OPS.  */
2579 for (i
= 0; i
< 2; i
++)
2580 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2581 && !get_ops (rhs
[i
], code
, ops
, loop
)
2582 && has_single_use (rhs
[i
]))
2584 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2590 ops
->safe_push (oe
);
2595 /* Find the ops that were added by get_ops starting from VAR, see if
2596 they were changed during update_range_test and if yes, create new
2600 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2601 unsigned int *pidx
, struct loop
*loop
)
2603 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2607 if (!is_reassociable_op (stmt
, code
, loop
))
2610 rhs
[0] = gimple_assign_rhs1 (stmt
);
2611 rhs
[1] = gimple_assign_rhs2 (stmt
);
2614 for (i
= 0; i
< 2; i
++)
2615 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2617 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2618 if (rhs
[2 + i
] == NULL_TREE
)
2620 if (has_single_use (rhs
[i
]))
2621 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2623 rhs
[2 + i
] = rhs
[i
];
2626 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2627 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
2629 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
2630 var
= make_ssa_name (TREE_TYPE (var
), NULL
);
2631 gimple g
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
2632 var
, rhs
[2], rhs
[3]);
2633 gimple_set_uid (g
, gimple_uid (stmt
));
2634 gimple_set_visited (g
, true);
2635 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2640 /* Structure to track the initial value passed to get_ops and
2641 the range in the ops vector for each basic block. */
2643 struct inter_bb_range_test_entry
2646 unsigned int first_idx
, last_idx
;
2649 /* Inter-bb range test optimization. */
2652 maybe_optimize_range_tests (gimple stmt
)
2654 basic_block first_bb
= gimple_bb (stmt
);
2655 basic_block last_bb
= first_bb
;
2656 basic_block other_bb
= NULL
;
2660 vec
<operand_entry_t
> ops
= vNULL
;
2661 vec
<inter_bb_range_test_entry
> bbinfo
= vNULL
;
2662 bool any_changes
= false;
2664 /* Consider only basic blocks that end with GIMPLE_COND or
2665 a cast statement satisfying final_range_test_p. All
2666 but the last bb in the first_bb .. last_bb range
2667 should end with GIMPLE_COND. */
2668 if (gimple_code (stmt
) == GIMPLE_COND
)
2670 if (EDGE_COUNT (first_bb
->succs
) != 2)
2673 else if (final_range_test_p (stmt
))
2674 other_bb
= single_succ (first_bb
);
2678 if (stmt_could_throw_p (stmt
))
2681 /* As relative ordering of post-dominator sons isn't fixed,
2682 maybe_optimize_range_tests can be called first on any
2683 bb in the range we want to optimize. So, start searching
2684 backwards, if first_bb can be set to a predecessor. */
2685 while (single_pred_p (first_bb
))
2687 basic_block pred_bb
= single_pred (first_bb
);
2688 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
2690 if (!no_side_effect_bb (first_bb
))
2694 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2695 Before starting forward search in last_bb successors, find
2696 out the other_bb. */
2697 if (first_bb
== last_bb
)
2700 /* As non-GIMPLE_COND last stmt always terminates the range,
2701 if forward search didn't discover anything, just give up. */
2702 if (gimple_code (stmt
) != GIMPLE_COND
)
2704 /* Look at both successors. Either it ends with a GIMPLE_COND
2705 and satisfies suitable_cond_bb, or ends with a cast and
2706 other_bb is that cast's successor. */
2707 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
2708 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
2709 || e
->dest
== first_bb
)
2711 else if (single_pred_p (e
->dest
))
2713 stmt
= last_stmt (e
->dest
);
2715 && gimple_code (stmt
) == GIMPLE_COND
2716 && EDGE_COUNT (e
->dest
->succs
) == 2)
2718 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
2724 && final_range_test_p (stmt
)
2725 && find_edge (first_bb
, single_succ (e
->dest
)))
2727 other_bb
= single_succ (e
->dest
);
2728 if (other_bb
== first_bb
)
2732 if (other_bb
== NULL
)
2735 /* Now do the forward search, moving last_bb to successor bbs
2736 that aren't other_bb. */
2737 while (EDGE_COUNT (last_bb
->succs
) == 2)
2739 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
2740 if (e
->dest
!= other_bb
)
2744 if (!single_pred_p (e
->dest
))
2746 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
2748 if (!no_side_effect_bb (e
->dest
))
2752 if (first_bb
== last_bb
)
2754 /* Here basic blocks first_bb through last_bb's predecessor
2755 end with GIMPLE_COND, all of them have one of the edges to
2756 other_bb and another to another block in the range,
2757 all blocks except first_bb don't have side-effects and
2758 last_bb ends with either GIMPLE_COND, or cast satisfying
2759 final_range_test_p. */
2760 for (bb
= last_bb
; ; bb
= single_pred (bb
))
2762 enum tree_code code
;
2764 inter_bb_range_test_entry bb_ent
;
2766 bb_ent
.op
= NULL_TREE
;
2767 bb_ent
.first_idx
= ops
.length ();
2768 bb_ent
.last_idx
= bb_ent
.first_idx
;
2769 e
= find_edge (bb
, other_bb
);
2770 stmt
= last_stmt (bb
);
2771 gimple_set_visited (stmt
, true);
2772 if (gimple_code (stmt
) != GIMPLE_COND
)
2774 use_operand_p use_p
;
2779 lhs
= gimple_assign_lhs (stmt
);
2780 rhs
= gimple_assign_rhs1 (stmt
);
2781 gcc_assert (bb
== last_bb
);
2788 # _345 = PHI <_123(N), 1(...), 1(...)>
2790 or 0 instead of 1. If it is 0, the _234
2791 range test is anded together with all the
2792 other range tests, if it is 1, it is ored with
2794 single_imm_use (lhs
, &use_p
, &phi
);
2795 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
2796 e2
= find_edge (first_bb
, other_bb
);
2798 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
2799 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
2800 code
= BIT_AND_EXPR
;
2803 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
2804 code
= BIT_IOR_EXPR
;
2807 /* If _234 SSA_NAME_DEF_STMT is
2809 (or &, corresponding to 1/0 in the phi arguments,
2810 push into ops the individual range test arguments
2811 of the bitwise or resp. and, recursively. */
2812 if (!get_ops (rhs
, code
, &ops
,
2813 loop_containing_stmt (stmt
))
2814 && has_single_use (rhs
))
2816 /* Otherwise, push the _234 range test itself. */
2818 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2828 bb_ent
.last_idx
= ops
.length ();
2830 bbinfo
.safe_push (bb_ent
);
2833 /* Otherwise stmt is GIMPLE_COND. */
2834 code
= gimple_cond_code (stmt
);
2835 lhs
= gimple_cond_lhs (stmt
);
2836 rhs
= gimple_cond_rhs (stmt
);
2837 if (TREE_CODE (lhs
) == SSA_NAME
2838 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2839 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
2840 || rhs
!= boolean_false_node
2841 /* Either push into ops the individual bitwise
2842 or resp. and operands, depending on which
2843 edge is other_bb. */
2844 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
2845 ^ (code
== EQ_EXPR
))
2846 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
2847 loop_containing_stmt (stmt
))))
2849 /* Or push the GIMPLE_COND stmt itself. */
2851 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2854 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
2855 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
2856 /* oe->op = NULL signs that there is no SSA_NAME
2857 for the range test, and oe->id instead is the
2858 basic block number, at which's end the GIMPLE_COND
2866 else if (ops
.length () > bb_ent
.first_idx
)
2869 bb_ent
.last_idx
= ops
.length ();
2871 bbinfo
.safe_push (bb_ent
);
2875 if (ops
.length () > 1)
2876 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
2880 /* update_ops relies on has_single_use predicates returning the
2881 same values as it did during get_ops earlier. Additionally it
2882 never removes statements, only adds new ones and it should walk
2883 from the single imm use and check the predicate already before
2884 making those changes.
2885 On the other side, the handling of GIMPLE_COND directly can turn
2886 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2887 it needs to be done in a separate loop afterwards. */
2888 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2890 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2891 && bbinfo
[idx
].op
!= NULL_TREE
)
2895 stmt
= last_stmt (bb
);
2896 new_op
= update_ops (bbinfo
[idx
].op
,
2898 ops
[bbinfo
[idx
].first_idx
]->rank
,
2899 ops
, &bbinfo
[idx
].first_idx
,
2900 loop_containing_stmt (stmt
));
2901 if (new_op
== NULL_TREE
)
2903 gcc_assert (bb
== last_bb
);
2904 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
2906 if (bbinfo
[idx
].op
!= new_op
)
2908 imm_use_iterator iter
;
2909 use_operand_p use_p
;
2910 gimple use_stmt
, cast_stmt
= NULL
;
2912 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
2913 if (is_gimple_debug (use_stmt
))
2915 else if (gimple_code (use_stmt
) == GIMPLE_COND
2916 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2917 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2918 SET_USE (use_p
, new_op
);
2919 else if (gimple_assign_cast_p (use_stmt
))
2920 cast_stmt
= use_stmt
;
2925 gcc_assert (bb
== last_bb
);
2926 tree lhs
= gimple_assign_lhs (cast_stmt
);
2927 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
2928 enum tree_code rhs_code
2929 = gimple_assign_rhs_code (cast_stmt
);
2931 = gimple_build_assign_with_ops (rhs_code
, new_lhs
,
2933 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
2934 gimple_set_uid (g
, gimple_uid (cast_stmt
));
2935 gimple_set_visited (g
, true);
2936 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2937 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2938 if (is_gimple_debug (use_stmt
))
2940 else if (gimple_code (use_stmt
) == GIMPLE_COND
2941 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2942 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2943 SET_USE (use_p
, new_lhs
);
2952 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2954 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2955 && bbinfo
[idx
].op
== NULL_TREE
2956 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
2958 stmt
= last_stmt (bb
);
2959 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
2960 gimple_cond_make_false (stmt
);
2961 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
2962 gimple_cond_make_true (stmt
);
2965 gimple_cond_set_code (stmt
, NE_EXPR
);
2966 gimple_cond_set_lhs (stmt
, ops
[bbinfo
[idx
].first_idx
]->op
);
2967 gimple_cond_set_rhs (stmt
, boolean_false_node
);
2979 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2980 of STMT in it's operands. This is also known as a "destructive
2981 update" operation. */
2984 is_phi_for_stmt (gimple stmt
, tree operand
)
2988 use_operand_p arg_p
;
2991 if (TREE_CODE (operand
) != SSA_NAME
)
2994 lhs
= gimple_assign_lhs (stmt
);
2996 def_stmt
= SSA_NAME_DEF_STMT (operand
);
2997 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
3000 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
3001 if (lhs
== USE_FROM_PTR (arg_p
))
3006 /* Remove def stmt of VAR if VAR has zero uses and recurse
3007 on rhs1 operand if so. */
3010 remove_visited_stmt_chain (tree var
)
3013 gimple_stmt_iterator gsi
;
3017 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3019 stmt
= SSA_NAME_DEF_STMT (var
);
3020 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3022 var
= gimple_assign_rhs1 (stmt
);
3023 gsi
= gsi_for_stmt (stmt
);
3024 gsi_remove (&gsi
, true);
3025 release_defs (stmt
);
3032 /* This function checks three consequtive operands in
3033 passed operands vector OPS starting from OPINDEX and
3034 swaps two operands if it is profitable for binary operation
3035 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3037 We pair ops with the same rank if possible.
3039 The alternative we try is to see if STMT is a destructive
3040 update style statement, which is like:
3043 In that case, we want to use the destructive update form to
3044 expose the possible vectorizer sum reduction opportunity.
3045 In that case, the third operand will be the phi node. This
3046 check is not performed if STMT is null.
3048 We could, of course, try to be better as noted above, and do a
3049 lot of work to try to find these opportunities in >3 operand
3050 cases, but it is unlikely to be worth it. */
3053 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3054 unsigned int opindex
, gimple stmt
)
3056 operand_entry_t oe1
, oe2
, oe3
;
3059 oe2
= ops
[opindex
+ 1];
3060 oe3
= ops
[opindex
+ 2];
3062 if ((oe1
->rank
== oe2
->rank
3063 && oe2
->rank
!= oe3
->rank
)
3064 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3065 && !is_phi_for_stmt (stmt
, oe1
->op
)
3066 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3068 struct operand_entry temp
= *oe3
;
3070 oe3
->rank
= oe1
->rank
;
3072 oe1
->rank
= temp
.rank
;
3074 else if ((oe1
->rank
== oe3
->rank
3075 && oe2
->rank
!= oe3
->rank
)
3076 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3077 && !is_phi_for_stmt (stmt
, oe1
->op
)
3078 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3080 struct operand_entry temp
= *oe2
;
3082 oe2
->rank
= oe1
->rank
;
3084 oe1
->rank
= temp
.rank
;
3088 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3089 two definitions, otherwise return STMT. */
3091 static inline gimple
3092 find_insert_point (gimple stmt
, tree rhs1
, tree rhs2
)
3094 if (TREE_CODE (rhs1
) == SSA_NAME
3095 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3096 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3097 if (TREE_CODE (rhs2
) == SSA_NAME
3098 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3099 stmt
= SSA_NAME_DEF_STMT (rhs2
);
3103 /* Recursively rewrite our linearized statements so that the operators
3104 match those in OPS[OPINDEX], putting the computation in rank
3105 order. Return new lhs. */
3108 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3109 vec
<operand_entry_t
> ops
, bool changed
)
3111 tree rhs1
= gimple_assign_rhs1 (stmt
);
3112 tree rhs2
= gimple_assign_rhs2 (stmt
);
3113 tree lhs
= gimple_assign_lhs (stmt
);
3116 /* The final recursion case for this function is that you have
3117 exactly two operations left.
3118 If we had one exactly one op in the entire list to start with, we
3119 would have never called this function, and the tail recursion
3120 rewrites them one at a time. */
3121 if (opindex
+ 2 == ops
.length ())
3123 operand_entry_t oe1
, oe2
;
3126 oe2
= ops
[opindex
+ 1];
3128 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3130 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3131 unsigned int uid
= gimple_uid (stmt
);
3133 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3135 fprintf (dump_file
, "Transforming ");
3136 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3141 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3142 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3144 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3145 lhs
, oe1
->op
, oe2
->op
);
3146 gimple_set_uid (stmt
, uid
);
3147 gimple_set_visited (stmt
, true);
3148 if (insert_point
== gsi_stmt (gsi
))
3149 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3151 insert_stmt_after (stmt
, insert_point
);
3155 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3157 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3158 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3162 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3163 remove_visited_stmt_chain (rhs1
);
3165 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3167 fprintf (dump_file
, " into ");
3168 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3174 /* If we hit here, we should have 3 or more ops left. */
3175 gcc_assert (opindex
+ 2 < ops
.length ());
3177 /* Rewrite the next operator. */
3180 /* Recurse on the LHS of the binary operator, which is guaranteed to
3181 be the non-leaf side. */
3183 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3184 changed
|| oe
->op
!= rhs2
);
3186 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3188 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3190 fprintf (dump_file
, "Transforming ");
3191 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3194 /* If changed is false, this is either opindex == 0
3195 or all outer rhs2's were equal to corresponding oe->op,
3196 and powi_result is NULL.
3197 That means lhs is equivalent before and after reassociation.
3198 Otherwise ensure the old lhs SSA_NAME is not reused and
3199 create a new stmt as well, so that any debug stmts will be
3200 properly adjusted. */
3203 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3204 unsigned int uid
= gimple_uid (stmt
);
3205 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3207 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3208 stmt
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3209 lhs
, new_rhs1
, oe
->op
);
3210 gimple_set_uid (stmt
, uid
);
3211 gimple_set_visited (stmt
, true);
3212 if (insert_point
== gsi_stmt (gsi
))
3213 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3215 insert_stmt_after (stmt
, insert_point
);
3219 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3221 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3222 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3226 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3228 fprintf (dump_file
, " into ");
3229 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3235 /* Find out how many cycles we need to compute statements chain.
3236 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3237 maximum number of independent statements we may execute per cycle. */
3240 get_required_cycles (int ops_num
, int cpu_width
)
3246 /* While we have more than 2 * cpu_width operands
3247 we may reduce number of operands by cpu_width
3249 res
= ops_num
/ (2 * cpu_width
);
3251 /* Remained operands count may be reduced twice per cycle
3252 until we have only one operand. */
3253 rest
= (unsigned)(ops_num
- res
* cpu_width
);
3254 elog
= exact_log2 (rest
);
3258 res
+= floor_log2 (rest
) + 1;
3263 /* Returns an optimal number of registers to use for computation of
3264 given statements. */
3267 get_reassociation_width (int ops_num
, enum tree_code opc
,
3268 enum machine_mode mode
)
3270 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3275 if (param_width
> 0)
3276 width
= param_width
;
3278 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3283 /* Get the minimal time required for sequence computation. */
3284 cycles_best
= get_required_cycles (ops_num
, width
);
3286 /* Check if we may use less width and still compute sequence for
3287 the same time. It will allow us to reduce registers usage.
3288 get_required_cycles is monotonically increasing with lower width
3289 so we can perform a binary search for the minimal width that still
3290 results in the optimal cycle count. */
3292 while (width
> width_min
)
3294 int width_mid
= (width
+ width_min
) / 2;
3296 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3298 else if (width_min
< width_mid
)
3299 width_min
= width_mid
;
3307 /* Recursively rewrite our linearized statements so that the operators
3308 match those in OPS[OPINDEX], putting the computation in rank
3309 order and trying to allow operations to be executed in
3313 rewrite_expr_tree_parallel (gimple stmt
, int width
,
3314 vec
<operand_entry_t
> ops
)
3316 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3317 int op_num
= ops
.length ();
3318 int stmt_num
= op_num
- 1;
3319 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3320 int op_index
= op_num
- 1;
3322 int ready_stmts_end
= 0;
3324 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3326 /* We start expression rewriting from the top statements.
3327 So, in this loop we create a full list of statements
3328 we will work with. */
3329 stmts
[stmt_num
- 1] = stmt
;
3330 for (i
= stmt_num
- 2; i
>= 0; i
--)
3331 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3333 for (i
= 0; i
< stmt_num
; i
++)
3337 /* Determine whether we should use results of
3338 already handled statements or not. */
3339 if (ready_stmts_end
== 0
3340 && (i
- stmt_index
>= width
|| op_index
< 1))
3341 ready_stmts_end
= i
;
3343 /* Now we choose operands for the next statement. Non zero
3344 value in ready_stmts_end means here that we should use
3345 the result of already generated statements as new operand. */
3346 if (ready_stmts_end
> 0)
3348 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3349 if (ready_stmts_end
> stmt_index
)
3350 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3351 else if (op_index
>= 0)
3352 op2
= ops
[op_index
--]->op
;
3355 gcc_assert (stmt_index
< i
);
3356 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3359 if (stmt_index
>= ready_stmts_end
)
3360 ready_stmts_end
= 0;
3365 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3366 op2
= ops
[op_index
--]->op
;
3367 op1
= ops
[op_index
--]->op
;
3370 /* If we emit the last statement then we should put
3371 operands into the last statement. It will also
3373 if (op_index
< 0 && stmt_index
== i
)
3376 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3378 fprintf (dump_file
, "Transforming ");
3379 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3382 /* We keep original statement only for the last one. All
3383 others are recreated. */
3384 if (i
== stmt_num
- 1)
3386 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3387 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3388 update_stmt (stmts
[i
]);
3391 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3393 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3395 fprintf (dump_file
, " into ");
3396 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3400 remove_visited_stmt_chain (last_rhs1
);
3403 /* Transform STMT, which is really (A +B) + (C + D) into the left
3404 linear form, ((A+B)+C)+D.
3405 Recurse on D if necessary. */
3408 linearize_expr (gimple stmt
)
3410 gimple_stmt_iterator gsi
;
3411 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3412 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3413 gimple oldbinrhs
= binrhs
;
3414 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3415 gimple newbinrhs
= NULL
;
3416 struct loop
*loop
= loop_containing_stmt (stmt
);
3417 tree lhs
= gimple_assign_lhs (stmt
);
3419 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3420 && is_reassociable_op (binrhs
, rhscode
, loop
));
3422 gsi
= gsi_for_stmt (stmt
);
3424 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3425 binrhs
= gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs
),
3426 make_ssa_name (TREE_TYPE (lhs
), NULL
),
3427 gimple_assign_lhs (binlhs
),
3428 gimple_assign_rhs2 (binrhs
));
3429 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3430 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3431 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3433 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3434 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3438 fprintf (dump_file
, "Linearized: ");
3439 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3442 reassociate_stats
.linearized
++;
3445 gsi
= gsi_for_stmt (oldbinrhs
);
3446 gsi_remove (&gsi
, true);
3447 release_defs (oldbinrhs
);
3449 gimple_set_visited (stmt
, true);
3450 gimple_set_visited (binlhs
, true);
3451 gimple_set_visited (binrhs
, true);
3453 /* Tail recurse on the new rhs if it still needs reassociation. */
3454 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3455 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3456 want to change the algorithm while converting to tuples. */
3457 linearize_expr (stmt
);
3460 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3461 it. Otherwise, return NULL. */
3464 get_single_immediate_use (tree lhs
)
3466 use_operand_p immuse
;
3469 if (TREE_CODE (lhs
) == SSA_NAME
3470 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3471 && is_gimple_assign (immusestmt
))
3477 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3478 representing the negated value. Insertions of any necessary
3479 instructions go before GSI.
3480 This function is recursive in that, if you hand it "a_5" as the
3481 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3482 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3485 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3487 gimple negatedefstmt
= NULL
;
3488 tree resultofnegate
;
3489 gimple_stmt_iterator gsi
;
3492 /* If we are trying to negate a name, defined by an add, negate the
3493 add operands instead. */
3494 if (TREE_CODE (tonegate
) == SSA_NAME
)
3495 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3496 if (TREE_CODE (tonegate
) == SSA_NAME
3497 && is_gimple_assign (negatedefstmt
)
3498 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3499 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3500 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3502 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3503 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3504 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3507 gsi
= gsi_for_stmt (negatedefstmt
);
3508 rhs1
= negate_value (rhs1
, &gsi
);
3510 gsi
= gsi_for_stmt (negatedefstmt
);
3511 rhs2
= negate_value (rhs2
, &gsi
);
3513 gsi
= gsi_for_stmt (negatedefstmt
);
3514 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3515 gimple_set_visited (negatedefstmt
, true);
3516 g
= gimple_build_assign_with_ops (PLUS_EXPR
, lhs
, rhs1
, rhs2
);
3517 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3518 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3522 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3523 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3524 NULL_TREE
, true, GSI_SAME_STMT
);
3526 uid
= gimple_uid (gsi_stmt (gsi
));
3527 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3529 gimple stmt
= gsi_stmt (gsi
);
3530 if (gimple_uid (stmt
) != 0)
3532 gimple_set_uid (stmt
, uid
);
3534 return resultofnegate
;
3537 /* Return true if we should break up the subtract in STMT into an add
3538 with negate. This is true when we the subtract operands are really
3539 adds, or the subtract itself is used in an add expression. In
3540 either case, breaking up the subtract into an add with negate
3541 exposes the adds to reassociation. */
3544 should_break_up_subtract (gimple stmt
)
3546 tree lhs
= gimple_assign_lhs (stmt
);
3547 tree binlhs
= gimple_assign_rhs1 (stmt
);
3548 tree binrhs
= gimple_assign_rhs2 (stmt
);
3550 struct loop
*loop
= loop_containing_stmt (stmt
);
3552 if (TREE_CODE (binlhs
) == SSA_NAME
3553 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3556 if (TREE_CODE (binrhs
) == SSA_NAME
3557 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3560 if (TREE_CODE (lhs
) == SSA_NAME
3561 && (immusestmt
= get_single_immediate_use (lhs
))
3562 && is_gimple_assign (immusestmt
)
3563 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3564 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3569 /* Transform STMT from A - B into A + -B. */
3572 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3574 tree rhs1
= gimple_assign_rhs1 (stmt
);
3575 tree rhs2
= gimple_assign_rhs2 (stmt
);
3577 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3579 fprintf (dump_file
, "Breaking up subtract ");
3580 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3583 rhs2
= negate_value (rhs2
, gsip
);
3584 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3588 /* Determine whether STMT is a builtin call that raises an SSA name
3589 to an integer power and has only one use. If so, and this is early
3590 reassociation and unsafe math optimizations are permitted, place
3591 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3592 If any of these conditions does not hold, return FALSE. */
3595 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3598 REAL_VALUE_TYPE c
, cint
;
3600 if (!first_pass_instance
3601 || !flag_unsafe_math_optimizations
3602 || !is_gimple_call (stmt
)
3603 || !has_single_use (gimple_call_lhs (stmt
)))
3606 fndecl
= gimple_call_fndecl (stmt
);
3609 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3612 switch (DECL_FUNCTION_CODE (fndecl
))
3614 CASE_FLT_FN (BUILT_IN_POW
):
3615 *base
= gimple_call_arg (stmt
, 0);
3616 arg1
= gimple_call_arg (stmt
, 1);
3618 if (TREE_CODE (arg1
) != REAL_CST
)
3621 c
= TREE_REAL_CST (arg1
);
3623 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3626 *exponent
= real_to_integer (&c
);
3627 real_from_integer (&cint
, VOIDmode
, *exponent
,
3628 *exponent
< 0 ? -1 : 0, 0);
3629 if (!real_identical (&c
, &cint
))
3634 CASE_FLT_FN (BUILT_IN_POWI
):
3635 *base
= gimple_call_arg (stmt
, 0);
3636 arg1
= gimple_call_arg (stmt
, 1);
3638 if (!tree_fits_shwi_p (arg1
))
3641 *exponent
= TREE_INT_CST_LOW (arg1
);
3648 /* Expanding negative exponents is generally unproductive, so we don't
3649 complicate matters with those. Exponents of zero and one should
3650 have been handled by expression folding. */
3651 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
3657 /* Recursively linearize a binary expression that is the RHS of STMT.
3658 Place the operands of the expression tree in the vector named OPS. */
3661 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
3662 bool is_associative
, bool set_visited
)
3664 tree binlhs
= gimple_assign_rhs1 (stmt
);
3665 tree binrhs
= gimple_assign_rhs2 (stmt
);
3666 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
3667 bool binlhsisreassoc
= false;
3668 bool binrhsisreassoc
= false;
3669 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3670 struct loop
*loop
= loop_containing_stmt (stmt
);
3671 tree base
= NULL_TREE
;
3672 HOST_WIDE_INT exponent
= 0;
3675 gimple_set_visited (stmt
, true);
3677 if (TREE_CODE (binlhs
) == SSA_NAME
)
3679 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
3680 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
3681 && !stmt_could_throw_p (binlhsdef
));
3684 if (TREE_CODE (binrhs
) == SSA_NAME
)
3686 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
3687 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
3688 && !stmt_could_throw_p (binrhsdef
));
3691 /* If the LHS is not reassociable, but the RHS is, we need to swap
3692 them. If neither is reassociable, there is nothing we can do, so
3693 just put them in the ops vector. If the LHS is reassociable,
3694 linearize it. If both are reassociable, then linearize the RHS
3697 if (!binlhsisreassoc
)
3701 /* If this is not a associative operation like division, give up. */
3702 if (!is_associative
)
3704 add_to_ops_vec (ops
, binrhs
);
3708 if (!binrhsisreassoc
)
3710 if (rhscode
== MULT_EXPR
3711 && TREE_CODE (binrhs
) == SSA_NAME
3712 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
3714 add_repeat_to_ops_vec (ops
, base
, exponent
);
3715 gimple_set_visited (binrhsdef
, true);
3718 add_to_ops_vec (ops
, binrhs
);
3720 if (rhscode
== MULT_EXPR
3721 && TREE_CODE (binlhs
) == SSA_NAME
3722 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
3724 add_repeat_to_ops_vec (ops
, base
, exponent
);
3725 gimple_set_visited (binlhsdef
, true);
3728 add_to_ops_vec (ops
, binlhs
);
3733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3735 fprintf (dump_file
, "swapping operands of ");
3736 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3739 swap_ssa_operands (stmt
,
3740 gimple_assign_rhs1_ptr (stmt
),
3741 gimple_assign_rhs2_ptr (stmt
));
3744 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3746 fprintf (dump_file
, " is now ");
3747 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3750 /* We want to make it so the lhs is always the reassociative op,
3756 else if (binrhsisreassoc
)
3758 linearize_expr (stmt
);
3759 binlhs
= gimple_assign_rhs1 (stmt
);
3760 binrhs
= gimple_assign_rhs2 (stmt
);
3763 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
3764 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
3766 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
3767 is_associative
, set_visited
);
3769 if (rhscode
== MULT_EXPR
3770 && TREE_CODE (binrhs
) == SSA_NAME
3771 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
3773 add_repeat_to_ops_vec (ops
, base
, exponent
);
3774 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
3777 add_to_ops_vec (ops
, binrhs
);
3780 /* Repropagate the negates back into subtracts, since no other pass
3781 currently does it. */
3784 repropagate_negates (void)
3789 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
3791 gimple user
= get_single_immediate_use (negate
);
3793 if (!user
|| !is_gimple_assign (user
))
3796 /* The negate operand can be either operand of a PLUS_EXPR
3797 (it can be the LHS if the RHS is a constant for example).
3799 Force the negate operand to the RHS of the PLUS_EXPR, then
3800 transform the PLUS_EXPR into a MINUS_EXPR. */
3801 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
3803 /* If the negated operand appears on the LHS of the
3804 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3805 to force the negated operand to the RHS of the PLUS_EXPR. */
3806 if (gimple_assign_rhs1 (user
) == negate
)
3808 swap_ssa_operands (user
,
3809 gimple_assign_rhs1_ptr (user
),
3810 gimple_assign_rhs2_ptr (user
));
3813 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3814 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3815 if (gimple_assign_rhs2 (user
) == negate
)
3817 tree rhs1
= gimple_assign_rhs1 (user
);
3818 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
3819 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3820 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
3824 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
3826 if (gimple_assign_rhs1 (user
) == negate
)
3831 which we transform into
3834 This pushes down the negate which we possibly can merge
3835 into some other operation, hence insert it into the
3836 plus_negates vector. */
3837 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3838 tree a
= gimple_assign_rhs1 (feed
);
3839 tree b
= gimple_assign_rhs2 (user
);
3840 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
3841 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
3842 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)), NULL
);
3843 gimple g
= gimple_build_assign_with_ops (PLUS_EXPR
, x
, a
, b
);
3844 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
3845 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
, NULL
);
3846 user
= gsi_stmt (gsi2
);
3848 gsi_remove (&gsi
, true);
3849 release_defs (feed
);
3850 plus_negates
.safe_push (gimple_assign_lhs (user
));
3854 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3855 rid of one operation. */
3856 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3857 tree a
= gimple_assign_rhs1 (feed
);
3858 tree rhs1
= gimple_assign_rhs1 (user
);
3859 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3860 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
3861 update_stmt (gsi_stmt (gsi
));
3867 /* Returns true if OP is of a type for which we can do reassociation.
3868 That is for integral or non-saturating fixed-point types, and for
3869 floating point type when associative-math is enabled. */
3872 can_reassociate_p (tree op
)
3874 tree type
= TREE_TYPE (op
);
3875 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
3876 || NON_SAT_FIXED_POINT_TYPE_P (type
)
3877 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
3882 /* Break up subtract operations in block BB.
3884 We do this top down because we don't know whether the subtract is
3885 part of a possible chain of reassociation except at the top.
3894 we want to break up k = t - q, but we won't until we've transformed q
3895 = b - r, which won't be broken up until we transform b = c - d.
3897 En passant, clear the GIMPLE visited flag on every statement
3898 and set UIDs within each basic block. */
3901 break_up_subtract_bb (basic_block bb
)
3903 gimple_stmt_iterator gsi
;
3905 unsigned int uid
= 1;
3907 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3909 gimple stmt
= gsi_stmt (gsi
);
3910 gimple_set_visited (stmt
, false);
3911 gimple_set_uid (stmt
, uid
++);
3913 if (!is_gimple_assign (stmt
)
3914 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3917 /* Look for simple gimple subtract operations. */
3918 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3920 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3921 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3924 /* Check for a subtract used only in an addition. If this
3925 is the case, transform it into add of a negate for better
3926 reassociation. IE transform C = A-B into C = A + -B if C
3927 is only used in an addition. */
3928 if (should_break_up_subtract (stmt
))
3929 break_up_subtract (stmt
, &gsi
);
3931 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3932 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3933 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
3935 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3937 son
= next_dom_son (CDI_DOMINATORS
, son
))
3938 break_up_subtract_bb (son
);
3941 /* Used for repeated factor analysis. */
3942 struct repeat_factor_d
3944 /* An SSA name that occurs in a multiply chain. */
3947 /* Cached rank of the factor. */
3950 /* Number of occurrences of the factor in the chain. */
3951 HOST_WIDE_INT count
;
3953 /* An SSA name representing the product of this factor and
3954 all factors appearing later in the repeated factor vector. */
3958 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
3959 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
3962 static vec
<repeat_factor
> repeat_factor_vec
;
3964 /* Used for sorting the repeat factor vector. Sort primarily by
3965 ascending occurrence count, secondarily by descending rank. */
3968 compare_repeat_factors (const void *x1
, const void *x2
)
3970 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3971 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3973 if (rf1
->count
!= rf2
->count
)
3974 return rf1
->count
- rf2
->count
;
3976 return rf2
->rank
- rf1
->rank
;
3979 /* Look for repeated operands in OPS in the multiply tree rooted at
3980 STMT. Replace them with an optimal sequence of multiplies and powi
3981 builtin calls, and remove the used operands from OPS. Return an
3982 SSA name representing the value of the replacement sequence. */
3985 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
3987 unsigned i
, j
, vec_len
;
3990 repeat_factor_t rf1
, rf2
;
3991 repeat_factor rfnew
;
3992 tree result
= NULL_TREE
;
3993 tree target_ssa
, iter_result
;
3994 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3995 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
3996 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3997 gimple mul_stmt
, pow_stmt
;
3999 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4004 /* Allocate the repeated factor vector. */
4005 repeat_factor_vec
.create (10);
4007 /* Scan the OPS vector for all SSA names in the product and build
4008 up a vector of occurrence counts for each factor. */
4009 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4011 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4013 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4015 if (rf1
->factor
== oe
->op
)
4017 rf1
->count
+= oe
->count
;
4022 if (j
>= repeat_factor_vec
.length ())
4024 rfnew
.factor
= oe
->op
;
4025 rfnew
.rank
= oe
->rank
;
4026 rfnew
.count
= oe
->count
;
4027 rfnew
.repr
= NULL_TREE
;
4028 repeat_factor_vec
.safe_push (rfnew
);
4033 /* Sort the repeated factor vector by (a) increasing occurrence count,
4034 and (b) decreasing rank. */
4035 repeat_factor_vec
.qsort (compare_repeat_factors
);
4037 /* It is generally best to combine as many base factors as possible
4038 into a product before applying __builtin_powi to the result.
4039 However, the sort order chosen for the repeated factor vector
4040 allows us to cache partial results for the product of the base
4041 factors for subsequent use. When we already have a cached partial
4042 result from a previous iteration, it is best to make use of it
4043 before looking for another __builtin_pow opportunity.
4045 As an example, consider x * x * y * y * y * z * z * z * z.
4046 We want to first compose the product x * y * z, raise it to the
4047 second power, then multiply this by y * z, and finally multiply
4048 by z. This can be done in 5 multiplies provided we cache y * z
4049 for use in both expressions:
4057 If we instead ignored the cached y * z and first multiplied by
4058 the __builtin_pow opportunity z * z, we would get the inferior:
4067 vec_len
= repeat_factor_vec
.length ();
4069 /* Repeatedly look for opportunities to create a builtin_powi call. */
4072 HOST_WIDE_INT power
;
4074 /* First look for the largest cached product of factors from
4075 preceding iterations. If found, create a builtin_powi for
4076 it if the minimum occurrence count for its factors is at
4077 least 2, or just use this cached product as our next
4078 multiplicand if the minimum occurrence count is 1. */
4079 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4081 if (rf1
->repr
&& rf1
->count
> 0)
4091 iter_result
= rf1
->repr
;
4093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4097 fputs ("Multiplying by cached product ", dump_file
);
4098 for (elt
= j
; elt
< vec_len
; elt
++)
4100 rf
= &repeat_factor_vec
[elt
];
4101 print_generic_expr (dump_file
, rf
->factor
, 0);
4102 if (elt
< vec_len
- 1)
4103 fputs (" * ", dump_file
);
4105 fputs ("\n", dump_file
);
4110 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4111 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4112 build_int_cst (integer_type_node
,
4114 gimple_call_set_lhs (pow_stmt
, iter_result
);
4115 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4116 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4118 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4122 fputs ("Building __builtin_pow call for cached product (",
4124 for (elt
= j
; elt
< vec_len
; elt
++)
4126 rf
= &repeat_factor_vec
[elt
];
4127 print_generic_expr (dump_file
, rf
->factor
, 0);
4128 if (elt
< vec_len
- 1)
4129 fputs (" * ", dump_file
);
4131 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
4138 /* Otherwise, find the first factor in the repeated factor
4139 vector whose occurrence count is at least 2. If no such
4140 factor exists, there are no builtin_powi opportunities
4142 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4144 if (rf1
->count
>= 2)
4153 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4157 fputs ("Building __builtin_pow call for (", dump_file
);
4158 for (elt
= j
; elt
< vec_len
; elt
++)
4160 rf
= &repeat_factor_vec
[elt
];
4161 print_generic_expr (dump_file
, rf
->factor
, 0);
4162 if (elt
< vec_len
- 1)
4163 fputs (" * ", dump_file
);
4165 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4168 reassociate_stats
.pows_created
++;
4170 /* Visit each element of the vector in reverse order (so that
4171 high-occurrence elements are visited first, and within the
4172 same occurrence count, lower-ranked elements are visited
4173 first). Form a linear product of all elements in this order
4174 whose occurrencce count is at least that of element J.
4175 Record the SSA name representing the product of each element
4176 with all subsequent elements in the vector. */
4177 if (j
== vec_len
- 1)
4178 rf1
->repr
= rf1
->factor
;
4181 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4185 rf1
= &repeat_factor_vec
[ii
];
4186 rf2
= &repeat_factor_vec
[ii
+ 1];
4188 /* Init the last factor's representative to be itself. */
4190 rf2
->repr
= rf2
->factor
;
4195 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4196 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
4199 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4200 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4201 rf1
->repr
= target_ssa
;
4203 /* Don't reprocess the multiply we just introduced. */
4204 gimple_set_visited (mul_stmt
, true);
4208 /* Form a call to __builtin_powi for the maximum product
4209 just formed, raised to the power obtained earlier. */
4210 rf1
= &repeat_factor_vec
[j
];
4211 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4212 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4213 build_int_cst (integer_type_node
,
4215 gimple_call_set_lhs (pow_stmt
, iter_result
);
4216 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4217 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4220 /* If we previously formed at least one other builtin_powi call,
4221 form the product of this one and those others. */
4224 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4225 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
4226 result
, iter_result
);
4227 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4228 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4229 gimple_set_visited (mul_stmt
, true);
4230 result
= new_result
;
4233 result
= iter_result
;
4235 /* Decrement the occurrence count of each element in the product
4236 by the count found above, and remove this many copies of each
4238 for (i
= j
; i
< vec_len
; i
++)
4243 rf1
= &repeat_factor_vec
[i
];
4244 rf1
->count
-= power
;
4246 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4248 if (oe
->op
== rf1
->factor
)
4252 ops
->ordered_remove (n
);
4268 /* At this point all elements in the repeated factor vector have a
4269 remaining occurrence count of 0 or 1, and those with a count of 1
4270 don't have cached representatives. Re-sort the ops vector and
4272 ops
->qsort (sort_by_operand_rank
);
4273 repeat_factor_vec
.release ();
4275 /* Return the final product computed herein. Note that there may
4276 still be some elements with single occurrence count left in OPS;
4277 those will be handled by the normal reassociation logic. */
4281 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4284 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4288 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4290 fprintf (dump_file
, "Transforming ");
4291 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4294 rhs1
= gimple_assign_rhs1 (stmt
);
4295 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4297 remove_visited_stmt_chain (rhs1
);
4299 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4301 fprintf (dump_file
, " into ");
4302 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4306 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4309 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4310 tree rhs1
, tree rhs2
)
4312 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4314 fprintf (dump_file
, "Transforming ");
4315 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4318 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4319 update_stmt (gsi_stmt (*gsi
));
4320 remove_visited_stmt_chain (rhs1
);
4322 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4324 fprintf (dump_file
, " into ");
4325 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4329 /* Reassociate expressions in basic block BB and its post-dominator as
4333 reassociate_bb (basic_block bb
)
4335 gimple_stmt_iterator gsi
;
4337 gimple stmt
= last_stmt (bb
);
4339 if (stmt
&& !gimple_visited_p (stmt
))
4340 maybe_optimize_range_tests (stmt
);
4342 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4344 stmt
= gsi_stmt (gsi
);
4346 if (is_gimple_assign (stmt
)
4347 && !stmt_could_throw_p (stmt
))
4349 tree lhs
, rhs1
, rhs2
;
4350 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4352 /* If this is not a gimple binary expression, there is
4353 nothing for us to do with it. */
4354 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4357 /* If this was part of an already processed statement,
4358 we don't need to touch it again. */
4359 if (gimple_visited_p (stmt
))
4361 /* This statement might have become dead because of previous
4363 if (has_zero_uses (gimple_get_lhs (stmt
)))
4365 gsi_remove (&gsi
, true);
4366 release_defs (stmt
);
4367 /* We might end up removing the last stmt above which
4368 places the iterator to the end of the sequence.
4369 Reset it to the last stmt in this case which might
4370 be the end of the sequence as well if we removed
4371 the last statement of the sequence. In which case
4372 we need to bail out. */
4373 if (gsi_end_p (gsi
))
4375 gsi
= gsi_last_bb (bb
);
4376 if (gsi_end_p (gsi
))
4383 lhs
= gimple_assign_lhs (stmt
);
4384 rhs1
= gimple_assign_rhs1 (stmt
);
4385 rhs2
= gimple_assign_rhs2 (stmt
);
4387 /* For non-bit or min/max operations we can't associate
4388 all types. Verify that here. */
4389 if (rhs_code
!= BIT_IOR_EXPR
4390 && rhs_code
!= BIT_AND_EXPR
4391 && rhs_code
!= BIT_XOR_EXPR
4392 && rhs_code
!= MIN_EXPR
4393 && rhs_code
!= MAX_EXPR
4394 && (!can_reassociate_p (lhs
)
4395 || !can_reassociate_p (rhs1
)
4396 || !can_reassociate_p (rhs2
)))
4399 if (associative_tree_code (rhs_code
))
4401 vec
<operand_entry_t
> ops
= vNULL
;
4402 tree powi_result
= NULL_TREE
;
4404 /* There may be no immediate uses left by the time we
4405 get here because we may have eliminated them all. */
4406 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4409 gimple_set_visited (stmt
, true);
4410 linearize_expr_tree (&ops
, stmt
, true, true);
4411 ops
.qsort (sort_by_operand_rank
);
4412 optimize_ops_list (rhs_code
, &ops
);
4413 if (undistribute_ops_list (rhs_code
, &ops
,
4414 loop_containing_stmt (stmt
)))
4416 ops
.qsort (sort_by_operand_rank
);
4417 optimize_ops_list (rhs_code
, &ops
);
4420 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4421 optimize_range_tests (rhs_code
, &ops
);
4423 if (first_pass_instance
4424 && rhs_code
== MULT_EXPR
4425 && flag_unsafe_math_optimizations
)
4426 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4428 /* If the operand vector is now empty, all operands were
4429 consumed by the __builtin_powi optimization. */
4430 if (ops
.length () == 0)
4431 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4432 else if (ops
.length () == 1)
4434 tree last_op
= ops
.last ()->op
;
4437 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4440 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4444 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4445 int ops_num
= ops
.length ();
4446 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4449 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4451 "Width = %d was chosen for reassociation\n", width
);
4454 && ops
.length () > 3)
4455 rewrite_expr_tree_parallel (stmt
, width
, ops
);
4458 /* When there are three operands left, we want
4459 to make sure the ones that get the double
4460 binary op are chosen wisely. */
4461 int len
= ops
.length ();
4463 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4465 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
4466 powi_result
!= NULL
);
4469 /* If we combined some repeated factors into a
4470 __builtin_powi call, multiply that result by the
4471 reassociated operands. */
4474 gimple mul_stmt
, lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
4475 tree type
= TREE_TYPE (lhs
);
4476 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4478 gimple_set_lhs (lhs_stmt
, target_ssa
);
4479 update_stmt (lhs_stmt
);
4481 target_ssa
= new_lhs
;
4482 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
4485 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4486 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4494 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4496 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4497 reassociate_bb (son
);
4500 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4501 void debug_ops_vector (vec
<operand_entry_t
> ops
);
4503 /* Dump the operand entry vector OPS to FILE. */
4506 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
4511 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4513 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4514 print_generic_expr (file
, oe
->op
, 0);
4518 /* Dump the operand entry vector OPS to STDERR. */
4521 debug_ops_vector (vec
<operand_entry_t
> ops
)
4523 dump_ops_vector (stderr
, ops
);
4529 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
4530 reassociate_bb (EXIT_BLOCK_PTR
);
4533 /* Initialize the reassociation pass. */
4540 int *bbs
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
4542 /* Find the loops, so that we can prevent moving calculations in
4544 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
4546 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
4548 operand_entry_pool
= create_alloc_pool ("operand entry pool",
4549 sizeof (struct operand_entry
), 30);
4550 next_operand_entry_id
= 0;
4552 /* Reverse RPO (Reverse Post Order) will give us something where
4553 deeper loops come later. */
4554 pre_and_rev_post_order_compute (NULL
, bbs
, false);
4555 bb_rank
= XCNEWVEC (long, last_basic_block
);
4556 operand_rank
= pointer_map_create ();
4558 /* Give each default definition a distinct rank. This includes
4559 parameters and the static chain. Walk backwards over all
4560 SSA names so that we get proper rank ordering according
4561 to tree_swap_operands_p. */
4562 for (i
= num_ssa_names
- 1; i
> 0; --i
)
4564 tree name
= ssa_name (i
);
4565 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
4566 insert_operand_rank (name
, ++rank
);
4569 /* Set up rank for each BB */
4570 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
4571 bb_rank
[bbs
[i
]] = ++rank
<< 16;
4574 calculate_dominance_info (CDI_POST_DOMINATORS
);
4575 plus_negates
= vNULL
;
4578 /* Cleanup after the reassociation pass, and print stats if
4584 statistics_counter_event (cfun
, "Linearized",
4585 reassociate_stats
.linearized
);
4586 statistics_counter_event (cfun
, "Constants eliminated",
4587 reassociate_stats
.constants_eliminated
);
4588 statistics_counter_event (cfun
, "Ops eliminated",
4589 reassociate_stats
.ops_eliminated
);
4590 statistics_counter_event (cfun
, "Statements rewritten",
4591 reassociate_stats
.rewritten
);
4592 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
4593 reassociate_stats
.pows_encountered
);
4594 statistics_counter_event (cfun
, "Built-in powi calls created",
4595 reassociate_stats
.pows_created
);
4597 pointer_map_destroy (operand_rank
);
4598 free_alloc_pool (operand_entry_pool
);
4600 plus_negates
.release ();
4601 free_dominance_info (CDI_POST_DOMINATORS
);
4602 loop_optimizer_finalize ();
4605 /* Gate and execute functions for Reassociation. */
4608 execute_reassoc (void)
4613 repropagate_negates ();
4620 gate_tree_ssa_reassoc (void)
4622 return flag_tree_reassoc
!= 0;
4627 const pass_data pass_data_reassoc
=
4629 GIMPLE_PASS
, /* type */
4630 "reassoc", /* name */
4631 OPTGROUP_NONE
, /* optinfo_flags */
4632 true, /* has_gate */
4633 true, /* has_execute */
4634 TV_TREE_REASSOC
, /* tv_id */
4635 ( PROP_cfg
| PROP_ssa
), /* properties_required */
4636 0, /* properties_provided */
4637 0, /* properties_destroyed */
4638 0, /* todo_flags_start */
4640 | TODO_update_ssa_only_virtuals
4641 | TODO_verify_flow
), /* todo_flags_finish */
4644 class pass_reassoc
: public gimple_opt_pass
4647 pass_reassoc (gcc::context
*ctxt
)
4648 : gimple_opt_pass (pass_data_reassoc
, ctxt
)
4651 /* opt_pass methods: */
4652 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
4653 bool gate () { return gate_tree_ssa_reassoc (); }
4654 unsigned int execute () { return execute_reassoc (); }
4656 }; // class pass_reassoc
4661 make_pass_reassoc (gcc::context
*ctxt
)
4663 return new pass_reassoc (ctxt
);